
Docstrings and dealing with ruff messages for unfccc_reader (not final as black is fighting ruff)

Johannes Gütschow, 1 year ago
commit 06a9aceb8e
65 changed files with 7831 additions and 4903 deletions
1. Makefile (+4 -0)
2. docs/source/conf.py (+7 -8)
3. poetry.lock (+15 -1)
4. pyproject.toml (+1 -0)
5. src/unfccc_ghg_data/__init__.py (+8 -3)
6. src/unfccc_ghg_data/helper/__init__.py (+1 -1)
7. src/unfccc_ghg_data/helper/definitions.py (+234 -109)
8. src/unfccc_ghg_data/helper/folder_mapping.py (+6 -5)
9. src/unfccc_ghg_data/helper/functions.py (+4 -6)
10. src/unfccc_ghg_data/unfccc_crf_reader/unfccc_crf_reader_core.py (+286 -176)
11. src/unfccc_ghg_data/unfccc_di_reader/read_unfccc_di_for_country.py (+8 -7)
12. src/unfccc_ghg_data/unfccc_downloader/fetch_submissions_bur.py (+5 -8)
13. src/unfccc_ghg_data/unfccc_downloader/fetch_submissions_nc.py (+3 -7)
14. src/unfccc_ghg_data/unfccc_reader/Argentina/__init__.py (+27 -4)
15. src/unfccc_ghg_data/unfccc_reader/Argentina/read_ARG_BUR4_from_pdf.py (+115 -96)
16. src/unfccc_ghg_data/unfccc_reader/Chile/__init__.py (+28 -5)
17. src/unfccc_ghg_data/unfccc_reader/Chile/config_chl_bur4.py (+288 -141)
18. src/unfccc_ghg_data/unfccc_reader/Chile/read_CHL_BUR4_from_xlsx.py (+90 -52)
19. src/unfccc_ghg_data/unfccc_reader/Chile/read_CHL_BUR5_from_xlsx.py (+100 -55)
20. src/unfccc_ghg_data/unfccc_reader/Colombia/__init__.py (+30 -1)
21. src/unfccc_ghg_data/unfccc_reader/Colombia/read_COL_BUR3_from_xlsx.py (+104 -84)
22. src/unfccc_ghg_data/unfccc_reader/Indonesia/__init__.py (+30 -0)
23. src/unfccc_ghg_data/unfccc_reader/Indonesia/read_IDN_BUR3_from_pdf.py (+167 -100)
24. src/unfccc_ghg_data/unfccc_reader/Israel/__init__.py (+30 -0)
25. src/unfccc_ghg_data/unfccc_reader/Israel/config_isr_bur2.py (+409 -314)
26. src/unfccc_ghg_data/unfccc_reader/Israel/read_ISR_BUR2_from_pdf.py (+121 -77)
27. src/unfccc_ghg_data/unfccc_reader/Malaysia/__init__.py (+30 -0)
28. src/unfccc_ghg_data/unfccc_reader/Malaysia/config_mys_bur3.py (+922 -602)
29. src/unfccc_ghg_data/unfccc_reader/Malaysia/config_mys_bur4.py (+258 -253)
30. src/unfccc_ghg_data/unfccc_reader/Malaysia/read_MYS_BUR3_from_pdf.py (+82 -52)
31. src/unfccc_ghg_data/unfccc_reader/Malaysia/read_MYS_BUR4_from_pdf.py (+84 -55)
32. src/unfccc_ghg_data/unfccc_reader/Mexico/__init__.py (+30 -0)
33. src/unfccc_ghg_data/unfccc_reader/Mexico/config_mex_bur3.py (+81 -35)
34. src/unfccc_ghg_data/unfccc_reader/Mexico/read_MEX_BUR3_from_pdf.py (+63 -66)
35. src/unfccc_ghg_data/unfccc_reader/Montenegro/__init__.py (+30 -0)
36. src/unfccc_ghg_data/unfccc_reader/Montenegro/config_mne_bur3.py (+103 -47)
37. src/unfccc_ghg_data/unfccc_reader/Montenegro/read_MNE_BUR3_from_pdf.py (+88 -56)
38. src/unfccc_ghg_data/unfccc_reader/Morocco/__init__.py (+30 -0)
39. src/unfccc_ghg_data/unfccc_reader/Morocco/config_mar_bur3.py (+187 -108)
40. src/unfccc_ghg_data/unfccc_reader/Morocco/read_MAR_BUR3_from_pdf.py (+122 -88)
41. src/unfccc_ghg_data/unfccc_reader/Nigeria/__init__.py (+30 -0)
42. src/unfccc_ghg_data/unfccc_reader/Nigeria/config_nga_bur2.py (+294 -272)
43. src/unfccc_ghg_data/unfccc_reader/Nigeria/read_NGA_BUR2_from_pdf.py (+137 -103)
44. src/unfccc_ghg_data/unfccc_reader/Peru/__init__.py (+30 -0)
45. src/unfccc_ghg_data/unfccc_reader/Peru/config_per_bur3.py (+77 -66)
46. src/unfccc_ghg_data/unfccc_reader/Peru/read_PER_BUR3_from_pdf.py (+33 -20)
47. src/unfccc_ghg_data/unfccc_reader/Republic_of_Korea/__init__.py (+30 -0)
48. src/unfccc_ghg_data/unfccc_reader/Republic_of_Korea/config_kor_bur4.py (+511 -403)
49. src/unfccc_ghg_data/unfccc_reader/Republic_of_Korea/read_KOR_2021_Inventory_from_xlsx.py (+125 -76)
50. src/unfccc_ghg_data/unfccc_reader/Republic_of_Korea/read_KOR_2022_Inventory_from_xlsx.py (+140 -82)
51. src/unfccc_ghg_data/unfccc_reader/Republic_of_Korea/read_KOR_BUR4_from_xlsx.py (+75 -47)
52. src/unfccc_ghg_data/unfccc_reader/Singapore/__init__.py (+30 -0)
53. src/unfccc_ghg_data/unfccc_reader/Singapore/config_sgp_bur5.py (+408 -256)
54. src/unfccc_ghg_data/unfccc_reader/Singapore/read_SGP_BUR5_from_pdf.py (+110 -72)
55. src/unfccc_ghg_data/unfccc_reader/Taiwan/__init__.py (+30 -0)
56. src/unfccc_ghg_data/unfccc_reader/Taiwan/config_twn_nir2022.py (+194 -120)
57. src/unfccc_ghg_data/unfccc_reader/Taiwan/read_TWN_2022_Inventory_from_pdf.py (+164 -104)
58. src/unfccc_ghg_data/unfccc_reader/Thailand/__init__.py (+30 -0)
59. src/unfccc_ghg_data/unfccc_reader/Thailand/config_tha_bur3.py (+405 -223)
60. src/unfccc_ghg_data/unfccc_reader/Thailand/config_tha_bur4.py (+461 -250)
61. src/unfccc_ghg_data/unfccc_reader/Thailand/read_THA_BUR3_from_pdf.py (+129 -89)
62. src/unfccc_ghg_data/unfccc_reader/Thailand/read_THA_BUR4_from_pdf.py (+90 -64)
63. src/unfccc_ghg_data/unfccc_reader/__init__.py (+16 -1)
64. src/unfccc_ghg_data/unfccc_reader/get_submissions_info.py (+26 -19)
65. src/unfccc_ghg_data/unfccc_reader/read_UNFCCC_submission.py (+25 -14)

+ 4 - 0
Makefile

@@ -40,6 +40,10 @@ black:  ## format the code using black
 ruff-fixes:  ## fix the code using ruff
 	poetry run ruff src tests scripts docs/source/conf.py docs/source/notebooks/*.py --fix
 
+.PHONY: ruff-fixes-current
+ruff-fixes-current:  ## fix the code using ruff
+	poetry run ruff src/unfccc_ghg_data/unfccc_reader --fix
+
 
 .PHONY: test
 test:  ## run the tests

+ 7 - 8
docs/source/conf.py

@@ -4,17 +4,16 @@ Configuration file for the Sphinx documentation builder.
 For the full list of built-in configuration values, see the documentation:
 https://www.sphinx-doc.org/en/master/usage/configuration.html
 """
+import os
 from functools import wraps
+from pathlib import Path
 
 from sphinxcontrib_autodocgen import AutoDocGen
 
-import os
-from pathlib import Path
 os.environ["UNFCCC_GHG_ROOT_PATH"] = str(Path("..") / "..")
 
 import unfccc_ghg_data
 
-
 # -- Project information -----------------------------------------------------
 # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
 
@@ -58,7 +57,7 @@ extensions = [
     # math support
     "sphinx.ext.mathjax",
     # execute code
-    # "sphinx_exec_code",
+    "sphinx_exec_code",
 ]
 
 # general sphinx settings
@@ -144,10 +143,10 @@ nb_execution_show_tb = True
 nb_execution_timeout = 120
 nb_custom_formats = {".py": ["jupytext.reads", {"fmt": "py:percent"}]}
 
-# # exec-code config
-# exec_code_working_dir = Path('..') / '..'
-# exec_code_source_folders = [Path('..') / '..' / 'src' / 'unfccc_ghg_data']
-# exec_code_example_dir = '.'
+# exec-code config
+exec_code_working_dir = "."  # Path('..') / '..'
+exec_code_source_folders = [Path("..") / ".." / "src" / "unfccc_ghg_data"]
+exec_code_example_dir = "."
 
 # -- Options for HTML output -------------------------------------------------
 # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output

+ 15 - 1
poetry.lock

@@ -1068,6 +1068,20 @@ smb = ["smbprotocol"]
 ssh = ["paramiko"]
 tqdm = ["tqdm"]
 
+[[package]]
+name = "ghostscript"
+version = "0.7"
+description = "Interface to the Ghostscript C-API, both high- and low-level, based on ctypes"
+optional = false
+python-versions = "*"
+files = [
+    {file = "ghostscript-0.7-py2.py3-none-any.whl", hash = "sha256:97c70e27ba6b1cab4ab1d9b4cc82d89b8b53e57971f608ded4950b8aa20c78a7"},
+    {file = "ghostscript-0.7.tar.gz", hash = "sha256:b7875a87098740eb0be3de2d9662d15db727305ca9a6d4b7534a3cc33a4b965a"},
+]
+
+[package.dependencies]
+setuptools = ">=38.6.0"
+
 [[package]]
 name = "globalwarmingpotentials"
 version = "0.9.3"
@@ -4375,4 +4389,4 @@ plots = ["matplotlib"]
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.9"
-content-hash = "db0b517e6af6c99b04624df636fc38cdf49b3ec8dd6dce24596da1cf5796c0ac"
+content-hash = "3591f5e1b1134c148b9f68e3861beb4961659d1af5cb4dd7360ef5396a682f2e"

+ 1 - 0
pyproject.toml

@@ -22,6 +22,7 @@ opencv-python = "^4.8.1.78"
 unfccc-di-api = "^4.0.0"
 dask = "^2023.12.0"
 sphinx-exec-code = "^0.10"
+ghostscript = "^0.7"
 
 [tool.poetry.extras]
 plots = ["matplotlib"]

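Note: the `ghostscript` package added above wraps the Ghostscript C API via ctypes (per its description in poetry.lock). A minimal usage sketch following the package's documented calling convention; the invocation and file names below are illustrative assumptions, not taken from this commit:

import ghostscript

# Arguments mirror the gs command line; the first element is a dummy
# program name. Both file names are placeholders.
args = [
    "gs",
    "-dNOPAUSE", "-dBATCH", "-dSAFER",
    "-sDEVICE=pdfwrite",
    "-sOutputFile=output.pdf",
    "input.pdf",
]
ghostscript.Ghostscript(*args)
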
+ 8 - 3
src/unfccc_ghg_data/__init__.py

@@ -6,15 +6,20 @@ different methods from APIs, xlsx and csv files as well as pdf files.
 """
 import importlib.metadata
 
-from . import (helper, unfccc_reader, unfccc_downloader, unfccc_crf_reader,
-               unfccc_di_reader)
+from . import (
+    helper,
+    unfccc_crf_reader,
+    unfccc_di_reader,
+    unfccc_downloader,
+    unfccc_reader,
+)
 
 __all__ = [
     "helper",
     "unfccc_reader",
     "unfccc_crf_reader",
     "unfccc_di_reader",
-    "unfccc_downloader"
+    "unfccc_downloader",
 ]
 
 __version__ = importlib.metadata.version("unfccc_ghg_data")

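The sorted package imports above only work if `UNFCCC_GHG_ROOT_PATH` is set first: `get_root_path()` in `definitions.py` (below) runs at import time and raises a ValueError otherwise, which is why `conf.py` above exports the variable before importing `unfccc_ghg_data`. A minimal sketch of the required pattern, with a placeholder checkout path:

import os

# Must be set before the first package import; the path is a placeholder.
os.environ["UNFCCC_GHG_ROOT_PATH"] = "/path/to/repo/checkout"

import unfccc_ghg_data

print(unfccc_ghg_data.__version__)
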
+ 1 - 1
src/unfccc_ghg_data/helper/__init__.py

@@ -25,10 +25,10 @@ from .functions import (
     convert_categories,
     create_folder_mapping,
     fix_rows,
+    get_code_file,
     get_country_code,
     get_country_name,
     process_data_for_country,
-    get_code_file,
 )
 
 __all__ = [

+ 234 - 109
src/unfccc_ghg_data/helper/definitions.py

@@ -6,14 +6,14 @@ from pathlib import Path
 
 def get_root_path() -> Path:
     """Get the root_path from an environment variable"""
-    root_path_env = os.getenv('UNFCCC_GHG_ROOT_PATH', None)
+    root_path_env = os.getenv("UNFCCC_GHG_ROOT_PATH", None)
     if root_path_env is None:
-        raise ValueError('UNFCCC_GHG_ROOT_PATH environment '
-                         'variable needs to be set') # noqa: TRY003
+        raise ValueError("UNFCCC_GHG_ROOT_PATH environment " "variable needs to be set")
     else:
         root_path = Path(root_path_env).resolve()
     return root_path
 
+
 root_path = get_root_path()
 code_path = root_path / "src" / "unfccc_ghg_data"
 log_path = root_path / "log"
@@ -36,125 +36,250 @@ custom_country_mapping = {
 }
 
 custom_folders = {
-    'Venezeula_(Bolivarian_Republic_of)': 'VEN',
-    'Venezuela_(Bolivarian_Republic_of)': 'VEN',
-    'Micronesia_(Federated_State_of)': 'FSM',
-    'Micronesia_(Federated_States_of)': 'FSM',
-    'The_Republic_of_North_Macedonia': 'MKD',
-    'Republic_of_Korea': 'KOR',
-    'Bolivia_(Plurinational_State_of)': 'BOL',
-    'Türkiye': 'TUR',
-    'Iran_(Islamic_Republic_of)': 'IRN',
-    'Côte_d`Ivoire': 'CIV',
-    'Democratic_Republic_of_the_Congo': "COD",
-    'European_Union': 'EUA',
-    'Taiwan': 'TWN',
+    "Venezeula_(Bolivarian_Republic_of)": "VEN",
+    "Venezuela_(Bolivarian_Republic_of)": "VEN",
+    "Micronesia_(Federated_State_of)": "FSM",
+    "Micronesia_(Federated_States_of)": "FSM",
+    "The_Republic_of_North_Macedonia": "MKD",
+    "Republic_of_Korea": "KOR",
+    "Bolivia_(Plurinational_State_of)": "BOL",
+    "Türkiye": "TUR",
+    "Iran_(Islamic_Republic_of)": "IRN",
+    "Côte_d`Ivoire": "CIV",
+    "Democratic_Republic_of_the_Congo": "COD",
+    "European_Union": "EUA",
+    "Taiwan": "TWN",
 }
 
 GWP_factors = {
-    'SARGWP100_to_AR4GWP100': {
-        'HFCS': 1.1,
-        'PFCS': 1.1,
-        'UnspMixOfHFCs': 1.1,
-        'UnspMixOfPFCs': 1.1,
-        'FGASES': 1.1,
+    "SARGWP100_to_AR4GWP100": {
+        "HFCS": 1.1,
+        "PFCS": 1.1,
+        "UnspMixOfHFCs": 1.1,
+        "UnspMixOfPFCs": 1.1,
+        "FGASES": 1.1,
     },
-    'SARGWP100_to_AR5GWP100': {
-        'HFCS': 1.2,
-        'PFCS': 1.2,
-        'UnspMixOfHFCs': 1.2,
-        'UnspMixOfPFCs': 1.2,
-        'FGASES': 1.2,
+    "SARGWP100_to_AR5GWP100": {
+        "HFCS": 1.2,
+        "PFCS": 1.2,
+        "UnspMixOfHFCs": 1.2,
+        "UnspMixOfPFCs": 1.2,
+        "FGASES": 1.2,
     },
-    'SARGWP100_to_AR6GWP100': {
-        'HFCS': 1.4,
-        'PFCS': 1.3,
-        'UnspMixOfHFCs': 1.4,
-        'UnspMixOfPFCs': 1.3,
-        'FGASES': 1.35,
+    "SARGWP100_to_AR6GWP100": {
+        "HFCS": 1.4,
+        "PFCS": 1.3,
+        "UnspMixOfHFCs": 1.4,
+        "UnspMixOfPFCs": 1.3,
+        "FGASES": 1.35,
     },
-    'AR4GWP100_to_SARGWP100': {
-        'HFCS': 0.91,
-        'PFCS': 0.91,
-        'UnspMixOfHFCs': 0.91,
-        'UnspMixOfPFCs': 0.91,
-        'FGASES': 0.91,
+    "AR4GWP100_to_SARGWP100": {
+        "HFCS": 0.91,
+        "PFCS": 0.91,
+        "UnspMixOfHFCs": 0.91,
+        "UnspMixOfPFCs": 0.91,
+        "FGASES": 0.91,
     },
-    'AR4GWP100_to_AR5GWP100': {
-        'HFCS': 1.1,
-        'PFCS': 1.1,
-        'UnspMixOfHFCs': 1.1,
-        'UnspMixOfPFCs': 1.1,
-        'FGASES': 1.1,
+    "AR4GWP100_to_AR5GWP100": {
+        "HFCS": 1.1,
+        "PFCS": 1.1,
+        "UnspMixOfHFCs": 1.1,
+        "UnspMixOfPFCs": 1.1,
+        "FGASES": 1.1,
     },
-    'AR4GWP100_to_AR6GWP100': {
-        'HFCS': 1.27,
-        'PFCS': 1.18,
-        'UnspMixOfHFCs': 1.27,
-        'UnspMixOfPFCs': 1.18,
-        'FGASES': 1.23,
+    "AR4GWP100_to_AR6GWP100": {
+        "HFCS": 1.27,
+        "PFCS": 1.18,
+        "UnspMixOfHFCs": 1.27,
+        "UnspMixOfPFCs": 1.18,
+        "FGASES": 1.23,
     },
-    'AR5GWP100_to_SARGWP100': {
-        'HFCS': 0.83,
-        'PFCS': 0.83,
-        'UnspMixOfHFCs': 0.83,
-        'UnspMixOfPFCs': 0.83,
-        'FGASES': 0.83,
+    "AR5GWP100_to_SARGWP100": {
+        "HFCS": 0.83,
+        "PFCS": 0.83,
+        "UnspMixOfHFCs": 0.83,
+        "UnspMixOfPFCs": 0.83,
+        "FGASES": 0.83,
     },
-    'AR5GWP100_to_AR4GWP100': {
-        'HFCS': 0.91,
-        'PFCS': 0.91,
-        'UnspMixOfHFCs': 0.91,
-        'UnspMixOfPFCs': 0.91,
-        'FGASES': 0.91,
+    "AR5GWP100_to_AR4GWP100": {
+        "HFCS": 0.91,
+        "PFCS": 0.91,
+        "UnspMixOfHFCs": 0.91,
+        "UnspMixOfPFCs": 0.91,
+        "FGASES": 0.91,
     },
-    'AR5GWP100_to_AR6GWP100': {
-        'HFCS': 1.17,
-        'PFCS': 1.08,
-        'UnspMixOfHFCs': 1.17,
-        'UnspMixOfPFCs': 1.08,
-        'FGASES': 1.125,
+    "AR5GWP100_to_AR6GWP100": {
+        "HFCS": 1.17,
+        "PFCS": 1.08,
+        "UnspMixOfHFCs": 1.17,
+        "UnspMixOfPFCs": 1.08,
+        "FGASES": 1.125,
     },
 }
 
 gas_baskets = {
-    'HFCS (SARGWP100)': ['HFC23', 'HFC32', 'HFC41', 'HFC125', 'HFC134',
-                     'HFC134a', 'HFC143',  'HFC143a', 'HFC152a', 'HFC227ea',
-                     'HFC236fa', 'HFC245ca', 'HFC245fa', 'HFC365mfc',  'HFC404a',
-                     'HFC407c', 'HFC410a', 'HFC4310mee', #'OTHERHFCS (SARGWP100)',
-                         'UnspMixOfHFCs (SARGWP100)'],
-    'HFCS (AR4GWP100)': ['HFC23', 'HFC32', 'HFC41', 'HFC125', 'HFC134',
-                     'HFC134a', 'HFC143',  'HFC143a', 'HFC152a', 'HFC227ea',
-                     'HFC236fa', 'HFC245ca', 'HFC245fa', 'HFC365mfc',  'HFC404a',
-                     'HFC407c', 'HFC410a', 'HFC4310mee', 'UnspMixOfHFCs (AR4GWP100)'],
-    'HFCS (AR5GWP100)': ['HFC23', 'HFC32', 'HFC41', 'HFC125', 'HFC134',
-                      'HFC134a', 'HFC143',  'HFC143a', 'HFC152a', 'HFC227ea',
-                      'HFC236fa', 'HFC245ca', 'HFC245fa', 'HFC365mfc',  'HFC404a',
-                      'HFC407c', 'HFC410a', 'HFC4310mee',
-                         'UnspMixOfHFCs (AR5GWP100)'],
-    'HFCS (AR6GWP100)': ['HFC23', 'HFC32', 'HFC41', 'HFC125', 'HFC134',
-                      'HFC134a', 'HFC143',  'HFC143a', 'HFC152a', 'HFC227ea',
-                      'HFC236fa', 'HFC245ca', 'HFC245fa', 'HFC365mfc',  'HFC404a',
-                      'HFC407c', 'HFC410a', 'HFC4310mee',
-                         'UnspMixOfHFCs (AR6GWP100)'],
-    'PFCS (SARGWP100)': ['C3F8', 'C4F10', 'CF4', 'C2F6', 'C6F14', 'C5F12', 'cC4F8',
-                      'UnspMixOfPFCs (SARGWP100)'],
-    'PFCS (AR4GWP100)': ['C3F8', 'C4F10', 'CF4', 'C2F6', 'C6F14', 'C5F12', 'cC4F8',
-                      'UnspMixOfPFCs (AR4GWP100)'],
-    'PFCS (AR5GWP100)': ['C3F8', 'C4F10', 'CF4', 'C2F6', 'C6F14', 'C5F12', 'cC4F8',
-                      'UnspMixOfPFCs (AR5GWP100)'],
-    'PFCS (AR6GWP100)': ['C3F8', 'C4F10', 'CF4', 'C2F6', 'C6F14', 'C5F12', 'cC4F8',
-                      'UnspMixOfPFCs (AR6GWP100)'],
-    'FGASES (SARGWP100)': ['HFCS (SARGWP100)', 'PFCS (SARGWP100)', 'SF6', 'NF3'],
-    'FGASES (AR4GWP100)': ['HFCS (AR4GWP100)', 'PFCS (AR4GWP100)', 'SF6', 'NF3'],
-    'FGASES (AR5GWP100)':['HFCS (AR5GWP100)', 'PFCS (AR5GWP100)', 'SF6', 'NF3'],
-    'FGASES (AR6GWP100)':['HFCS (AR6GWP100)', 'PFCS (AR6GWP100)', 'SF6', 'NF3'],
-    'KYOTOGHG (SARGWP100)': ['CO2', 'CH4', 'N2O', 'SF6', 'NF3', 'HFCS (SARGWP100)',
-                          'PFCS (SARGWP100)'],
-    'KYOTOGHG (AR4GWP100)': ['CO2', 'CH4', 'N2O', 'SF6', 'NF3', 'HFCS (AR4GWP100)',
-                          'PFCS (AR4GWP100)'],
-    'KYOTOGHG (AR5GWP100)': ['CO2', 'CH4', 'N2O', 'SF6', 'NF3', 'HFCS (AR5GWP100)',
-                            'PFCS (AR5GWP100)'],
-    'KYOTOGHG (AR6GWP100)': ['CO2', 'CH4', 'N2O', 'SF6', 'NF3', 'HFCS (AR6GWP100)',
-                            'PFCS (AR6GWP100)'],
+    "HFCS (SARGWP100)": [
+        "HFC23",
+        "HFC32",
+        "HFC41",
+        "HFC125",
+        "HFC134",
+        "HFC134a",
+        "HFC143",
+        "HFC143a",
+        "HFC152a",
+        "HFC227ea",
+        "HFC236fa",
+        "HFC245ca",
+        "HFC245fa",
+        "HFC365mfc",
+        "HFC404a",
+        "HFC407c",
+        "HFC410a",
+        "HFC4310mee",  #'OTHERHFCS (SARGWP100)',
+        "UnspMixOfHFCs (SARGWP100)",
+    ],
+    "HFCS (AR4GWP100)": [
+        "HFC23",
+        "HFC32",
+        "HFC41",
+        "HFC125",
+        "HFC134",
+        "HFC134a",
+        "HFC143",
+        "HFC143a",
+        "HFC152a",
+        "HFC227ea",
+        "HFC236fa",
+        "HFC245ca",
+        "HFC245fa",
+        "HFC365mfc",
+        "HFC404a",
+        "HFC407c",
+        "HFC410a",
+        "HFC4310mee",
+        "UnspMixOfHFCs (AR4GWP100)",
+    ],
+    "HFCS (AR5GWP100)": [
+        "HFC23",
+        "HFC32",
+        "HFC41",
+        "HFC125",
+        "HFC134",
+        "HFC134a",
+        "HFC143",
+        "HFC143a",
+        "HFC152a",
+        "HFC227ea",
+        "HFC236fa",
+        "HFC245ca",
+        "HFC245fa",
+        "HFC365mfc",
+        "HFC404a",
+        "HFC407c",
+        "HFC410a",
+        "HFC4310mee",
+        "UnspMixOfHFCs (AR5GWP100)",
+    ],
+    "HFCS (AR6GWP100)": [
+        "HFC23",
+        "HFC32",
+        "HFC41",
+        "HFC125",
+        "HFC134",
+        "HFC134a",
+        "HFC143",
+        "HFC143a",
+        "HFC152a",
+        "HFC227ea",
+        "HFC236fa",
+        "HFC245ca",
+        "HFC245fa",
+        "HFC365mfc",
+        "HFC404a",
+        "HFC407c",
+        "HFC410a",
+        "HFC4310mee",
+        "UnspMixOfHFCs (AR6GWP100)",
+    ],
+    "PFCS (SARGWP100)": [
+        "C3F8",
+        "C4F10",
+        "CF4",
+        "C2F6",
+        "C6F14",
+        "C5F12",
+        "cC4F8",
+        "UnspMixOfPFCs (SARGWP100)",
+    ],
+    "PFCS (AR4GWP100)": [
+        "C3F8",
+        "C4F10",
+        "CF4",
+        "C2F6",
+        "C6F14",
+        "C5F12",
+        "cC4F8",
+        "UnspMixOfPFCs (AR4GWP100)",
+    ],
+    "PFCS (AR5GWP100)": [
+        "C3F8",
+        "C4F10",
+        "CF4",
+        "C2F6",
+        "C6F14",
+        "C5F12",
+        "cC4F8",
+        "UnspMixOfPFCs (AR5GWP100)",
+    ],
+    "PFCS (AR6GWP100)": [
+        "C3F8",
+        "C4F10",
+        "CF4",
+        "C2F6",
+        "C6F14",
+        "C5F12",
+        "cC4F8",
+        "UnspMixOfPFCs (AR6GWP100)",
+    ],
+    "FGASES (SARGWP100)": ["HFCS (SARGWP100)", "PFCS (SARGWP100)", "SF6", "NF3"],
+    "FGASES (AR4GWP100)": ["HFCS (AR4GWP100)", "PFCS (AR4GWP100)", "SF6", "NF3"],
+    "FGASES (AR5GWP100)": ["HFCS (AR5GWP100)", "PFCS (AR5GWP100)", "SF6", "NF3"],
+    "FGASES (AR6GWP100)": ["HFCS (AR6GWP100)", "PFCS (AR6GWP100)", "SF6", "NF3"],
+    "KYOTOGHG (SARGWP100)": [
+        "CO2",
+        "CH4",
+        "N2O",
+        "SF6",
+        "NF3",
+        "HFCS (SARGWP100)",
+        "PFCS (SARGWP100)",
+    ],
+    "KYOTOGHG (AR4GWP100)": [
+        "CO2",
+        "CH4",
+        "N2O",
+        "SF6",
+        "NF3",
+        "HFCS (AR4GWP100)",
+        "PFCS (AR4GWP100)",
+    ],
+    "KYOTOGHG (AR5GWP100)": [
+        "CO2",
+        "CH4",
+        "N2O",
+        "SF6",
+        "NF3",
+        "HFCS (AR5GWP100)",
+        "PFCS (AR5GWP100)",
+    ],
+    "KYOTOGHG (AR6GWP100)": [
+        "CO2",
+        "CH4",
+        "N2O",
+        "SF6",
+        "NF3",
+        "HFCS (AR6GWP100)",
+        "PFCS (AR6GWP100)",
+    ],
 }

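The `GWP_factors` table above holds multiplicative factors for moving aggregate baskets between GWP metrics, and `gas_baskets` lists the members of each basket per metric. A worked example, assuming the dicts exactly as defined above (the emissions value is hypothetical):

import os

os.environ["UNFCCC_GHG_ROOT_PATH"] = "."  # required at import time; placeholder

from unfccc_ghg_data.helper.definitions import GWP_factors

hfcs_sar = 100.0  # hypothetical HFCS total in kt CO2eq (SAR GWP-100)
factor = GWP_factors["SARGWP100_to_AR4GWP100"]["HFCS"]  # 1.1
hfcs_ar4 = hfcs_sar * factor  # 110.0 kt CO2eq in AR4 GWP-100
print(f"{hfcs_sar} (SARGWP100) -> {hfcs_ar4} (AR4GWP100)")
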
+ 6 - 5
src/unfccc_ghg_data/helper/folder_mapping.py

@@ -1,4 +1,4 @@
-""" create mapping of folder to countries
+"""create mapping of folder to countries
 
 this script takes a folder as input (from doit) and
 runs creates the mapping of subfolders to country codes
@@ -13,16 +13,17 @@ if __name__ == "__main__":
     # Find the right function and possible input and output files and
     # read the data using datalad run.
     parser = argparse.ArgumentParser()
-    parser.add_argument('--folder', help='folder name, relative to '
-                                         'repository root folder')
+    parser.add_argument(
+        "--folder", help="folder name, relative to " "repository root folder"
+    )
     args = parser.parse_args()
     folder = args.folder
 
-    if 'extracted_data' in folder:
+    if "extracted_data" in folder:
         extracted = True
     else:
         extracted = False
 
     # print available submissions
-    print("="*10 + f" Creating folder mapping for  {folder} " + "="*10)
+    print("=" * 10 + f" Creating folder mapping for  {folder} " + "=" * 10)
     create_folder_mapping(folder, extracted)

+ 4 - 6
src/unfccc_ghg_data/helper/functions.py

@@ -1,4 +1,4 @@
-""" common functions for unfccc_ghg_data
+"""common functions for unfccc_ghg_data
 
 Functions used by the different readers and downloaders in the unfccc_ghg_data package
 """
@@ -74,8 +74,6 @@ def process_data_for_country(
     xr.Dataset: processed dataset
 
     """
-
-
     # 0: gather information
     countries = list(data_country.coords[data_country.attrs["area"]].values)
     if len(countries) > 1:
@@ -956,9 +954,7 @@ def get_code_file(
                     )
                 else:
                     if print_info:
-                        print(
-                            f"Found code file {file.relative_to(root_path)}"
-                        )
+                        print(f"Found code file {file.relative_to(root_path)}")
                 code_file_path = file
 
     if code_file_path is not None:
@@ -1011,8 +1007,10 @@ def fix_rows(
         new_row = new_row.str.replace("- ", "-")
         # replace spaces in numbers
         pat = r"^(?P<first>[0-9\.,]*)\s(?P<last>[0-9\.,]*)$"
+
         def repl(m):
             return f"{m.group('first')}{m.group('last')}"
+
         new_row = new_row.str.replace(pat, repl, regex=True)
         data.loc[indices_to_merge[0]] = new_row
         data = data.drop(indices_to_merge[1:])

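The `repl` callable split out in the `fix_rows` hunk above rejoins numbers that PDF extraction broke with a stray space. A minimal sketch of the pattern in isolation; the sample data is illustrative:

import pandas as pd

pat = r"^(?P<first>[0-9\.,]*)\s(?P<last>[0-9\.,]*)$"


def repl(m):
    # Glue the two numeric halves back together, dropping the space.
    return f"{m.group('first')}{m.group('last')}"


row = pd.Series(["1 234.56", "12.3", "4 567"])
print(row.str.replace(pat, repl, regex=True).tolist())
# -> ['1234.56', '12.3', '4567']
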
+ 286 - 176
src/unfccc_ghg_data/unfccc_crf_reader/unfccc_crf_reader_core.py

@@ -27,13 +27,13 @@ from .util import NoCRFFilesError
 
 ### reading functions
 def convert_crf_table_to_pm2if(
-        df_table: pd.DataFrame,
-        submission_year: int,
-        entity_mapping: Optional[dict[str,str]]=None,
-        coords_defaults_input: Optional[dict[str,str]]=None,
-        filter_remove_input: Optional[dict[str,dict[str,Union[str,list]]]]=None,
-        filter_keep_input: Optional[dict[str,dict[str,Union[str,list]]]]=None,
-        meta_data_input: Optional[dict[str,str]]=None,
+    df_table: pd.DataFrame,
+    submission_year: int,
+    entity_mapping: Optional[dict[str, str]] = None,
+    coords_defaults_input: Optional[dict[str, str]] = None,
+    filter_remove_input: Optional[dict[str, dict[str, Union[str, list]]]] = None,
+    filter_keep_input: Optional[dict[str, dict[str, Union[str, list]]]] = None,
+    meta_data_input: Optional[dict[str, str]] = None,
 ) -> pd.DataFrame:
     """
     Converts a given pandas long format crf table to PRIMAP2 interchange format
@@ -82,7 +82,7 @@ def convert_crf_table_to_pm2if(
     }
 
     add_coords_cols = {
-    #    "orig_cat_name": ["orig_cat_name", "category"],
+        #    "orig_cat_name": ["orig_cat_name", "category"],
     }
 
     coords_terminologies = {
@@ -108,8 +108,8 @@ def convert_crf_table_to_pm2if(
     if entity_mapping is not None:
         coords_value_mapping["entity"] = entity_mapping
 
-    #coords_value_filling_template = {
-    #}
+    # coords_value_filling_template = {
+    # }
 
     filter_remove = {
         "f1": {
@@ -120,13 +120,11 @@ def convert_crf_table_to_pm2if(
         for key in filter_remove_input.keys():
             filter_remove[key] = filter_remove_input[key]
 
-    filter_keep = {
-    }
+    filter_keep = {}
     if filter_keep_input is not None:
         for key in filter_keep_input.keys():
             filter_keep[key] = filter_keep_input[key]
 
-
     meta_data = {
         "references": f"https://unfccc.int/ghg-inventories-annex-i-parties/{submission_year}",
         "rights": "",
@@ -146,7 +144,7 @@ def convert_crf_table_to_pm2if(
         coords_defaults=coords_defaults,
         coords_terminologies=coords_terminologies,
         coords_value_mapping=coords_value_mapping,
-        #coords_value_filling=coords_value_filling,
+        # coords_value_filling=coords_value_filling,
         filter_remove=filter_remove,
         filter_keep=filter_keep,
         meta_data=meta_data,
@@ -156,13 +154,13 @@ def convert_crf_table_to_pm2if(
 
 
 def read_crf_table(
-        country_codes: Union[str, list[str]],
-        table: str,
-        submission_year: int,
-        data_year: Optional[Union[int, list[int]]]=None,
-        date: Optional[str]=None,
-        folder: Optional[str]=None,
-        debug: Optional[bool]=False,
+    country_codes: Union[str, list[str]],
+    table: str,
+    submission_year: int,
+    data_year: Optional[Union[int, list[int]]] = None,
+    date: Optional[str] = None,
+    folder: Optional[str] = None,
+    debug: Optional[bool] = False,
 ) -> tuple[pd.DataFrame, list[list], list[list]]:
     """
     Read CRF table for given submission year and country / or countries
@@ -216,13 +214,15 @@ def read_crf_table(
         country_codes = [country_codes]
 
     # get file names and locations
-    input_files = get_crf_files(country_codes=country_codes,
-                                submission_year=submission_year,
-                                data_year=data_year,
-                                date=date,
-                                folder=folder)
+    input_files = get_crf_files(
+        country_codes=country_codes,
+        submission_year=submission_year,
+        data_year=data_year,
+        date=date,
+        folder=folder,
+    )
     # nasty fix for cases where exporting ran overnight and not all files have the same date
-    if (date is not None) and (len(country_codes)==1):
+    if (date is not None) and (len(country_codes) == 1):
         if isinstance(data_year, list):
             expected_files = len(data_year)
         elif isinstance(data_year, int):
@@ -230,17 +230,23 @@ def read_crf_table(
         else:
             expected_files = submission_year - 1990 - 1
         if len(input_files) < expected_files:
-            print(f"Found only {len(input_files)} input files for {country_codes}. "
-                  f"Expected {expected_files}.")
-            print("Possibly exporting run overnight and some files have the previous day as date.")
+            print(
+                f"Found only {len(input_files)} input files for {country_codes}. "
+                f"Expected {expected_files}."
+            )
+            print(
+                "Possibly exporting run overnight and some files have the previous day as date."
+            )
             date_datetime = datetime.strptime(date, "%d%m%Y")
             date_datetime = date_datetime - timedelta(days=1)
             prv_date = date_datetime.strftime("%d%m%Y")
-            more_input_files = get_crf_files(country_codes=country_codes,
-                                             submission_year=submission_year,
-                                             data_year=data_year,
-                                             date=prv_date,
-                                             folder=folder)
+            more_input_files = get_crf_files(
+                country_codes=country_codes,
+                submission_year=submission_year,
+                data_year=data_year,
+                date=prv_date,
+                folder=folder,
+            )
             if len(more_input_files) > 0:
                 print(f"Found {len(more_input_files)} additional input files.")
                 input_files = input_files + more_input_files
@@ -248,11 +254,13 @@ def read_crf_table(
                 print("Found no additional input files")
 
     if input_files == []:
-        raise NoCRFFilesError(f"No files found for {country_codes}, "
-                              f"submission_year={submission_year}, "
-                              f"data_year={data_year}, "
-                              f"date={date}, "
-                              f"folder={folder}.")
+        raise NoCRFFilesError(
+            f"No files found for {country_codes}, "
+            f"submission_year={submission_year}, "
+            f"data_year={data_year}, "
+            f"date={date}, "
+            f"folder={folder}."
+        )
 
     # get specification
     # if we only have a single country check if we might have a country specific
@@ -260,21 +268,25 @@ def read_crf_table(
     if len(country_codes) == 1:
         try:
             crf_spec = getattr(crf, f"CRF{submission_year}_{country_codes[0]}")
-            print(f"Using country specific specification: "
-                  f"CRF{submission_year}_{country_codes[0]}")
+            print(
+                f"Using country specific specification: "
+                f"CRF{submission_year}_{country_codes[0]}"
+            )
         except:
             # no country specific specification, check for general specification
             try:
                 crf_spec = getattr(crf, f"CRF{submission_year}")
             except:
-                raise ValueError(f"No terminology exists for submission year "
-                                 f"{submission_year}")
+                raise ValueError(
+                    f"No terminology exists for submission year " f"{submission_year}"
+                )
     else:
         try:
             crf_spec = getattr(crf, f"CRF{submission_year}")
         except:
-            raise ValueError(f"No terminology exists for submission year "
-                             f"{submission_year}")
+            raise ValueError(
+                f"No terminology exists for submission year " f"{submission_year}"
+            )
 
     # now loop over files and read them
     df_all = None
@@ -284,8 +296,11 @@ def read_crf_table(
         file_info = get_info_from_crf_filename(file.name)
         try:
             int(file_info["data_year"])
-            df_this_file, unknown_rows_this_file, last_row_info_this_file = \
-                read_crf_table_from_file(file, table, crf_spec[table], debug=debug)
+            (
+                df_this_file,
+                unknown_rows_this_file,
+                last_row_info_this_file,
+            ) = read_crf_table_from_file(file, table, crf_spec[table], debug=debug)
             if df_all is None:
                 df_all = df_this_file.copy(deep=True)
                 unknown_rows = unknown_rows_this_file
@@ -301,10 +316,10 @@ def read_crf_table(
 
 
 def read_crf_table_from_file(
-        file: Path,
-        table: str,
-        table_spec: dict[str, dict],
-        debug: Optional[bool]=False,
+    file: Path,
+    table: str,
+    table_spec: dict[str, dict],
+    debug: Optional[bool] = False,
 ) -> tuple[pd.DataFrame, list[list], list[list]]:
     """
     Read a single CRF table from a given file. This is the core function of the CRF
@@ -344,7 +359,6 @@ def read_crf_table_from_file(
             dlds = dl.api.Dataset(root_path)
             dlds.get(file.relative_to(root_path))
 
-
     table_properties = table_spec["table"]
     file_info = get_info_from_crf_filename(file.name)
 
@@ -353,16 +367,23 @@ def read_crf_table_from_file(
     all_cats = [cat[0] for cat in all_cats_mapping]
 
     unique_cats = [cat for (cat, count) in Counter(all_cats).items() if count == 1]
-    unique_cat_tuples = [mapping for mapping in all_cats_mapping if mapping[0] in unique_cats]
-    unique_mapping = dict(zip([tup[0] for tup in unique_cat_tuples],
-                              [tup[1] for tup in unique_cat_tuples]))
+    unique_cat_tuples = [
+        mapping for mapping in all_cats_mapping if mapping[0] in unique_cats
+    ]
+    unique_mapping = dict(
+        zip(
+            [tup[0] for tup in unique_cat_tuples], [tup[1] for tup in unique_cat_tuples]
+        )
+    )
     non_unique_cats = [cat for (cat, count) in Counter(all_cats).items() if count > 1]
 
     # prepare the sector hierarchy
     if non_unique_cats:
         # if we have non-unique categories present we need the information on
         # levels within the category hierarchy
-        category_tree = create_category_tree(all_cats_mapping, table, file_info["party"])
+        category_tree = create_category_tree(
+            all_cats_mapping, table, file_info["party"]
+        )
 
     # prepare index colum information
     cat_col = table_properties["col_for_categories"]
@@ -372,20 +393,37 @@ def read_crf_table_from_file(
     # read the data
     print(f"Reading table {table} for year {file_info['data_year']} from {file.name}.")
     skiprows = table_properties["firstrow"] - 1
-    nrows = table_properties["lastrow"] - skiprows + 1 # read one row more to check if we reached the end
+    nrows = (
+        table_properties["lastrow"] - skiprows + 1
+    )  # read one row more to check if we reached the end
     # we read with user specific NaN treatment as the NaN treatment is part of the conversion to
     # PRIMAP2 format.
-    df_raw = pd.read_excel(file, sheet_name=table, skiprows=skiprows , nrows=nrows, engine="openpyxl",
-                               na_values=['-1.#IND', '-1.#QNAN', '-NaN', '-nan', '1.#IND', '1.#QNAN',
-                                          'NULL', 'NaN', ''], keep_default_na=False)
+    df_raw = pd.read_excel(
+        file,
+        sheet_name=table,
+        skiprows=skiprows,
+        nrows=nrows,
+        engine="openpyxl",
+        na_values=[
+            "-1.#IND",
+            "-1.#QNAN",
+            "-NaN",
+            "-nan",
+            "1.#IND",
+            "1.#QNAN",
+            "NULL",
+            "NaN",
+            "",
+        ],
+        keep_default_na=False,
+    )
 
     if len(df_raw) < nrows:
-        #print(f"read data truncated because of all-nan rows")
+        # print(f"read data truncated because of all-nan rows")
         last_row_nan = True
     else:
         last_row_nan = False
 
-
     cols_to_drop = []
     # remove empty first column (for Australia tables start with an empty column)
     # df_raw = df_raw.dropna(how="all", axis=1)
@@ -394,13 +432,14 @@ def read_crf_table_from_file(
     # select only first table by cutting everything after a all-nan column (unless
     # it's the first column)
     if debug:
-        print(f'Header before table end detection: {df_raw.columns.values}')
+        print(f"Header before table end detection: {df_raw.columns.values}")
     for colIdx in range(1, len(df_raw.columns.values)):
-        if ((df_raw.iloc[:, colIdx].isna().all()) &
-                (df_raw.columns[colIdx].startswith('Unnamed'))):
+        if (df_raw.iloc[:, colIdx].isna().all()) & (
+            df_raw.columns[colIdx].startswith("Unnamed")
+        ):
             cols_to_drop = cols_to_drop + list(df_raw.columns.values[colIdx:])
             if debug:
-                print(f'cols_to_drop: {cols_to_drop}')
+                print(f"cols_to_drop: {cols_to_drop}")
             break
 
     if cols_to_drop is not None:
@@ -414,7 +453,7 @@ def read_crf_table_from_file(
     # the filling leads to long and a bit confusing headers, but as long
     # as pandas can not fill values of merged cells in all individual cells
     # we have to use some filling algorithm.
-    df_header = df_raw.iloc[0:len(table_properties["header"])-1].copy(deep=True)
+    df_header = df_raw.iloc[0 : len(table_properties["header"]) - 1].copy(deep=True)
     df_header.loc[-1] = df_header.columns.values
     df_header.index = df_header.index + 1
     # replace "Unnamed: X" colum names by nan to fill from left in next step
@@ -447,15 +486,17 @@ def read_crf_table_from_file(
                         entities[col] = f"{entities[col]} {value}"
 
     if units is None:
-        raise ValueError(f"Specification for table {table} does not contain unit information.")
+        raise ValueError(
+            f"Specification for table {table} does not contain unit information."
+        )
 
     # remove double spaces
     entities = [entity.strip() for entity in entities]
-    entities = [re.sub('\\s+', ' ', entity) for entity in entities]
+    entities = [re.sub("\\s+", " ", entity) for entity in entities]
 
     # replace the old header
     if len(header) > 2:
-        df_current = df_raw.drop(index=df_raw.iloc[0:len(header)-2].index)
+        df_current = df_raw.drop(index=df_raw.iloc[0 : len(header) - 2].index)
     else:
         df_current = df_raw
 
@@ -469,11 +510,11 @@ def read_crf_table_from_file(
     # remove double spaces
     for col in cols_for_space_stripping:
         df_current[col] = df_current[col].str.strip()
-        df_current[col] = df_current[col].replace('\\s+', ' ', regex=True)
+        df_current[col] = df_current[col].replace("\\s+", " ", regex=True)
 
     # prepare for sector mapping by initializing result lists and
     # variables
-    new_cats = [[''] * len(table_properties["categories"])] * len(df_current)
+    new_cats = [[""] * len(table_properties["categories"])] * len(df_current)
 
     # copy the header rows which are not part of the index (unit)
     new_cats[0] = [df_current.iloc[0][cat_col]] * len(table_properties["categories"])
@@ -485,7 +526,9 @@ def read_crf_table_from_file(
     if non_unique_cats:
         # need to initialize the tree parsing.
         last_parent = category_tree.get_node("root")
-        all_nodes = set([category_tree.get_node(node).tag for node in category_tree.nodes])
+        all_nodes = set(
+            [category_tree.get_node(node).tag for node in category_tree.nodes]
+        )
 
         for idx in range(1, len(df_current)):
             current_cat = df_current.iloc[idx][cat_col]
@@ -497,8 +540,12 @@ def read_crf_table_from_file(
                 break
 
             # check if current category is a child of the last node
-            children = dict([[child.tag, child.identifier]
-                        for child in category_tree.children(last_parent.identifier)])
+            children = dict(
+                [
+                    [child.tag, child.identifier]
+                    for child in category_tree.children(last_parent.identifier)
+                ]
+            )
             if current_cat in children.keys():
                 # the current category is a child of the current parent
                 # do the mapping
@@ -517,21 +564,39 @@ def read_crf_table_from_file(
                 if current_cat in all_nodes:
                     old_parent = last_parent
 
-                    while (current_cat not in children.keys()) and \
-                            (last_parent.identifier != "root"):
+                    while (current_cat not in children.keys()) and (
+                        last_parent.identifier != "root"
+                    ):
                         last_parent = category_tree.get_node(
-                            last_parent.predecessor(category_tree.identifier))
-                        children = dict([[child.tag, child.identifier]
-                                    for child in category_tree.children(last_parent.identifier)])
-
-                    if (last_parent.identifier == "root") and \
-                        (current_cat not in children.keys()):
+                            last_parent.predecessor(category_tree.identifier)
+                        )
+                        children = dict(
+                            [
+                                [child.tag, child.identifier]
+                                for child in category_tree.children(
+                                    last_parent.identifier
+                                )
+                            ]
+                        )
+
+                    if (last_parent.identifier == "root") and (
+                        current_cat not in children.keys()
+                    ):
                         # we have not found the category as direct child of any of the
                         # predecessors. Thus it is missing in the specification in
                         # that place
-                        print(f"Unknown category '{current_cat}' found in {table} for {file_info['party']}, "
-                              f"{file_info['data_year']} (last parent: {old_parent.tag}).")
-                        unknown_categories.append([table, file_info["party"], current_cat, file_info['data_year']])
+                        print(
+                            f"Unknown category '{current_cat}' found in {table} for {file_info['party']}, "
+                            f"{file_info['data_year']} (last parent: {old_parent.tag})."
+                        )
+                        unknown_categories.append(
+                            [
+                                table,
+                                file_info["party"],
+                                current_cat,
+                                file_info["data_year"],
+                            ]
+                        )
                         # copy back the parent info to continue with next category
                         last_parent = old_parent
                     else:
@@ -543,8 +608,12 @@ def read_crf_table_from_file(
                         if new_children:
                             last_parent = node
                 else:
-                    print(f"Unknown category '{current_cat}' found in {table} for {file_info['party']}, {file_info['data_year']}.")
-                    unknown_categories.append([table, file_info["party"], current_cat, file_info['data_year']])
+                    print(
+                        f"Unknown category '{current_cat}' found in {table} for {file_info['party']}, {file_info['data_year']}."
+                    )
+                    unknown_categories.append(
+                        [table, file_info["party"], current_cat, file_info["data_year"]]
+                    )
     else:
         for idx in range(1, len(df_current)):
             current_cat = df_current.iloc[idx][cat_col]
@@ -557,30 +626,45 @@ def read_crf_table_from_file(
             if current_cat in all_cats:
                 new_cats[idx] = unique_mapping[current_cat]
                 if (idx == len(df_current) - 1) and not last_row_nan:
-                    print(f"found information in last row: category {current_cat}, row {idx}")
-                    info_last_row.append([table, file_info["party"], current_cat, file_info['data_year']])
+                    print(
+                        f"found information in last row: category {current_cat}, row {idx}"
+                    )
+                    info_last_row.append(
+                        [table, file_info["party"], current_cat, file_info["data_year"]]
+                    )
             else:
-                print(f"Unknown category '{current_cat}' found in {table} for {file_info['party']}, {file_info['data_year']}.")
-                unknown_categories.append([table, file_info["party"], current_cat, file_info['data_year']])
+                print(
+                    f"Unknown category '{current_cat}' found in {table} for {file_info['party']}, {file_info['data_year']}."
+                )
+                unknown_categories.append(
+                    [table, file_info["party"], current_cat, file_info["data_year"]]
+                )
 
     for idx, col in enumerate(table_properties["categories"]):
-        df_current.insert(loc=idx, column=col, value=
-                          [cat[idx] for cat in new_cats])
+        df_current.insert(loc=idx, column=col, value=[cat[idx] for cat in new_cats])
 
     # set index
     df_current = df_current.set_index(index_cols)
     # process the unit information using the primap2 functions
-    df_current = pm2.pm2io.nir_add_unit_information(df_current, **table_properties["unit_info"])
+    df_current = pm2.pm2io.nir_add_unit_information(
+        df_current, **table_properties["unit_info"]
+    )
 
     # convert to long format
-    header_long = table_properties["categories"] + \
-        ["orig_cat_name", "entity", "unit", "time", "data"]
+    header_long = table_properties["categories"] + [
+        "orig_cat_name",
+        "entity",
+        "unit",
+        "time",
+        "data",
+    ]
     df_long = pm2.pm2io.nir_convert_df_to_long(
-        df_current, file_info["data_year"], header_long=header_long)
+        df_current, file_info["data_year"], header_long=header_long
+    )
 
     # add country information
     df_long.insert(0, column="country", value=file_info["party"])
-    #df_long.insert(1, column="submission", value=f"CRF{file_info['submission_year']}")
+    # df_long.insert(1, column="submission", value=f"CRF{file_info['submission_year']}")
     if "coords_defaults" in table_spec.keys():
     if "coords_defaults" in table_spec.keys():
         for col in table_spec["coords_defaults"]:
         for col in table_spec["coords_defaults"]:
             df_long.insert(2, column=col, value=table_spec["coords_defaults"][col])
             df_long.insert(2, column=col, value=table_spec["coords_defaults"][col])
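
The wide-to-long reshaping above is done by pm2.pm2io.nir_convert_df_to_long; as a rough stand-in for what it produces, the same idea in plain pandas (toy data and column names, not the primap2 helper itself):

    import pandas as pd

    df_wide = pd.DataFrame(
        {
            "category": ["1", "1.A"],
            "orig_cat_name": ["Energy", "Fuel combustion"],
            "entity": ["CO2", "CO2"],
            "unit": ["Gg", "Gg"],
            2021: [100.0, 80.0],  # one data column per year
        }
    )
    df_long = df_wide.melt(
        id_vars=["category", "orig_cat_name", "entity", "unit"],
        var_name="time",
        value_name="data",
    )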
@@ -589,18 +673,17 @@ def read_crf_table_from_file(


 def get_crf_files(
-        country_codes: Union[str, list[str]],
-        submission_year: int,
-        data_year: Optional[Union[int, list[int]]] = None,
-        date: Optional[str] = None,
-        folder: Optional[str] = None,
+    country_codes: Union[str, list[str]],
+    submission_year: int,
+    data_year: Optional[Union[int, list[int]]] = None,
+    date: Optional[str] = None,
+    folder: Optional[str] = None,
 ) -> list[Path]:
     """
     Finds all files according to given parameters

     Parameters
     ----------
-
     country_codes: str or list[str]
         ISO 3-letter country code or list of country codes

@@ -643,14 +726,20 @@ def get_crf_files(
                 new_country_folders = folder_mapping[country_code]
                 if isinstance(new_country_folders, str):
                     # only one folder
-                    country_folders = [*country_folders, data_folder / new_country_folders / submission_folder]
+                    country_folders = [
+                        *country_folders,
+                        data_folder / new_country_folders / submission_folder,
+                    ]
                 else:
-                    country_folders = country_folders + \
-                                      [data_folder / folder / submission_folder
-                                       for folder in new_country_folders]
+                    country_folders = country_folders + [
+                        data_folder / folder / submission_folder
+                        for folder in new_country_folders
+                    ]
             else:
-                raise ValueError(f"No data folder found for country {country_code}. "
-                                 f"Check if folder mapping is up to date.")
+                raise ValueError(
+                    f"No data folder found for country {country_code}. "
+                    f"Check if folder mapping is up to date."
+                )
     else:
         country_folders = [folder]

@@ -671,17 +760,17 @@ def get_crf_files(
                     file_filter["party"] = country
                     file_filter["party"] = country
                     dates = get_submission_dates(folder, file_filter)
                     dates = get_submission_dates(folder, file_filter)
                     file_filter["date"] = find_latest_date(dates)
                     file_filter["date"] = find_latest_date(dates)
-                    input_files = input_files + \
-                                  filter_filenames(input_folder.glob("*.xlsx"),
-                                                   **file_filter)
+                    input_files = input_files + filter_filenames(
+                        input_folder.glob("*.xlsx"), **file_filter
+                    )
             else:
                 file_filter = file_filter_template.copy()
                 if date is not None:
                     file_filter["date"] = date
-                input_files = input_files + \
-                              filter_filenames(input_folder.glob("*.xlsx"),
-                                               **file_filter)
-        #else:
+                input_files = input_files + filter_filenames(
+                    input_folder.glob("*.xlsx"), **file_filter
+                )
+        # else:
         #    raise ValueError(f"Folder {input_folder} does not exist")
     if len(input_files) == 0:
         raise ValueError(f"No input files found in {country_folders}")
@@ -699,7 +788,7 @@ def get_crf_files(


 def get_info_from_crf_filename(
-        filename: str,
+    filename: str,
 ) -> dict[str, Union[int, str]]:
     """
     Parse given file name and return a dict with information
@@ -707,7 +796,6 @@ def get_info_from_crf_filename(

     Parameters
     ----------
-
     filename: str
         The file to analyze (without path)

@@ -729,8 +817,7 @@ def get_info_from_crf_filename(
     try:
         file_info["data_year"] = int(name_parts[2])
     except:
-        print(f"Data year string {name_parts[2]} "
-              "could not be converted to int.")
+        print(f"Data year string {name_parts[2]} " "could not be converted to int.")
         file_info["data_year"] = name_parts[2]
         file_info["data_year"] = name_parts[2]
     file_info["date"] = name_parts[3]
     file_info["date"] = name_parts[3]
     # the last part (time code) is missing for Australia since 2023
     # the last part (time code) is missing for Australia since 2023
@@ -742,11 +829,11 @@ def get_info_from_crf_filename(


 def filter_filenames(
-        files_to_filter: list[Path],
-        party: Optional[Union[str, list[str]]] = None,
-        data_year: Optional[Union[int, list[int]]] = None,
-        submission_year: Optional[str] = None,
-        date: Optional[str] = None,
+    files_to_filter: list[Path],
+    party: Optional[Union[str, list[str]]] = None,
+    data_year: Optional[Union[int, list[int]]] = None,
+    submission_year: Optional[str] = None,
+    date: Optional[str] = None,
 ) -> list[Path]:
     """Filter a list of filenames of CRF files

@@ -792,8 +879,8 @@ def filter_filenames(


 def check_crf_file_info(
-        file_info: dict,
-        file_filter: dict,
+    file_info: dict,
+    file_filter: dict,
 ) -> bool:
     """
     Check if a CRF file has given properties
@@ -837,9 +924,9 @@ def check_crf_file_info(


 def create_category_tree(
-        specification: list[list],
-        table: str,
-        country: Optional[str] = None,
+    specification: list[list],
+    table: str,
+    country: Optional[str] = None,
 ) -> Tree:
     """
     Create a treelib Tree for the categorical hierarchy from a CRF
@@ -850,7 +937,6 @@ def create_category_tree(

     Parameters
     ----------
-
     specification: List[List]
         The `sector_mapping` dict of a table specification

@@ -866,8 +952,10 @@ def create_category_tree(
     """
     """
     # small sanity check on the specification
     # small sanity check on the specification
     if len(specification[0]) < 3:
     if len(specification[0]) < 3:
-        raise ValueError(f"Error: Specification for Table {table} has non-unique "
-                         "categories and need level specifications")
+        raise ValueError(
+            f"Error: Specification for Table {table} has non-unique "
+            "categories and need level specifications"
+        )

     # initialize variables for tree building
     parent_info = [
@@ -888,11 +976,11 @@ def create_category_tree(
     if country is not None:
         # remove country tags from categories and mark categories
         # for other countries for removal
-        specification = [filter_category(mapping, country)
-                         for mapping in specification]
+        specification = [filter_category(mapping, country) for mapping in specification]
         # remove the categories for other countries
-        specification = [mapping for mapping in specification
-                         if mapping[0] != "\\REMOVE"]
+        specification = [
+            mapping for mapping in specification if mapping[0] != "\\REMOVE"
+        ]

     # build a tree from specification
     # when looping over the categories present in the table
@@ -903,7 +991,9 @@ def create_category_tree(
         if current_cat_level == last_cat_info["level"]:
             # cat has the same level as the preceding one, so no change to
             # parent node
-            category_tree.create_node(current_cat, idx, parent=parent_info[-1]["id"], data=mapping)
+            category_tree.create_node(
+                current_cat, idx, parent=parent_info[-1]["id"], data=mapping
+            )
         elif current_cat_level == last_cat_info["level"] + 1:
             # the current category is one level further away from
             # the trunk of the tree. This means that
@@ -913,23 +1003,29 @@ def create_category_tree(
                 {
                     "id": last_cat_info["id"],
                     "tag": last_cat_info["category"],
-                    "level": last_cat_info["level"]
+                    "level": last_cat_info["level"],
                 }
             )
             # add the category as new node
-            category_tree.create_node(current_cat, idx, parent=parent_info[-1]["id"], data=mapping)
+            category_tree.create_node(
+                current_cat, idx, parent=parent_info[-1]["id"], data=mapping
+            )

         elif current_cat_level < last_cat_info["level"]:
             # the new level is smaller (closer to the trunk)
             # than the last one. Thus we remove all parents
             # from this level on
-            parent_info = parent_info[0: current_cat_level + 1]
-            category_tree.create_node(current_cat, idx, parent=parent_info[-1]["id"], data=mapping)
+            parent_info = parent_info[0 : current_cat_level + 1]
+            category_tree.create_node(
+                current_cat, idx, parent=parent_info[-1]["id"], data=mapping
+            )
         else:
             # increase in levels of more than one is not allowed
-            raise ValueError(f"Error in sector hierarchy for table {table}, category {current_cat}: "
-                             f"Category level is {current_cat_level} and parent level is "
-                             f"{parent_info[-1]['level']}")
+            raise ValueError(
+                f"Error in sector hierarchy for table {table}, category {current_cat}: "
+                f"Category level is {current_cat_level} and parent level is "
+                f"{parent_info[-1]['level']}"
+            )

         # set last_cat_info
         last_cat_info["category"] = current_cat
@@ -940,8 +1036,8 @@ def create_category_tree(


 def filter_category(
-        mapping: list,
-        country: str,
+    mapping: list,
+    country: str,
 ) -> list[str]:
     """
     This function checks if a category mapping is suitable for the given country.
@@ -975,9 +1071,9 @@ def filter_category(
             new_mapping[0] = "\\REMOVE"
         else:
             re_result = re.search(regex_exclude_full, mapping[0])
-            new_mapping[0] = mapping[0][len(re_result.group(1)) + 1:]
+            new_mapping[0] = mapping[0][len(re_result.group(1)) + 1 :]
     elif mapping[0].startswith(string_country):
-        new_mapping[0] = mapping[0][len(string_country) + 1:]
+        new_mapping[0] = mapping[0][len(string_country) + 1 :]
     elif re.match(regex_countries, mapping[0]):
         new_mapping[0] = "\\REMOVE"

@@ -985,9 +1081,9 @@ def filter_category(


 def get_latest_date_for_country(
-        country_code: str,
-        submission_year: int,
-)->str:
+    country_code: str,
+    submission_year: int,
+) -> str:
     """
     """
     Find the latest submission date for a country
     Find the latest submission date for a country
 
 
@@ -1013,26 +1109,36 @@ def get_latest_date_for_country(
         country_folders = folder_mapping[country_code]
         if isinstance(country_folders, str):
             # only one folder
-            submission_date = find_latest_date(get_submission_dates(
-                downloaded_data_path_UNFCCC / country_folders / f"CRF{submission_year}", file_filter))
+            submission_date = find_latest_date(
+                get_submission_dates(
+                    downloaded_data_path_UNFCCC
+                    / country_folders
+                    / f"CRF{submission_year}",
+                    file_filter,
+                )
+            )
         else:
             dates = []
             for folder in country_folders:
-                folder_submission = downloaded_data_path_UNFCCC / folder / f"CRF{submission_year}"
+                folder_submission = (
+                    downloaded_data_path_UNFCCC / folder / f"CRF{submission_year}"
+                )
                 if folder_submission.exists():
                     dates = dates + get_submission_dates(folder_submission, file_filter)
             submission_date = find_latest_date(dates)
     else:
-        raise ValueError(f"No data folder found for country {country_code}. "
-                         f"Check if folder mapping is up to date.")
+        raise ValueError(
+            f"No data folder found for country {country_code}. "
+            f"Check if folder mapping is up to date."
+        )

     return submission_date


 def get_submission_dates(
-        folder: Path,
-        file_filter: dict[str, Union[str, int, list]],
-)->list[str]:
+    folder: Path,
+    file_filter: dict[str, Union[str, int, list]],
+) -> list[str]:
     """
     """
     Returns all submission dates available in a folder
     Returns all submission dates available in a folder
 
 
@@ -1050,8 +1156,10 @@ def get_submission_dates(
             List of dates as str
     """
     if "date" in file_filter:
-        raise ValueError("'date' present in 'file_filter'. This makes no sense as "
-                         "the function's purpose is to return available dates.")
+        raise ValueError(
+            "'date' present in 'file_filter'. This makes no sense as "
+            "the function's purpose is to return available dates."
+        )

     if folder.exists():
         files = filter_filenames(folder.glob("*.xlsx"), **file_filter)
@@ -1065,9 +1173,9 @@ def get_submission_dates(


 def get_submission_parties(
-        folder: Path,
-        file_filter: dict[str, Union[str, int, list]],
-)->list[str]:
+    folder: Path,
+    file_filter: dict[str, Union[str, int, list]],
+) -> list[str]:
     """
     """
     Returns all submission dates available in a folder
     Returns all submission dates available in a folder
 
 
@@ -1085,8 +1193,10 @@ def get_submission_parties(
             List of parties as str
     """
     if "party" in file_filter:
-        raise ValueError("'party' present in 'file_filter'. This makes no sense as "
-                         "the function's purpose is to return available parties.")
+        raise ValueError(
+            "'party' present in 'file_filter'. This makes no sense as "
+            "the function's purpose is to return available parties."
+        )

     if folder.exists():
         files = filter_filenames(list(folder.glob("*.xlsx")), **file_filter)
@@ -1100,9 +1210,9 @@ def get_submission_parties(


 def find_latest_date(
-        dates: list[str],
-        date_format: str='%d%m%Y',
-)-> str:
+    dates: list[str],
+    date_format: str = "%d%m%Y",
+) -> str:
     """
     """
     Returns the latest date in a list of dates as str in the format
     Returns the latest date in a list of dates as str in the format
     ddmmyyyy
     ddmmyyyy
@@ -1117,11 +1227,11 @@ def find_latest_date(
         str: latest date
     """
     if len(dates) > 0:
-        dates_datetime = [[date, datetime.strptime(date, date_format)] for date in
-                          dates]
+        dates_datetime = [
+            [date, datetime.strptime(date, date_format)] for date in dates
+        ]
         dates_datetime = sorted(dates_datetime, key=itemgetter(1))
     else:
         raise ValueError("Passed list of dates is empty")

     return dates_datetime[-1][0]
-
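
The date handling in find_latest_date is pure standard library; a self-contained sketch of the sort-and-pick-latest idea with the ddmmyyyy format used here (dates are made up):

    from datetime import datetime
    from operator import itemgetter

    dates = ["10042023", "01122022", "15052023"]
    # pair each string with its parsed datetime, sort on the datetime
    dates_datetime = [[date, datetime.strptime(date, "%d%m%Y")] for date in dates]
    dates_datetime = sorted(dates_datetime, key=itemgetter(1))
    print(dates_datetime[-1][0])  # 15052023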

+ 8 - 7
src/unfccc_ghg_data/unfccc_di_reader/read_unfccc_di_for_country.py

@@ -6,12 +6,13 @@ function such that it can be called from datalad
 import argparse

 from unfccc_ghg_data.unfccc_di_reader.unfccc_di_reader_core import (
-    read_UNFCCC_DI_for_country)
+    read_UNFCCC_DI_for_country,
+)

 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
-    parser.add_argument('--country', help='Country code')
-    parser.add_argument('--date', help='String with current date')
+    parser.add_argument("--country", help="Country code")
+    parser.add_argument("--date", help="String with current date")
     args = parser.parse_args()

     country_code = args.country
@@ -19,10 +20,10 @@ if __name__ == "__main__":

     read_UNFCCC_DI_for_country(
         country_code=country_code,
-        category_groups=None, # read all categories
-        read_subsectors=False, # not applicable as we read all categories
+        category_groups=None,  # read all categories
+        read_subsectors=False,  # not applicable as we read all categories
         date_str=date_str,
         date_str=date_str,
-        pm2if_specifications=None, # automatically use the right specs for AI and NAI
-        default_gwp=None, # automatically uses right default GWP for AI and NAI
+        pm2if_specifications=None,  # automatically use the right specs for AI and NAI
+        default_gwp=None,  # automatically uses right default GWP for AI and NAI
         debug=False,
     )
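
For illustration, the argument handling can be exercised without a shell by passing an explicit argv list (values are hypothetical):

    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--country", help="Country code")
    parser.add_argument("--date", help="String with current date")
    args = parser.parse_args(["--country", "ARG", "--date", "2023-01-15"])
    print(args.country, args.date)  # ARG 2023-01-15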

+ 5 - 8
src/unfccc_ghg_data/unfccc_downloader/fetch_submissions_bur.py

@@ -5,7 +5,7 @@ Based on `process_bur` from national-inventory-submissions
 (https://github.com/openclimatedata/national-inventory-submisions)
 """

-#import requests
+# import requests
 import re
 import time
 from pathlib import Path
@@ -15,25 +15,24 @@ import pandas as pd
 from bs4 import BeautifulSoup
 from selenium.webdriver import Firefox
 from selenium.webdriver.firefox.options import Options
-from unfccc_ghg_data.unfccc_downloader import get_unfccc_submission_info

 from unfccc_ghg_data.helper import downloaded_data_path_UNFCCC
-
+from unfccc_ghg_data.unfccc_downloader import get_unfccc_submission_info

 if __name__ == "__main__":
     print("Fetching BUR submissions ...")

     url = "https://unfccc.int/BURs"

-    #print(url)
+    # print(url)

     # set options for headless mode
     profile_path = ".firefox"
     options = Options()
-    options.add_argument('-headless')
+    options.add_argument("-headless")

     # create profile for headless mode and automatic downloading
-    options.set_preference('profile', profile_path)
+    options.set_preference("profile", profile_path)

     # set up selenium driver
     driver = Firefox(options=options)
@@ -64,7 +63,6 @@ if __name__ == "__main__":
             if str(Path(href).parent).endswith("documents"):
                 targets.append({"title": title, "url": href})

-
     pattern = re.compile(r"BUR ?\d")
     pattern = re.compile(r"BUR ?\d")
 
 
     # Go through sub-pages.
     # Go through sub-pages.
@@ -79,7 +77,6 @@ if __name__ == "__main__":
         else:
             no_downloads.append({target["title"], url})

-
     if len(no_downloads) > 0:
         print("No downloads for ", no_downloads)
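
The headless browser setup used in this script, as a self-contained sketch (assumes Firefox and geckodriver are installed; the URL is the one from the script):

    from bs4 import BeautifulSoup
    from selenium.webdriver import Firefox
    from selenium.webdriver.firefox.options import Options

    options = Options()
    options.add_argument("-headless")
    driver = Firefox(options=options)
    driver.get("https://unfccc.int/BURs")
    # parse the rendered page with BeautifulSoup
    html = BeautifulSoup(driver.page_source, "html.parser")
    driver.quit()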

+ 3 - 7
src/unfccc_ghg_data/unfccc_downloader/fetch_submissions_nc.py

@@ -19,21 +19,20 @@ from selenium.webdriver.firefox.options import Options
 from unfccc_ghg_data.helper import downloaded_data_path_UNFCCC
 from unfccc_ghg_data.unfccc_downloader import get_unfccc_submission_info

-
 if __name__ == "__main__":
     print("Fetching NC submissions ...")

     url = "https://unfccc.int/non-annex-I-NCs"

-    #print(url)
+    # print(url)

     # set options for headless mode
     profile_path = ".firefox"
     options = Options()
-    options.add_argument('-headless')
+    options.add_argument("-headless")

     # create profile for headless mode and automatic downloading
-    options.set_preference('profile', profile_path)
+    options.set_preference("profile", profile_path)

     # set up selenium driver
     driver = Firefox(options=options)
@@ -64,10 +63,8 @@ if __name__ == "__main__":
             if str(Path(href).parent).endswith("documents"):
                 targets.append({"title": title, "url": href})

-
     pattern = re.compile(r"NC ?\d")
     pattern = re.compile(r"NC ?\d")
 
 
-
     # Go through sub-pages.
     for target in targets:
         time.sleep(randrange(5, 15))
@@ -80,7 +77,6 @@ if __name__ == "__main__":
         else:
             no_downloads.append({target["title"], url})

-
     if len(no_downloads) > 0:
     if len(no_downloads) > 0:



+ 27 - 4
src/unfccc_ghg_data/unfccc_reader/Argentina/__init__.py

@@ -1,7 +1,30 @@
-"""Argentina (BUR4)
+"""Read Argentina's BURs, NIRs, NCs

 Scripts and configurations to read Argentina's submissions to the UNFCCC.
-Currently code for the following submissions is available:
+Currently, the following submissions and datasets are available (all datasets
+including DI (read using the DI-reader) and legacy BUR/NIR (no code)):
-* BUR4 (from pdf)
-"""
+.. exec_code::
+    :hide_code:
+
+    from unfccc_ghg_data.helper.functions import (get_country_datasets,
+                                                  get_country_submissions)
+    country = 'ARG'
+    # print available submissions
+    print("="*15 + " Available submissions " + "="*15)
+    get_country_submissions(country, True)
+    print("")
+
+    #print available datasets
+    print("="*15 + " Available datasets " + "="*15)
+    get_country_datasets(country, True)
+
+You can also obtain this information by running
+
+.. code-block:: bash
+
+    poetry run doit country_info country=ARG
+
+See below for a listing of scripts for BUR/NIR reading including links.
+
+"""

+ 115 - 96
src/unfccc_ghg_data/unfccc_reader/Argentina/read_ARG_BUR4_from_pdf.py

@@ -2,16 +2,22 @@
 Read Argentina's BUR4 from pdf

 This script reads data from Argentina's fourth Biennial Update Report (BUR4).
- Data is read from the pdf file using camelot"""
+Data is read from the pdf file using camelot
+"""
+import os
 import sys

 import camelot
 import primap2 as pm2
 from primap2.pm2io._conversion import convert_ipcc_code_primap_to_primap2

-from unfccc_ghg_data.helper import downloaded_data_path, extracted_data_path
-from unfccc_ghg_data.helper import gas_baskets, process_data_for_country
+from unfccc_ghg_data.helper import (
+    downloaded_data_path,
+    extracted_data_path,
+    gas_baskets,
+    process_data_for_country,
+)

 # ###
 # configuration
@@ -21,53 +27,49 @@ from unfccc_ghg_data.helper import gas_baskets, process_data_for_country
 #  PRIMAP2 version
 if __name__ == "__main__":
     # folders and files
-    input_folder = downloaded_data_path / 'UNFCCC' / 'Argentina' / \
-                   'BUR4'
-    output_folder = extracted_data_path / 'UNFCCC' / 'Argentina'
+    input_folder = downloaded_data_path / "UNFCCC" / "Argentina" / "BUR4"
+    output_folder = extracted_data_path / "UNFCCC" / "Argentina"
     if not output_folder.exists():
         output_folder.mkdir()

-    output_filename = 'ARG_BUR4_2022_'
+    output_filename = "ARG_BUR4_2022_"
+
+    pdf_file = "4to_Informe_Bienal_de_la_Rep%C3%BAblica_Argentina.pdf"
-    pdf_file = '4to_Informe_Bienal_de_la_Rep%C3%BAblica_Argentina.pdf'
+    # definitions part 1: reading data from pdf and preprocessing for conversion to
+    # PRIMAP2 format
-    # definitions part 1: reading data from pdf and preprocessing for conversion to PRIMAP2 format
     # part 1.1 KyotoGHG, CO2, CH4, N2O tables
     #
     pages_to_read = range(232, 244)
     data_start_keyword = "Id#"
     data_end_keyword = "Fuente: Elaboración propia"
-    index_cols = ['Id#', 'Nombre']
-    col_rename = {
-        index_cols[0]: "category",
-        index_cols[1]: "orig_cat_name"
-    }
-    metadata = {
-        "entity": [0, 1],
-        "unit": [0, 2]
-    }
+    index_cols = ["Id#", "Nombre"]
+    col_rename = {index_cols[0]: "category", index_cols[1]: "orig_cat_name"}
+    metadata = {"entity": [0, 1], "unit": [0, 2]}

     rows_to_drop = [0]

     metadata_mapping = {
-        'unit': {
-            '(GgCO2e)': 'GgCO2e',
-            '(GgCO2)': 'Gg',
-            '(GgN2O)': 'Gg',
-            '(GgCH4)': 'Gg',
-            '(GgGas)': 'Gg',
+        "unit": {
+            "(GgCO2e)": "GgCO2e",
+            "(GgCO2)": "Gg",
+            "(GgN2O)": "Gg",
+            "(GgCH4)": "Gg",
+            "(GgGas)": "Gg",
         }
         }
     }

     # part 1.2: fgases table
+    # the f-gases table is in wide format with no sectoral resolution and gases as row
+    # header
     pages_to_read_fgases = range(244, 247)
     pages_to_read_fgases = range(244, 247)
     data_start_keyword_fgases = "Gas"
+    index_cols_fgases = ["Gas"]
     cols_to_drop_fgases = ["Nombre"]
     cols_to_drop_fgases = ["Nombre"]
     metadata_fgases = {
         "unit": [0, 2],
+        "category": "2",
         "orig_cat_name": "PROCESOS INDUSTRIALES Y USO DE PRODUCTOS",
         "orig_cat_name": "PROCESOS INDUSTRIALES Y USO DE PRODUCTOS",
     }
     }
     col_rename_fgases = {
     col_rename_fgases = {
@@ -79,14 +81,14 @@ if __name__ == "__main__":
     cats_remove = ["Information Items", "Memo Items (3)"]
     cats_remove = ["Information Items", "Memo Items (3)"]
     # manual category codes
     cat_codes_manual = {  # conversion to PRIMAP1 format
-        '1A3di': 'MBKM',
-        '1A3ai': 'MBKA',
-        '1A3di Navegación marítima y fluvial internacional': 'MBKM',
-        'S/N': 'MMULTIOP',
+        "1A6": "MBIO",
+        "1A3di": "MBKM",
+        "1A3ai": "MBKA",
+        "1A3di Navegación marítima y fluvial internacional": "MBKM",
+        "S/N": "MMULTIOP",
     }
     }
 
 
-    cat_code_regexp = r'(?P<code>^[A-Z0-9]{1,8}).*'
+    cat_code_regexp = r"(?P<code>^[A-Z0-9]{1,8}).*"

     time_format = "%Y"

@@ -116,32 +118,32 @@ if __name__ == "__main__":
     coords_value_mapping = {
         #    "category": "PRIMAP1",
         "entity": {
-            'HFC-23': 'HFC23',
-            'HFC-32': 'HFC32',
-            'HFC-41': 'HFC41',
-            'HFC-43-10mee': 'HFC4310mee',
-            'HFC-125': 'HFC125',
-            'HFC-134': 'HFC134',
-            'HFC-134a': 'HFC134a',
-            'HFC-152a': 'HFC152a',
-            'HFC-143': 'HFC143',
-            'HFC-143a': 'HFC143a',
-            'HFC-227ea': 'HFC227ea',
-            'HFC-236fa': 'HFC236fa',
-            'HFC-245ca': 'HFC245ca',
-            'HFC-365mfc': 'HFC365mfc',
-            'HFC-245fa': 'HFC245fa',
-            'PFC-143 (CF4)': 'CF4',
-            'PFC-116 (C2F6)': 'C2F6',
-            'PFC-218 (C3F8)': 'C3F8',
-            'PFC-31-10 (C4F10)': 'C4F10',
-            'c-C4F8': 'cC4F8',
-            'PFC-51-144 (C6F14)': 'C6F14',
+            "HFC-23": "HFC23",
+            "HFC-32": "HFC32",
+            "HFC-41": "HFC41",
+            "HFC-43-10mee": "HFC4310mee",
+            "HFC-125": "HFC125",
+            "HFC-134": "HFC134",
+            "HFC-134a": "HFC134a",
+            "HFC-152a": "HFC152a",
+            "HFC-143": "HFC143",
+            "HFC-143a": "HFC143a",
+            "HFC-227ea": "HFC227ea",
+            "HFC-236fa": "HFC236fa",
+            "HFC-245ca": "HFC245ca",
+            "HFC-365mfc": "HFC365mfc",
+            "HFC-245fa": "HFC245fa",
+            "PFC-143 (CF4)": "CF4",
+            "PFC-116 (C2F6)": "C2F6",
+            "PFC-218 (C3F8)": "C3F8",
+            "PFC-31-10 (C4F10)": "C4F10",
+            "c-C4F8": "cC4F8",
+            "PFC-51-144 (C6F14)": "C6F14",
         },
         },
         "unit": "PRIMAP1",
         "orig_cat_name": {
             "1A3di Navegación marítima y fluvial internacional": "Navegación marítima y fluvial internacional",
+        },
     }
     }

     coords_value_filling = {
         "references": "https://unfccc.int/documents/419772",
         "references": "https://unfccc.int/documents/419772",
         "rights": "XXXX",
         "rights": "XXXX",
         "contact": "mail@johannes-guetschow.de",
         "contact": "mail@johannes-guetschow.de",
-        "title": "Cuarto Informe Bienal de Actualización de la República Argentina a la Convención Marco delas Naciones Unidas Sobre el Cambio Climático",
+        "title": "Cuarto Informe Bienal de Actualización de la República Argentina a "
+        "la Convención Marco delas Naciones Unidas Sobre el Cambio Climático",
         "comment": "Read fom pdf file by Johannes Gütschow",
         "comment": "Read fom pdf file by Johannes Gütschow",
         "institution": "United Nations Framework Convention on Climate Change (UNFCCC)",
         "institution": "United Nations Framework Convention on Climate Change (UNFCCC)",
     }
     }
@@ -192,8 +195,9 @@ if __name__ == "__main__":
     data_all = None
     data_all = None
     for page in pages_to_read:
         # read current page
-                                  flavor='stream')
+        tables = camelot.read_pdf(
+            str(input_folder / pdf_file), pages=str(page), flavor="stream"
+        )
         df_current = tables[0].df
         rows_to_drop = []
         for index, data in df_current.iterrows():
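
camelot's stream flavor is used for all tables in this script; a minimal sketch of the call (file name hypothetical; pages takes a string):

    import camelot

    tables = camelot.read_pdf("report.pdf", pages="232", flavor="stream")
    df = tables[0].df  # first detected table as a string-valued pandas DataFrame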
@@ -212,16 +216,18 @@ if __name__ == "__main__":
         df_current = df_current.drop(rows_to_drop)
         idx_header = df_current.index[df_current[0] == index_cols[0]].tolist()
         df_current = df_current.rename(
-            dict(zip(df_current.columns, list(df_current.loc[idx_header[0]]))), axis=1)
+            dict(zip(df_current.columns, list(df_current.loc[idx_header[0]]))), axis=1
+        )
         df_current = df_current.drop(idx_header)

         # for sheet "Aggregate GHGs" fill entity cell
         if page in range(232, 235):
             df_current.iloc[
-                metadata["entity"][0], metadata["entity"][1]] = "KYOTOGHG (SARGWP100)"
+                metadata["entity"][0], metadata["entity"][1]
+            ] = "KYOTOGHG (SARGWP100)"
         # drop all rows where the index cols (category code and name) are both NaN
         # as without one of them there is no category information
-        df_current.dropna(axis=0, how='all', subset=index_cols, inplace=True)
+        df_current = df_current.dropna(axis=0, how="all", subset=index_cols)
         # set index. necessary for the stack operation in the conversion to long format
         # df_current = df_current.set_index(index_cols)
         # add columns
@@ -242,21 +248,27 @@ if __name__ == "__main__":
         df_current = df_current.drop(df_current.index[0])

         # fix number format
-        df_current = df_current.apply(lambda x: x.str.replace('.', '', regex=False), axis=1)
-        df_current = df_current.apply(lambda x: x.str.replace(',', '.', regex=False),
-                                      axis=1)
+        df_current = df_current.apply(
+            lambda x: x.str.replace(".", "", regex=False), axis=1
+        )
+        df_current = df_current.apply(
+            lambda x: x.str.replace(",", ".", regex=False), axis=1
+        )
-        df_current.rename(columns=col_rename, inplace=True)
+        df_current = df_current.rename(columns=col_rename)

         # reindex
-        df_current.reset_index(inplace=True, drop=True)
+        df_current = df_current.reset_index(drop=True)

         df_current["category"] = df_current["category"].replace(cat_codes_manual)
+
         # then the regex replacements
-        def repl(m):
-            return convert_ipcc_code_primap_to_primap2('IPC' + m.group('code'))
-        df_current["category"] = df_current["category"].str.replace(cat_code_regexp, repl,
-                                                                    regex=True)
+        def repl(m):  # noqa: D103
+            return convert_ipcc_code_primap_to_primap2("IPC" + m.group("code"))
+
+        df_current["category"] = df_current["category"].str.replace(
+            cat_code_regexp, repl, regex=True
+        )

         df_current = df_current.reset_index(drop=True)
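
The two replacements above convert Spanish-style numbers (dot as thousands separator, comma as decimal separator) into parseable strings; a worked toy example:

    import pandas as pd

    df = pd.DataFrame({"2016": ["1.234,5", "0,8"]})
    df = df.apply(lambda x: x.str.replace(".", "", regex=False), axis=1)
    df = df.apply(lambda x: x.str.replace(",", ".", regex=False), axis=1)
    print(df["2016"].tolist())  # ['1234.5', '0.8']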
@@ -274,7 +286,7 @@ if __name__ == "__main__":
             coords_value_filling=coords_value_filling,
             filter_remove=filter_remove,
             filter_keep=filter_keep,
-            meta_data=meta_data
+            meta_data=meta_data,
         )

         # convert to PRIMAP2 native format
@@ -289,8 +301,9 @@ if __name__ == "__main__":
     # read fgases
     for page in pages_to_read_fgases:
         # read current page
-        tables = camelot.read_pdf(str(input_folder / pdf_file), pages=str(page),
-                                  flavor='stream')
+        tables = camelot.read_pdf(
+            str(input_folder / pdf_file), pages=str(page), flavor="stream"
+        )
         df_current = tables[0].df
         rows_to_drop = []
         for index, data in df_current.iterrows():
@@ -309,11 +322,12 @@ if __name__ == "__main__":
         df_current = df_current.drop(rows_to_drop)
         idx_header = df_current.index[df_current[0] == index_cols_fgases[0]].tolist()
         df_current = df_current.rename(
-            dict(zip(df_current.columns, list(df_current.loc[idx_header[0]]))), axis=1)
+            dict(zip(df_current.columns, list(df_current.loc[idx_header[0]]))), axis=1
+        )
         df_current = df_current.drop(idx_header)

         # drop all rows where the index cols (category code
-        df_current.dropna(axis=0, how='all', subset=index_cols_fgases, inplace=True)
+        df_current = df_current.dropna(axis=0, how="all", subset=index_cols_fgases)
         # set index. necessary for the stack operation in the conversion to long format
         # df_current = df_current.set_index(index_cols)
         # add columns
@@ -324,7 +338,8 @@ if __name__ == "__main__":
                 value = metadata_fgases[col]
             else:
                 value = df_current.iloc[
-                    metadata_fgases[col][0], metadata_fgases[col][1] + inserted]
+                    metadata_fgases[col][0], metadata_fgases[col][1] + inserted
+                ]
                 if col in metadata_mapping.keys():
                     if value in metadata_mapping[col].keys():
                         value = metadata_mapping[col][value]
@@ -339,21 +354,27 @@ if __name__ == "__main__":
         df_current = df_current.drop(df_current.index[0])

         # fix number format
-        df_current = df_current.apply(lambda x: x.str.replace('.', '', regex=False), axis=1)
-        df_current = df_current.apply(lambda x: x.str.replace(',', '.', regex=False),
-                                      axis=1)
+        df_current = df_current.apply(
+            lambda x: x.str.replace(".", "", regex=False), axis=1
+        )
+        df_current = df_current.apply(
+            lambda x: x.str.replace(",", ".", regex=False), axis=1
+        )
-        df_current.rename(columns=col_rename_fgases, inplace=True)
+        df_current = df_current.rename(columns=col_rename_fgases)

         # reindex
-        df_current.reset_index(inplace=True, drop=True)
+        df_current = df_current.reset_index(drop=True)

         df_current["category"] = df_current["category"].replace(cat_codes_manual)
-        # then the regex repalcements
-        def repl(m):
-            return convert_ipcc_code_primap_to_primap2('IPC' + m.group('code'))
-        df_current["category"] = df_current["category"].str.replace(cat_code_regexp, repl,
-                                                                    regex=True)
+
+        # then the regex replacements
+        def repl(m):  # noqa: D103
+            return convert_ipcc_code_primap_to_primap2("IPC" + m.group("code"))
+
+        df_current["category"] = df_current["category"].str.replace(
+            cat_code_regexp, repl, regex=True
+        )

         df_current = df_current.reset_index(drop=True)
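
The regex pulls the leading category code out of strings like "1A2 ..." and hands it to the primap2 converter; a self-contained sketch on toy strings (the exact output format comes from primap2):

    import pandas as pd
    from primap2.pm2io._conversion import convert_ipcc_code_primap_to_primap2

    cat_code_regexp = r"(?P<code>^[A-Z0-9]{1,8}).*"

    def repl(m):
        return convert_ipcc_code_primap_to_primap2("IPC" + m.group("code"))

    cats = pd.Series(["1A2 Industrias manufactureras", "2F Uso de productos"])
    print(cats.str.replace(cat_code_regexp, repl, regex=True).tolist())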
@@ -371,7 +392,7 @@ if __name__ == "__main__":
             coords_value_filling=coords_value_filling,
             filter_remove=filter_remove,
             filter_keep=filter_keep,
-            meta_data=meta_data
+            meta_data=meta_data,
         )

         # convert to PRIMAP2 native format
@@ -390,19 +411,17 @@ if __name__ == "__main__":
         processing_info_country=None,
     )

-
     # ###
     # save data to IF and native format
     # ###

     encoding = {var: compression for var in data_all.data_vars}
-    data_all.pr.to_netcdf(output_folder / (output_filename + coords_terminologies[
-        "category"] + ".nc"), encoding=encoding)
+    data_all.pr.to_netcdf(
+        output_folder / (output_filename + coords_terminologies["category"] + ".nc"),
+        encoding=encoding,
+    )

     data_if = data_all.pr.to_interchange_format()
-    pm2.pm2io.write_interchange_format(output_folder / (output_filename + coords_terminologies["category"]), data_if)
-
-
-
-
-
+    pm2.pm2io.write_interchange_format(
+        output_folder / (output_filename + coords_terminologies["category"]), data_if
+    )

+ 28 - 5
src/unfccc_ghg_data/unfccc_reader/Chile/__init__.py

@@ -1,7 +1,30 @@
-"""Chile (BUR4, BUR5)
+"""Read Chile's BURs, NIRs, NCs
-Scripts and configurations to read Chile's is available:
- * BUR4 (from xlsx)
- * BUR5 (from xlsx)
+Scripts and configurations to read Chile's submissions to the UNFCCC.
+Currently, the following submissions and datasets are available (all datasets
+including DI (read using the DI-reader) and legacy BUR/NIR (no code)):
-"""
+.. exec_code::
+    :hide_code:
+
+    from unfccc_ghg_data.helper.functions import (get_country_datasets,
+                                                  get_country_submissions)
+    country = 'CHL'
+    # print available submissions
+    print("="*15 + " Available submissions " + "="*15)
+    get_country_submissions(country, True)
+    print("")
+
+    #print available datasets
+    print("="*15 + " Available datasets " + "="*15)
+    get_country_datasets(country, True)
+
+You can also obtain this information by running
+
+.. code-block:: bash
+
+    poetry run doit country_info country=CHL
+
+See below for a listing of scripts for BUR/NIR reading including links.
+
+"""

+ 288 - 141
src/unfccc_ghg_data/unfccc_reader/Chile/config_chl_bur4.py

@@ -1,166 +1,295 @@
+"""Config for Chile BUR4,5
+
+General configuration for reading the inventory files underlying Chile's BURs 4 and 5.
+PRIMAP2 data format specific configuration is BUR specific and not contained here
+but in the reading scripts.
+
+"""
+
 ## parameters for conversion to IPCC2006 categories
 filter_remove_IPCC2006 = {
-    "filter_cats": { # filter cats that have no 1:1 match for IPCC2006 or are additional subsectors
+    "filter_cats": {  # filter cats that have no 1:1 match for IPCC2006 or are additional subsectors
         "category (IPCC2006_PRIMAP)": [
         "category (IPCC2006_PRIMAP)": [
             # refrigeration and air conditioning subsectors don't match IPCC2006
             # refrigeration and air conditioning subsectors don't match IPCC2006
-            '2.F.1.a', '2.F.1.b', '2.F.1.c', '2.F.1.d', '2.F.1.e', '2.F.1.f',
+            "2.F.1.a",
+            "2.F.1.b",
+            "2.F.1.c",
+            "2.F.1.d",
+            "2.F.1.e",
+            "2.F.1.f",
             # additional subsectors for other cattle in enteric fermentation
-            '3.A.1.b.i', '3.A.1.b.ii', '3.A.1.b.iii', '3.A.1.b.iv', '3.A.1.b.v',
+            "3.A.1.b.i",
+            "3.A.1.b.ii",
+            "3.A.1.b.iii",
+            "3.A.1.b.iv",
+            "3.A.1.b.v",
             # additional subcategories for swine in enteric fermentation
-            '3.A.3.a', '3.A.3.b', '3.A.3.c',
+            "3.A.3.a",
+            "3.A.3.b",
+            "3.A.3.c",
             # other animals in enteric fermentation not fitting the IPCC2006 other animals
-            '3.A.4',
+            "3.A.4",
             # need to be summed to '3.A.4.j'
-            '3.A.4.f', '3.A.4.g', '3.A.4.g.i', '3.A.4.g.ii',
+            "3.A.4.f",
+            "3.A.4.g",
+            "3.A.4.g.i",
+            "3.A.4.g.ii",
             # additional subsectors for other cattle in enteric fermentation
-            '3.B.1.b.i', '3.B.1.b.ii', '3.B.1.b.iii', '3.B.1.b.iv', '3.B.1.b.v',
+            "3.B.1.b.i",
+            "3.B.1.b.ii",
+            "3.B.1.b.iii",
+            "3.B.1.b.iv",
+            "3.B.1.b.v",
             # additional subcategories for swine in enteric fermentation
-            '3.B.3.a', '3.B.3.b', '3.B.3.c',
+            "3.B.3.a",
+            "3.B.3.b",
+            "3.B.3.c",
             # other animals in enteric fermentation not fitting the IPCC2006 other animals
-            '3.B.4',
+            "3.B.4",
             # need to be summed to '3.A.4.j'
-            '3.B.4.f', '3.B.4.g', '3.B.4.g.i', '3.B.4.g.ii',
+            "3.B.4.f",
+            "3.B.4.g",
+            "3.B.4.g.i",
+            "3.B.4.g.ii",
             # subsectors of indirect N2O from manure management
-            '3.B.5.a', '3.B.5.b', '3.B.5.c', '3.B.5.d', '3.B.5.d.i', '3.B.5.d.ii',
-            '3.B.5.d.iii', '3.B.5.d.iv', '3.B.5.d.v', '3.B.5.d.vi', '3.B.5.d.vii',
+            "3.B.5.a",
+            "3.B.5.b",
+            "3.B.5.c",
+            "3.B.5.d",
+            "3.B.5.d.i",
+            "3.B.5.d.ii",
+            "3.B.5.d.iii",
+            "3.B.5.d.iv",
+            "3.B.5.d.v",
+            "3.B.5.d.vi",
+            "3.B.5.d.vii",
             # subsectors of rice cultivation
-            '3.C.1', '3.C.2', '3.C.3', '3.C.4',
+            "3.C.1",
+            "3.C.2",
+            "3.C.3",
+            "3.C.4",
             # no direct representation of "agricultural soils" in IPCC 2006
-            '3.D',
+            "3.D",
             # subsectors of 3.D.1. not matching subsectors of 3.C.4 (direct emissions from managed soils)
             # '3.D.1.a.': '3.C.1.a', '3.D.1.b.': '3.C.1.b', '3.D.1.c.': '3.A.4.c', '3.D.1.d.': '3.C.4.d',
-            '3.D.1.a', '3.D.1.b', '3.D.1.b.i', '3.D.1.b.ii', '3.D.1.b.iii', '3.D.1.c',
-            '3.D.1.d', '3.D.1.e', '3.D.1.f', '3.D.1.g',
+            "3.D.1.a",
+            "3.D.1.b",
+            "3.D.1.b.i",
+            "3.D.1.b.ii",
+            "3.D.1.b.iii",
+            "3.D.1.c",
+            "3.D.1.d",
+            "3.D.1.e",
+            "3.D.1.f",
+            "3.D.1.g",
             # additional subsector level of 3.D.2.a (3.C.5.a Atmospheric deposition)
-            '3.D.2.a.i', '3.D.2.a.ii', '3.D.2.a.ii.1', '3.D.2.a.ii.2', '3.D.2.a.ii.3', '3.D.2.a.iii',
+            "3.D.2.a.i",
+            "3.D.2.a.ii",
+            "3.D.2.a.ii.1",
+            "3.D.2.a.ii.2",
+            "3.D.2.a.ii.3",
+            "3.D.2.a.iii",
             # additional subsector level of 3.D.2.b (3.C.5.b Nitrogen leaching and runoff)
-            '3.D.2.b.i', '3.D.2.b.ii', '3.D.2.b.ii.1', '3.D.2.b.ii.2', '3.D.2.b.ii.3', '3.D.2.b.iii',
-            '3.D.2.b.iv', '3.D.2.b.v',
+            "3.D.2.b.i",
+            "3.D.2.b.ii",
+            "3.D.2.b.ii.1",
+            "3.D.2.b.ii.2",
+            "3.D.2.b.ii.3",
+            "3.D.2.b.iii",
+            "3.D.2.b.iv",
+            "3.D.2.b.v",
             # additional subsector level of 3.F (3.C.1.b Biomass burning in cropland)
-            '3.F.1', '3.F.2', '3.F.3',
+            "3.F.1",
+            "3.F.2",
+            "3.F.3",
             # additional subsector level of 3.G (3.C.2 Liming)
-            '3.G.1', '3.G.2',
+            "3.G.1",
+            "3.G.2",
             # additional subsector levels of 4.A.1 (3.A.1.a Forest land remaining forest land)
-            '4.A.1.a', '4.A.1.a.i', '4.A.1.a.i.1', '4.A.1.a.i.1.a', '4.A.1.a.i.1.b', '4.A.1.a.i.1.c',
-            '4.A.1.a.i.1.d', '4.A.1.a.i.1.e', '4.A.1.a.i.1.f', '4.A.1.a.i.1.g', '4.A.1.a.i.1.h',
-            '4.A.1.a.i.1.i', '4.A.1.a.i.1.j', '4.A.1.a.i.1.k', '4.A.1.a.i.1.l', '4.A.1.a.i.2',
-            '4.A.1.a.i.2.a', '4.A.1.a.i.2.b', '4.A.1.a.i.2.c', '4.A.1.a.i.2.d', '4.A.1.a.i.2.e',
-            '4.A.1.a.i.2.f', '4.A.1.a.i.2.g', '4.A.1.a.i.2.h', '4.A.1.a.i.2.i', '4.A.1.a.i.2.j',
-            '4.A.1.a.i.2.k', '4.A.1.a.i.2.l', '4.A.1.a.i.3', '4.A.1.a.i.3.a', '4.A.1.a.i.3.b',
-            '4.A.1.a.i.3.c', '4.A.1.a.i.3.d', '4.A.1.a.i.3.e', '4.A.1.a.i.3.f', '4.A.1.a.i.3.g',
-            '4.A.1.a.i.3.h', '4.A.1.a.i.3.i', '4.A.1.a.i.3.j', '4.A.1.a.i.3.k', '4.A.1.a.i.3.l',
-            '4.A.1.a.ii', '4.A.1.a.ii.1', '4.A.1.a.ii.2', '4.A.1.a.ii.3', '4.A.1.a.ii.4',
-            '4.A.1.a.ii.5', '4.A.1.a.ii.6', '4.A.1.a.ii.7', '4.A.1.b', '4.A.1.b.i', '4.A.1.b.i.1',
-            '4.A.1.b.i.2', '4.A.1.b.i.3', '4.A.1.b.i.4', '4.A.1.b.ii', '4.A.1.b.ii.1', '4.A.1.b.ii.2',
-            '4.A.1.b.iii', '4.A.1.b.iii.1', '4.A.1.b.iii.1.a', '4.A.1.b.iii.1.b', '4.A.1.b.iii.2',
-            '4.A.1.b.iv', '4.A.1.c', '4.A.1.c.i', '4.A.1.c.ii',
+            "4.A.1.a",
+            "4.A.1.a.i",
+            "4.A.1.a.i.1",
+            "4.A.1.a.i.1.a",
+            "4.A.1.a.i.1.b",
+            "4.A.1.a.i.1.c",
+            "4.A.1.a.i.1.d",
+            "4.A.1.a.i.1.e",
+            "4.A.1.a.i.1.f",
+            "4.A.1.a.i.1.g",
+            "4.A.1.a.i.1.h",
+            "4.A.1.a.i.1.i",
+            "4.A.1.a.i.1.j",
+            "4.A.1.a.i.1.k",
+            "4.A.1.a.i.1.l",
+            "4.A.1.a.i.2",
+            "4.A.1.a.i.2.a",
+            "4.A.1.a.i.2.b",
+            "4.A.1.a.i.2.c",
+            "4.A.1.a.i.2.d",
+            "4.A.1.a.i.2.e",
+            "4.A.1.a.i.2.f",
+            "4.A.1.a.i.2.g",
+            "4.A.1.a.i.2.h",
+            "4.A.1.a.i.2.i",
+            "4.A.1.a.i.2.j",
+            "4.A.1.a.i.2.k",
+            "4.A.1.a.i.2.l",
+            "4.A.1.a.i.3",
+            "4.A.1.a.i.3.a",
+            "4.A.1.a.i.3.b",
+            "4.A.1.a.i.3.c",
+            "4.A.1.a.i.3.d",
+            "4.A.1.a.i.3.e",
+            "4.A.1.a.i.3.f",
+            "4.A.1.a.i.3.g",
+            "4.A.1.a.i.3.h",
+            "4.A.1.a.i.3.i",
+            "4.A.1.a.i.3.j",
+            "4.A.1.a.i.3.k",
+            "4.A.1.a.i.3.l",
+            "4.A.1.a.ii",
+            "4.A.1.a.ii.1",
+            "4.A.1.a.ii.2",
+            "4.A.1.a.ii.3",
+            "4.A.1.a.ii.4",
+            "4.A.1.a.ii.5",
+            "4.A.1.a.ii.6",
+            "4.A.1.a.ii.7",
+            "4.A.1.b",
+            "4.A.1.b.i",
+            "4.A.1.b.i.1",
+            "4.A.1.b.i.2",
+            "4.A.1.b.i.3",
+            "4.A.1.b.i.4",
+            "4.A.1.b.ii",
+            "4.A.1.b.ii.1",
+            "4.A.1.b.ii.2",
+            "4.A.1.b.iii",
+            "4.A.1.b.iii.1",
+            "4.A.1.b.iii.1.a",
+            "4.A.1.b.iii.1.b",
+            "4.A.1.b.iii.2",
+            "4.A.1.b.iv",
+            "4.A.1.c",
+            "4.A.1.c.i",
+            "4.A.1.c.ii",
             # additional subsector level in land converted to forest land
-            '4.A.2.a.i', '4.A.2.a.ii', '4.A.2.b.i', '4.A.2.b.ii', '4.A.2.c.i', '4.A.2.c.ii',
-            '4.A.2.d.i', '4.A.2.d.ii', '4.A.2.e.i', '4.A.2.e.ii',
+            "4.A.2.a.i",
+            "4.A.2.a.ii",
+            "4.A.2.b.i",
+            "4.A.2.b.ii",
+            "4.A.2.c.i",
+            "4.A.2.c.ii",
+            "4.A.2.d.i",
+            "4.A.2.d.ii",
+            "4.A.2.e.i",
+            "4.A.2.e.ii",
             # subsectors of solid waste disposal might not match
-            '5.A.1', '5.A.2', '5.A.3',
+            "5.A.1",
+            "5.A.2",
+            "5.A.3",
         ],
     },
 }
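The list above feeds `filter_data` in the Chile reader scripts below (`filter_data(data_if_2006, filter_remove=filter_remove_IPCC2006)`): these subsectors have no clean counterpart in the IPCC 2006 terminology, so they are dropped before the category mapping. A rough hand-rolled sketch of that step, assuming each filter entry maps column names to lists of values to drop (the real logic lives in primap2's `filter_data`):

    # sketch only, not primap2's implementation
    for spec in filter_remove_IPCC2006.values():
        for col, values in spec.items():
            data_if_2006 = data_if_2006[~data_if_2006[col].isin(values)]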
 
 
-cat_mapping = { # categories not listed here have the same UNFCCC_GHG_data as in IPCC 2006 specifications
-    '3': 'M.AG',
-    '3.A': '3.A.1',
-    '3.A.1': '3.A.1.a',
-    '3.A.1.a': '3.A.1.a.i',
-    '3.A.1.b': '3.A.1.a.ii',
-    '3.A.2': '3.A.1.c',
-    '3.A.3': '3.A.1.h',
-    '3.A.4.a': '3.A.1.b',
-    '3.A.4.b': '3.A.1.d',
-    '3.A.4.c': '3.A.1.f',
-    '3.A.4.d': '3.A.1.g',
-    '3.A.4.e': '3.A.1.i',
-    '3.B': '3.A.2',
-    '3.B.1': '3.A.2.a',
-    '3.B.1.a': '3.A.2.a.i',
-    '3.B.1.b': '3.A.2.a.ii',
-    '3.B.2': '3.A.2.c',
-    '3.B.3': '3.A.2.h',
-    '3.B.4.a': '3.A.2.b',
-    '3.B.4.b': '3.A.2.d',
-    '3.B.4.c': '3.A.2.f',
-    '3.B.4.d': '3.A.2.g',
-    '3.B.4.e': '3.A.2.i',
-    '3.B.5': '3.C.6',
-    '3.C': '3.C.7',
-    '3.D.1': '3.C.4',
-    '3.D.2': '3.C.5',
-    '3.D.2.a': '3.C.5.a', # not in climate_categories
-    '3.D.2.b': '3.C.5.b', # not in climate_categories
-    '3.E': '3.C.1.c',
-    '3.F': '3.C.1.b',
-    '3.G': '3.C.2',
-    '3.H': '3.C.3',
-    '3.I': '3.C.8.a', # merge this with cat below
-    '3.J': '3.C.8.b', # merge with cat above
-    '4': 'M.LULUCF',
-    '4.A': '3.B.1',
-    '4.A.1': '3.B.1.a',
-    '4.A.2': '3.B.1.b',
-    '4.A.2.a': '3.B.1.b.i',
-    '4.A.2.b': '3.B.1.b.ii',
-    '4.A.2.c': '3.B.1.b.iii',
-    '4.A.2.d': '3.B.1.b.iv',
-    '4.A.2.e': '3.B.1.b.v',
-    '4.B': '3.B.2',
-    '4.B.1': '3.B.2.a',
-    '4.B.2': '3.B.2.b',
-    '4.B.2.a': '3.B.2.b.i',
-    '4.B.2.b': '3.B.2.b.ii',
-    '4.B.2.c': '3.B.2.b.iii',
-    '4.B.2.d': '3.B.2.b.iv',
-    '4.B.2.e': '3.B.2.b.v',
-    '4.C': '3.B.3',
-    '4.C.1': '3.B.3.a',
-    '4.C.2': '3.B.3.b',
-    '4.C.2.a': '3.B.3.b.i',
-    '4.C.2.b': '3.B.3.b.ii',
-    '4.C.2.c': '3.B.3.b.iii',
-    '4.C.2.d': '3.B.3.b.iv',
-    '4.C.2.e': '3.B.3.b.v',
-    '4.D': '3.B.4',
-    '4.D.1': '3.B.4.a',
-    '4.D.2': '3.B.4.b',
-    '4.D.2.a': '3.B.4.b.i',
-    '4.D.2.b': '3.B.4.b.ii',
-    '4.D.2.c': '3.B.4.b.iii',
-    '4.D.2.d': '3.B.4.b.iv',
-    '4.D.2.e': '3.B.4.b.v',
-    '4.E': '3.B.5',
-    '4.E.1': '3.B.5.a',
-    '4.E.2': '3.B.5.b',
-    '4.E.2.a': '3.B.5.b.i',
-    '4.E.2.b': '3.B.5.b.ii',
-    '4.E.2.c': '3.B.5.b.iii',
-    '4.E.2.d': '3.B.5.b.iv',
-    '4.E.2.e': '3.B.5.b.v',
-    '4.F': '3.B.6',
-    '4.F.1': '3.B.6.a',
-    '4.F.2': '3.B.6.b',
-    '4.F.2.a': '3.B.6.b.i',
-    '4.F.2.b': '3.B.6.b.ii',
-    '4.F.2.c': '3.B.6.b.iii',
-    '4.F.2.d': '3.B.6.b.iv',
-    '4.F.2.e': '3.B.6.b.v',
-    '4.G': '3.D.1',
-    '4.H': '3.D.2',
-    '5': '4',
-    '5.A': '4.A',
-    '5.B': '4.B',
-    '5.C': '4.C',
-    '5.C.1': '4.C.1',
-    '5.C.2': '4.C.2',
-    '5.D': '4.D',
-    '5.D.1': '4.D.1',
-    '5.D.2': '4.D.2',
-    '5.E': '4.E',
+cat_mapping = {  # categories not listed here have the same code as in IPCC 2006 specifications
+    "3": "M.AG",
+    "3.A": "3.A.1",
+    "3.A.1": "3.A.1.a",
+    "3.A.1.a": "3.A.1.a.i",
+    "3.A.1.b": "3.A.1.a.ii",
+    "3.A.2": "3.A.1.c",
+    "3.A.3": "3.A.1.h",
+    "3.A.4.a": "3.A.1.b",
+    "3.A.4.b": "3.A.1.d",
+    "3.A.4.c": "3.A.1.f",
+    "3.A.4.d": "3.A.1.g",
+    "3.A.4.e": "3.A.1.i",
+    "3.B": "3.A.2",
+    "3.B.1": "3.A.2.a",
+    "3.B.1.a": "3.A.2.a.i",
+    "3.B.1.b": "3.A.2.a.ii",
+    "3.B.2": "3.A.2.c",
+    "3.B.3": "3.A.2.h",
+    "3.B.4.a": "3.A.2.b",
+    "3.B.4.b": "3.A.2.d",
+    "3.B.4.c": "3.A.2.f",
+    "3.B.4.d": "3.A.2.g",
+    "3.B.4.e": "3.A.2.i",
+    "3.B.5": "3.C.6",
+    "3.C": "3.C.7",
+    "3.D.1": "3.C.4",
+    "3.D.2": "3.C.5",
+    "3.D.2.a": "3.C.5.a",  # not in climate_categories
+    "3.D.2.b": "3.C.5.b",  # not in climate_categories
+    "3.E": "3.C.1.c",
+    "3.F": "3.C.1.b",
+    "3.G": "3.C.2",
+    "3.H": "3.C.3",
+    "3.I": "3.C.8.a",  # merge this with cat below
+    "3.J": "3.C.8.b",  # merge with cat above
+    "4": "M.LULUCF",
+    "4.A": "3.B.1",
+    "4.A.1": "3.B.1.a",
+    "4.A.2": "3.B.1.b",
+    "4.A.2.a": "3.B.1.b.i",
+    "4.A.2.b": "3.B.1.b.ii",
+    "4.A.2.c": "3.B.1.b.iii",
+    "4.A.2.d": "3.B.1.b.iv",
+    "4.A.2.e": "3.B.1.b.v",
+    "4.B": "3.B.2",
+    "4.B.1": "3.B.2.a",
+    "4.B.2": "3.B.2.b",
+    "4.B.2.a": "3.B.2.b.i",
+    "4.B.2.b": "3.B.2.b.ii",
+    "4.B.2.c": "3.B.2.b.iii",
+    "4.B.2.d": "3.B.2.b.iv",
+    "4.B.2.e": "3.B.2.b.v",
+    "4.C": "3.B.3",
+    "4.C.1": "3.B.3.a",
+    "4.C.2": "3.B.3.b",
+    "4.C.2.a": "3.B.3.b.i",
+    "4.C.2.b": "3.B.3.b.ii",
+    "4.C.2.c": "3.B.3.b.iii",
+    "4.C.2.d": "3.B.3.b.iv",
+    "4.C.2.e": "3.B.3.b.v",
+    "4.D": "3.B.4",
+    "4.D.1": "3.B.4.a",
+    "4.D.2": "3.B.4.b",
+    "4.D.2.a": "3.B.4.b.i",
+    "4.D.2.b": "3.B.4.b.ii",
+    "4.D.2.c": "3.B.4.b.iii",
+    "4.D.2.d": "3.B.4.b.iv",
+    "4.D.2.e": "3.B.4.b.v",
+    "4.E": "3.B.5",
+    "4.E.1": "3.B.5.a",
+    "4.E.2": "3.B.5.b",
+    "4.E.2.a": "3.B.5.b.i",
+    "4.E.2.b": "3.B.5.b.ii",
+    "4.E.2.c": "3.B.5.b.iii",
+    "4.E.2.d": "3.B.5.b.iv",
+    "4.E.2.e": "3.B.5.b.v",
+    "4.F": "3.B.6",
+    "4.F.1": "3.B.6.a",
+    "4.F.2": "3.B.6.b",
+    "4.F.2.a": "3.B.6.b.i",
+    "4.F.2.b": "3.B.6.b.ii",
+    "4.F.2.c": "3.B.6.b.iii",
+    "4.F.2.d": "3.B.6.b.iv",
+    "4.F.2.e": "3.B.6.b.v",
+    "4.G": "3.D.1",
+    "4.H": "3.D.2",
+    "5": "4",
+    "5.A": "4.A",
+    "5.B": "4.B",
+    "5.C": "4.C",
+    "5.C.1": "4.C.1",
+    "5.C.2": "4.C.2",
+    "5.D": "4.D",
+    "5.D.1": "4.D.1",
+    "5.D.2": "4.D.2",
+    "5.E": "4.E",
 }

 # comments
@@ -176,11 +305,29 @@ cat_mapping = { # categories not listed here have the same UNFCCC_GHG_data as in
 # '3.A.4.g.ii.',

 aggregate_cats = {
-    '3.A': {'sources': ['3.A.1', '3.A.2'], 'name': 'Livestock'},
-    '3.B': {'sources': ['3.B.1', '3.B.2', '3.B.3', '3.B.4', '3.B.5', '3.B.6'], 'name': 'Land'},
-    '3.C.1': {'sources': ['3.C.1.b','3.C.1.c'], 'name': 'Emissions from Biomass Burning'},
-    '3.C.8': {'sources': ['3.C.8.a', '3.C.8.b'], 'name': 'Other'},
-    '3.C': {'sources': ['3.C.1', '3.C.2', '3.C.3', '3.C.4', '3.C.5', '3.C.6', '3.C.7', '3.C.8'], 'name': 'Aggregate sources and non-CO2 emissions sources on land'},
-    '3.D': {'sources': ['3.D.1', '3.D.2'], 'name': 'Other'},
-    '3': {'sources': ['3.A', '3.B', '3.C', '3.D'], 'name': 'AFOLU'},
+    "3.A": {"sources": ["3.A.1", "3.A.2"], "name": "Livestock"},
+    "3.B": {
+        "sources": ["3.B.1", "3.B.2", "3.B.3", "3.B.4", "3.B.5", "3.B.6"],
+        "name": "Land",
+    },
+    "3.C.1": {
+        "sources": ["3.C.1.b", "3.C.1.c"],
+        "name": "Emissions from Biomass Burning",
+    },
+    "3.C.8": {"sources": ["3.C.8.a", "3.C.8.b"], "name": "Other"},
+    "3.C": {
+        "sources": [
+            "3.C.1",
+            "3.C.2",
+            "3.C.3",
+            "3.C.4",
+            "3.C.5",
+            "3.C.6",
+            "3.C.7",
+            "3.C.8",
+        ],
+        "name": "Aggregate sources and non-CO2 emissions sources on land",
+    },
+    "3.D": {"sources": ["3.D.1", "3.D.2"], "name": "Other"},
+    "3": {"sources": ["3.A", "3.B", "3.C", "3.D"], "name": "AFOLU"},
 }
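Both dicts are consumed by the reader scripts further down: `cat_mapping` renames category codes in the interchange-format dataframe, and `aggregate_cats` rebuilds each parent category by summing its IPCC 2006 sources. Condensed from the reader code below (the real scripts also coerce the time columns to numeric first):

    import pandas as pd

    cat_label = "category (IPCC2006)"  # column name as built in the readers
    data_if_2006 = data_if_2006.replace({cat_label: cat_mapping})
    for cat_to_agg, spec in aggregate_cats.items():
        # select the source categories of the aggregate (sketch)
        df_test = data_if_2006[data_if_2006[cat_label].isin(spec["sources"])]
        if len(df_test) > 0:
            df_combine = df_test.groupby(
                by=["source", "scenario (PRIMAP)", "provenance",
                    "area (ISO3)", "entity", "unit"]
            ).sum()
            df_combine.insert(0, cat_label, cat_to_agg)
            df_combine.insert(1, "orig_cat_name", spec["name"])
            data_if_2006 = pd.concat([data_if_2006, df_combine.reset_index()])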

+ 90 - 52
src/unfccc_ghg_data/unfccc_reader/Chile/read_CHL_BUR4_from_xlsx.py

@@ -2,14 +2,15 @@
 Read Chile's 2020 inventory from xlsx

 This script reads data from Chile's 2020 national inventory which is underlying BUR4.
- Data is read from the xlsx file"""
+Data is read from the xlsx file
+"""

 import os
 import sys

 import pandas as pd
 import primap2 as pm2
-from .config_chl_bur4 import aggregate_cats, cat_mapping, filter_remove_IPCC2006
+from config_chl_bur4 import aggregate_cats, cat_mapping, filter_remove_IPCC2006
 from primap2.pm2io._data_reading import filter_data, matches_time_format

 from unfccc_ghg_data.helper import downloaded_data_path, extracted_data_path
@@ -20,42 +21,49 @@ if __name__ == "__main__":
     # ###

     # folders and files
-    input_folder = downloaded_data_path / 'UNFCCC' / 'Chile' / 'BUR4'
-    output_folder = extracted_data_path / 'UNFCCC' / 'Chile'
+    input_folder = downloaded_data_path / "UNFCCC" / "Chile" / "BUR4"
+    output_folder = extracted_data_path / "UNFCCC" / "Chile"
     if not output_folder.exists():
         output_folder.mkdir()

-    output_filename = 'CHL_BUR4_2021_'
+    output_filename = "CHL_BUR4_2021_"
 
-    inventory_file = 'Inventario_Nacional_de_GEI-1990-2018.xlsx'
+    inventory_file = "Inventario_Nacional_de_GEI-1990-2018.xlsx"
     years_to_read = range(1990, 2018 + 1)

     # configuration for conversion to PRIMAP2 data format
     unit_row = "header"
     unit_info = {
-        'regexp_entity': r'(.*)\s\(.*\)$',
-        'regexp_unit': r'.*\s\((.*)\)$',
-        'default_unit': 'kt',
-        'manual_repl_unit': {
-            'kt CO₂ eq': 'ktCO2eq',
-            'HFC (kt CO₂ eq)': 'ktCO2eq',
-            'PFC (kt CO₂ eq)': 'ktCO2eq',
-            'SF₆ (kt CO₂ eq)': 'ktCO2eq',
+        "regexp_entity": r"(.*)\s\(.*\)$",
+        "regexp_unit": r".*\s\((.*)\)$",
+        "default_unit": "kt",
+        "manual_repl_unit": {
+            "kt CO₂ eq": "ktCO2eq",
+            "HFC (kt CO₂ eq)": "ktCO2eq",
+            "PFC (kt CO₂ eq)": "ktCO2eq",
+            "SF₆ (kt CO₂ eq)": "ktCO2eq",
+        },
+        "manual_repl_entity": {
+            "kt CO₂ eq": "KYOTOGHG (AR4GWP100)",
+            "HFC (kt CO₂ eq)": "HFCS (AR4GWP100)",
+            "PFC (kt CO₂ eq)": "PFCS (AR4GWP100)",
+            "SF₆ (kt CO₂ eq)": "SF6 (AR4GWP100)",
         },
-        'manual_repl_entity': {
-            'kt CO₂ eq': 'KYOTOGHG (AR4GWP100)',
-            'HFC (kt CO₂ eq)': 'HFCS (AR4GWP100)',
-            'PFC (kt CO₂ eq)': 'PFCS (AR4GWP100)',
-            'SF₆ (kt CO₂ eq)': 'SF6 (AR4GWP100)',
-        }
     }
-    cols_to_drop = ['Unnamed: 14', 'Unnamed: 16', 'Código IPCC.1',
-                    'Categorías de fuente y sumidero de gases de efecto invernadero.1']
+    cols_to_drop = [
+        "Unnamed: 14",
+        "Unnamed: 16",
+        "Código IPCC.1",
+        "Categorías de fuente y sumidero de gases de efecto invernadero.1",
+    ]
     # columns for category code and original category name
-    index_cols = ['Código IPCC', 'Categorías de fuente y sumidero de gases de efecto invernadero']
+    index_cols = [
+        "Código IPCC",
+        "Categorías de fuente y sumidero de gases de efecto invernadero",
+    ]

     # operations on long format DF
-    cols_for_space_stripping = ['category', 'orig_cat_name', 'entity']
+    cols_for_space_stripping = ["category", "orig_cat_name", "entity"]

     time_format = "%Y"

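For illustration, the `unit_info` regexes above split a column header such as `"SF₆ (kt CO₂ eq)"` into entity and unit; headers listed in `manual_repl_unit`/`manual_repl_entity` are mapped directly instead, and headers without a parenthesised unit presumably fall back to `default_unit`:

    import re

    header = "SF₆ (kt CO₂ eq)"
    entity = re.match(r"(.*)\s\(.*\)$", header).group(1)  # -> "SF₆"
    unit = re.match(r".*\s\((.*)\)$", header).group(1)    # -> "kt CO₂ eq"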
@@ -85,7 +93,7 @@ if __name__ == "__main__":
         "source": "CHL-GHG-Inventory",
         "source": "CHL-GHG-Inventory",
         "provenance": "measured",
         "provenance": "measured",
         "area": "CHL",
         "area": "CHL",
-        "scenario": "BUR4"
+        "scenario": "BUR4",
     }

     coords_value_mapping = {
@@ -117,14 +125,14 @@ if __name__ == "__main__":
     }

     coords_value_filling = {
-        'category': {  # col to fill
-            'orig_cat_name': {  # col to fill from
-                'Todas las emisiones y las absorciones nacionales': '0',  # from value: to value
-                'Tanque internacional': 'M.BK',
-                'Aviación internacional': 'M.BK.A',
-                'Navegación internacional': 'M.BK.M',
-                'Operaciones multilaterales': 'M.MULTIOP',
-                'Emisiones de CO2 de la biomasa': 'M.BIO',
+        "category": {  # col to fill
+            "orig_cat_name": {  # col to fill from (from value: to value)
+                "Todas las emisiones y las absorciones nacionales": "0",
+                "Tanque internacional": "M.BK",
+                "Aviación internacional": "M.BK.A",
+                "Navegación internacional": "M.BK.M",
+                "Operaciones multilaterales": "M.MULTIOP",
+                "Emisiones de CO2 de la biomasa": "M.BIO",
             }
         }
     }
@@ -141,7 +149,9 @@ if __name__ == "__main__":
     filter_keep = {}

     meta_data = {
-        "references": "https://unfccc.int/documents/267936, https://snichile.mma.gob.cl/wp-content/uploads/2021/03/Inventario_Nacional_de_GEI-1990-2018.xlsx",
+        "references": "https://unfccc.int/documents/267936, "
+        "https://snichile.mma.gob.cl/wp-content/uploads/2021/03/"
+        "Inventario_Nacional_de_GEI-1990-2018.xlsx",
         "rights": "",
         "rights": "",
         "contact": "mail@johannes-guetschow.de.de",
         "contact": "mail@johannes-guetschow.de.de",
         "title": "Chile: BUR4",
         "title": "Chile: BUR4",
@@ -165,16 +175,24 @@ if __name__ == "__main__":
     for year in years_to_read:
         # read sheet for the year. Each sheet contains several tables,
         # we only read the upper table as the other tables are summary tables
-        df_current = pd.read_excel(input_folder / inventory_file, sheet_name=str(year), skiprows=2, nrows=442, engine="openpyxl")
+        df_current = pd.read_excel(
+            input_folder / inventory_file,
+            sheet_name=str(year),
+            skiprows=2,
+            nrows=442,
+            engine="openpyxl",
+        )
         # drop the columns which are empty and repetition of the metadata for the second block
-        df_current.drop(cols_to_drop, axis=1, inplace=True)
+        df_current = df_current.drop(cols_to_drop, axis=1)
         # drop all rows where the index cols (category code and name) are both NaN
         # as without one of them there is no category information
-        df_current.dropna(axis=0, how='all', subset=index_cols, inplace=True)
+        df_current = df_current.dropna(axis=0, how="all", subset=index_cols)
         # set multi-index. necessary for the stack operation in the conversion to long format
         df_current = df_current.set_index(index_cols)
         # add unit row using information from entity row and add to index
-        df_current = pm2.pm2io.nir_add_unit_information(df_current, unit_row=unit_row, **unit_info)
+        df_current = pm2.pm2io.nir_add_unit_information(
+            df_current, unit_row=unit_row, **unit_info
+        )
         # actual conversion to long format
         df_current = pm2.pm2io.nir_convert_df_to_long(df_current, year)
         # aggregate to one df
@@ -192,7 +210,7 @@ if __name__ == "__main__":
     for col in cols_for_space_stripping:
         df_all[col] = df_all[col].str.strip()

-    df_all["category"] = df_all["category"].str.rstrip('.')
+    df_all["category"] = df_all["category"].str.rstrip(".")

     data_if = pm2.pm2io.convert_long_dataframe_if(
         df_all,
@@ -204,11 +222,11 @@ if __name__ == "__main__":
         coords_value_filling=coords_value_filling,
         filter_remove=filter_remove,
         filter_keep=filter_keep,
-        meta_data=meta_data
+        meta_data=meta_data,
+        time_format="%Y",
     )

-
-    #conversion to PRIMAP2 native format
+    # conversion to PRIMAP2 native format
     data_pm2 = pm2.pm2io.from_interchange_format(data_if)
     # convert back to IF to have units in the fixed format
     data_if = data_pm2.pr.to_interchange_format()
@@ -216,11 +234,16 @@ if __name__ == "__main__":
     # ###
     # save data to IF and native format
     # ###
-    pm2.pm2io.write_interchange_format(output_folder / (output_filename + coords_terminologies["category"]), data_if)
+    pm2.pm2io.write_interchange_format(
+        output_folder / (output_filename + coords_terminologies["category"]), data_if
+    )

     data_pm2 = pm2.pm2io.from_interchange_format(data_if)
     encoding = {var: compression for var in data_pm2.data_vars}
-    data_pm2.pr.to_netcdf(output_folder / (output_filename + coords_terminologies["category"] + ".nc"), encoding=encoding)
+    data_pm2.pr.to_netcdf(
+        output_folder / (output_filename + coords_terminologies["category"] + ".nc"),
+        encoding=encoding,
+    )

     # ###
     # conversion to ipcc 2006 categories
@@ -236,10 +259,10 @@ if __name__ == "__main__":
         coords_value_filling=coords_value_filling,
         filter_remove=filter_remove,
         filter_keep=filter_keep,
-        meta_data=meta_data
+        meta_data=meta_data,
     )

-    cat_label = 'category (' + coords_terminologies_2006["category"] + ')'
+    cat_label = "category (" + coords_terminologies_2006["category"] + ")"
     filter_data(data_if_2006, filter_remove=filter_remove_IPCC2006)
     data_if_2006 = data_if_2006.replace({cat_label: cat_mapping})

@@ -252,10 +275,10 @@ if __name__ == "__main__":
             print(f"Aggregating category {cat_to_agg}")
             print(f"Aggregating category {cat_to_agg}")
             df_combine = df_test.copy(deep=True)
             df_combine = df_test.copy(deep=True)
 
 
-            time_format = '%Y'
+            time_format = "%Y"
             time_columns = [
                 col
-                for col in df_combine.columns.values
+                for col in df_combine.columns.to_numpy()
                 if matches_time_format(col, time_format)
             ]

@@ -263,7 +286,15 @@ if __name__ == "__main__":
                 df_combine[col] = pd.to_numeric(df_combine[col], errors="coerce")

             df_combine = df_combine.groupby(
-                by=['source', 'scenario (PRIMAP)', 'provenance', 'area (ISO3)', 'entity', 'unit']).sum()
+                by=[
+                    "source",
+                    "scenario (PRIMAP)",
+                    "provenance",
+                    "area (ISO3)",
+                    "entity",
+                    "unit",
+                ]
+            ).sum()

             df_combine.insert(0, cat_label, cat_to_agg)
             df_combine.insert(1, "orig_cat_name", aggregate_cats[cat_to_agg]["name"])
@@ -274,12 +305,19 @@ if __name__ == "__main__":
         else:
             print(f"no data to aggregate category {cat_to_agg}")

-    #conversion to PRIMAP2 native format
+    # conversion to PRIMAP2 native format
     data_pm2_2006 = pm2.pm2io.from_interchange_format(data_if_2006)
     # convert back to IF to have units in the fixed format
     data_if_2006 = data_pm2_2006.pr.to_interchange_format()

-    pm2.pm2io.write_interchange_format(output_folder / (output_filename + coords_terminologies_2006["category"]), data_if_2006)
+    pm2.pm2io.write_interchange_format(
+        output_folder / (output_filename + coords_terminologies_2006["category"]),
+        data_if_2006,
+    )

     encoding = {var: compression for var in data_pm2_2006.data_vars}
-    data_pm2_2006.pr.to_netcdf(output_folder / (output_filename + coords_terminologies_2006["category"] + ".nc"), encoding=encoding)
+    data_pm2_2006.pr.to_netcdf(
+        output_folder
+        / (output_filename + coords_terminologies_2006["category"] + ".nc"),
+        encoding=encoding,
+    )
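The `encoding` dict built from `compression = dict(zlib=True, complevel=9)` turns on per-variable zlib compression when xarray writes the netCDF file. A minimal standalone illustration with a hypothetical dataset:

    import numpy as np
    import xarray as xr

    ds = xr.Dataset({"CO2": ("time", np.arange(3.0))})
    encoding = {var: dict(zlib=True, complevel=9) for var in ds.data_vars}
    ds.to_netcdf("example.nc", encoding=encoding)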

+ 100 - 55
src/unfccc_ghg_data/unfccc_reader/Chile/read_CHL_BUR5_from_xlsx.py

@@ -1,12 +1,16 @@
-# this script reads data from Chile's 2020 national inventory which is underlying BUR4
-# Data is read from the xlsx file
+"""
+Read Chile's 2022 inventory from xlsx
+
+This script reads data from Chile's 2022 national inventory which is underlying BUR5.
+Data is read from the xlsx file
+"""

 import os
 import sys

 import pandas as pd
 import primap2 as pm2
-from .config_chl_bur4 import aggregate_cats, cat_mapping, filter_remove_IPCC2006
+from config_chl_bur4 import aggregate_cats, cat_mapping, filter_remove_IPCC2006
 from primap2.pm2io._data_reading import filter_data, matches_time_format

 from unfccc_ghg_data.helper import downloaded_data_path, extracted_data_path
@@ -17,43 +21,50 @@ if __name__ == "__main__":
     # ###

     # folders and files
-    input_folder = downloaded_data_path / 'UNFCCC' / 'Chile' / 'BUR5'
-    output_folder = extracted_data_path / 'UNFCCC' / 'Chile'
+    input_folder = downloaded_data_path / "UNFCCC" / "Chile" / "BUR5"
+    output_folder = extracted_data_path / "UNFCCC" / "Chile"
     if not output_folder.exists():
         output_folder.mkdir()

-    output_filename = 'CHL_BUR5_2022_'
+    output_filename = "CHL_BUR5_2022_"
 
-    inventory_file = '2022_GEI_CL.xlsx'
+    inventory_file = "2022_GEI_CL.xlsx"
     years_to_read = range(1990, 2020 + 1)
-    time_format='%Y'
+    time_format = "%Y"

     # configuration for conversion to PRIMAP2 data format
     unit_row = "header"
     unit_info = {
-        'regexp_entity': r'(.*)\s\(.*\)$',
-        'regexp_unit': r'.*\s\((.*)\)$',
-        'default_unit': 'kt',
-        'manual_repl_unit': {
-            'kt CO₂ eq': 'ktCO2eq',
-            'HFC (kt CO₂ eq)': 'ktCO2eq',
-            'PFC (kt CO₂ eq)': 'ktCO2eq',
-            'SF₆ (kt CO₂ eq)': 'ktCO2eq',
+        "regexp_entity": r"(.*)\s\(.*\)$",
+        "regexp_unit": r".*\s\((.*)\)$",
+        "default_unit": "kt",
+        "manual_repl_unit": {
+            "kt CO₂ eq": "ktCO2eq",
+            "HFC (kt CO₂ eq)": "ktCO2eq",
+            "PFC (kt CO₂ eq)": "ktCO2eq",
+            "SF₆ (kt CO₂ eq)": "ktCO2eq",
+        },
+        "manual_repl_entity": {
+            "kt CO₂ eq": "KYOTOGHG (AR4GWP100)",
+            "HFC (kt CO₂ eq)": "HFCS (AR4GWP100)",
+            "PFC (kt CO₂ eq)": "PFCS (AR4GWP100)",
+            "SF₆ (kt CO₂ eq)": "SF6 (AR4GWP100)",
         },
-        'manual_repl_entity': {
-            'kt CO₂ eq': 'KYOTOGHG (AR4GWP100)',
-            'HFC (kt CO₂ eq)': 'HFCS (AR4GWP100)',
-            'PFC (kt CO₂ eq)': 'PFCS (AR4GWP100)',
-            'SF₆ (kt CO₂ eq)': 'SF6 (AR4GWP100)',
-        }
     }
-    cols_to_drop = ['Unnamed: 14', 'Unnamed: 16', 'Código IPCC.1',
-                    'Categorías de fuente y sumidero de gases de efecto invernadero.1']
+    cols_to_drop = [
+        "Unnamed: 14",
+        "Unnamed: 16",
+        "Código IPCC.1",
+        "Categorías de fuente y sumidero de gases de efecto invernadero.1",
+    ]
     # columns for category code and original category name
-    index_cols = ['Código IPCC', 'Categorías de fuente y sumidero de gases de efecto invernadero']
+    index_cols = [
+        "Código IPCC",
+        "Categorías de fuente y sumidero de gases de efecto invernadero",
+    ]

     # operations on long format DF
-    cols_for_space_stripping = ['category', 'orig_cat_name', 'entity']
+    cols_for_space_stripping = ["category", "orig_cat_name", "entity"]

     time_format = "%Y"

@@ -83,7 +94,7 @@ if __name__ == "__main__":
         "source": "CHL-GHG-Inventory",
         "source": "CHL-GHG-Inventory",
         "provenance": "measured",
         "provenance": "measured",
         "area": "CHL",
         "area": "CHL",
-        "scenario": "BUR5"
+        "scenario": "BUR5",
     }

     coords_value_mapping = {
@@ -115,14 +126,14 @@ if __name__ == "__main__":
     }

     coords_value_filling = {
-        'category': {  # col to fill
-            'orig_cat_name': {  # col to fill from
-                'Todas las emisiones y las absorciones nacionales': '0',  # from value: to value
-                'Tanque internacional': 'M.BK',
-                'Aviación internacional': 'M.BK.A',
-                'Navegación internacional': 'M.BK.M',
-                'Operaciones multilaterales': 'M.MULTIOP',
-                'Emisiones de CO2 de la biomasa': 'M.BIO',
+        "category": {  # col to fill
+            "orig_cat_name": {  # col to fill from (from value: to value)
+                "Todas las emisiones y las absorciones nacionales": "0",
+                "Tanque internacional": "M.BK",
+                "Aviación internacional": "M.BK.A",
+                "Navegación internacional": "M.BK.M",
+                "Operaciones multilaterales": "M.MULTIOP",
+                "Emisiones de CO2 de la biomasa": "M.BIO",
             }
         }
     }
@@ -132,14 +143,19 @@ if __name__ == "__main__":
             "entity": ["Absorciones CO₂", "Emisiones CO₂"],
             "entity": ["Absorciones CO₂", "Emisiones CO₂"],
         },
         },
         "f2": {
         "f2": {
-            "orig_cat_name": ["Partidas informativas", "Todas las emisiones nacionales"],
+            "orig_cat_name": [
+                "Partidas informativas",
+                "Todas las emisiones nacionales",
+            ],
         },
     }

     filter_keep = {}

     meta_data = {
-        "references": "https://unfccc.int/documents/624735, https://snichile.mma.gob.cl/wp-content/uploads/2023/04/2022_GEI_CL.xlsx",
+        "references": "https://unfccc.int/documents/624735, "
+        "https://snichile.mma.gob.cl/wp-content/uploads/2023/04/"
+        "2022_GEI_CL.xlsx",
         "rights": "",
         "rights": "",
         "contact": "mail@johannes-guetschow.de.de",
         "contact": "mail@johannes-guetschow.de.de",
         "title": "Chile: BUR5",
         "title": "Chile: BUR5",
@@ -163,16 +179,24 @@ if __name__ == "__main__":
     for year in years_to_read:
         # read sheet for the year. Each sheet contains several tables,
         # we only read the upper table as the other tables are summary tables
-        df_current = pd.read_excel(input_folder / inventory_file, sheet_name=str(year), skiprows=2, nrows=442, engine="openpyxl")
+        df_current = pd.read_excel(
+            input_folder / inventory_file,
+            sheet_name=str(year),
+            skiprows=2,
+            nrows=442,
+            engine="openpyxl",
+        )
         # drop the columns which are empty and repetition of the metadata for the second block
-        df_current.drop(cols_to_drop, axis=1, inplace=True)
+        df_current = df_current.drop(cols_to_drop, axis=1)
         # drop all rows where the index cols (category code and name) are both NaN
         # as without one of them there is no category information
-        df_current.dropna(axis=0, how='all', subset=index_cols, inplace=True)
+        df_current = df_current.dropna(axis=0, how="all", subset=index_cols)
         # set multi-index. necessary for the stack operation in the conversion to long format
         df_current = df_current.set_index(index_cols)
         # add unit row using information from entity row and add to index
-        df_current = pm2.pm2io.nir_add_unit_information(df_current, unit_row=unit_row, **unit_info)
+        df_current = pm2.pm2io.nir_add_unit_information(
+            df_current, unit_row=unit_row, **unit_info
+        )
         # actual conversion to long format
         df_current = pm2.pm2io.nir_convert_df_to_long(df_current, year)
         # aggregate to one df
@@ -190,7 +214,7 @@ if __name__ == "__main__":
     for col in cols_for_space_stripping:
         df_all[col] = df_all[col].str.strip()

-    df_all["category"] = df_all["category"].str.rstrip('.')
+    df_all["category"] = df_all["category"].str.rstrip(".")

     data_if = pm2.pm2io.convert_long_dataframe_if(
         df_all,
@@ -206,8 +230,7 @@ if __name__ == "__main__":
         time_format=time_format,
     )

-
-    #conversion to PRIMAP2 native format
+    # conversion to PRIMAP2 native format
     data_pm2 = pm2.pm2io.from_interchange_format(data_if)
     # convert back to IF to have units in the fixed format
     data_if = data_pm2.pr.to_interchange_format()
@@ -215,11 +238,16 @@ if __name__ == "__main__":
     # ###
     # save data to IF and native format
     # ###
-    pm2.pm2io.write_interchange_format(output_folder / (output_filename + coords_terminologies["category"]), data_if)
+    pm2.pm2io.write_interchange_format(
+        output_folder / (output_filename + coords_terminologies["category"]), data_if
+    )

     data_pm2 = pm2.pm2io.from_interchange_format(data_if)
     encoding = {var: compression for var in data_pm2.data_vars}
-    data_pm2.pr.to_netcdf(output_folder / (output_filename + coords_terminologies["category"] + ".nc"), encoding=encoding)
+    data_pm2.pr.to_netcdf(
+        output_folder / (output_filename + coords_terminologies["category"] + ".nc"),
+        encoding=encoding,
+    )

     # ###
     # conversion to ipcc 2006 categories
@@ -236,10 +264,10 @@ if __name__ == "__main__":
         filter_remove=filter_remove,
         filter_keep=filter_keep,
         meta_data=meta_data,
-        time_format=time_format
+        time_format=time_format,
     )

-    cat_label = 'category (' + coords_terminologies_2006["category"] + ')'
+    cat_label = "category (" + coords_terminologies_2006["category"] + ")"
     filter_data(data_if_2006, filter_remove=filter_remove_IPCC2006)
     data_if_2006 = data_if_2006.replace({cat_label: cat_mapping})

@@ -252,10 +280,10 @@ if __name__ == "__main__":
             print(f"Aggregating category {cat_to_agg}")
             print(f"Aggregating category {cat_to_agg}")
             df_combine = df_test.copy(deep=True)
             df_combine = df_test.copy(deep=True)
 
 
-            time_format = '%Y'
+            time_format = "%Y"
             time_columns = [
                 col
-                for col in df_combine.columns.values
+                for col in df_combine.columns.to_numpy()
                 if matches_time_format(col, time_format)
             ]

@@ -263,8 +291,18 @@ if __name__ == "__main__":
                 df_combine[col] = pd.to_numeric(df_combine[col], errors="coerce")

             df_combine = df_combine.groupby(
-                by=['source', 'scenario (PRIMAP)', 'provenance', 'area (ISO3)', 'entity', 'unit']).sum()
-            df_combine = df_combine.drop(columns=["category (IPCC2006_PRIMAP)", "orig_cat_name"])
+                by=[
+                    "source",
+                    "scenario (PRIMAP)",
+                    "provenance",
+                    "area (ISO3)",
+                    "entity",
+                    "unit",
+                ]
+            ).sum()
+            df_combine = df_combine.drop(
+                columns=["category (IPCC2006_PRIMAP)", "orig_cat_name"]
+            )

             df_combine.insert(0, cat_label, cat_to_agg)
             df_combine.insert(1, "orig_cat_name", aggregate_cats[cat_to_agg]["name"])
@@ -275,12 +313,19 @@ if __name__ == "__main__":
         else:
             print(f"no data to aggregate category {cat_to_agg}")

-    #conversion to PRIMAP2 native format
+    # conversion to PRIMAP2 native format
     data_pm2_2006 = pm2.pm2io.from_interchange_format(data_if_2006)
     # convert back to IF to have units in the fixed format
     data_if_2006 = data_pm2_2006.pr.to_interchange_format()

-    pm2.pm2io.write_interchange_format(output_folder / (output_filename + coords_terminologies_2006["category"]), data_if_2006)
+    pm2.pm2io.write_interchange_format(
+        output_folder / (output_filename + coords_terminologies_2006["category"]),
+        data_if_2006,
+    )

     encoding = {var: compression for var in data_pm2_2006.data_vars}
-    data_pm2_2006.pr.to_netcdf(output_folder / (output_filename + coords_terminologies_2006["category"] + ".nc"), encoding=encoding)
+    data_pm2_2006.pr.to_netcdf(
+        output_folder
+        / (output_filename + coords_terminologies_2006["category"] + ".nc"),
+        encoding=encoding,
+    )
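As in the BUR4 reader, the year columns are coerced to numeric before the groupby-sum; `errors="coerce"` turns any non-numeric cells (e.g. notation keys, if present) into NaN instead of raising:

    import pandas as pd

    pd.to_numeric(pd.Series(["1.5", "NE", 2.0]), errors="coerce")
    # -> [1.5, NaN, 2.0]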

+ 30 - 1
src/unfccc_ghg_data/unfccc_reader/Colombia/__init__.py

@@ -1 +1,30 @@
-"""Code to read Colombia's submissions"""
+"""Read Colombia's BURs, NIRs, NCs
+
+Scripts and configurations to read Argentina's submissions to the UNFCCC.
+Currently, the following submissions and datasets are available (all datasets
+including DI (red using the DI-reader) and legacy BUR/NIR (no code)):
+
+.. exec_code::
+    :hide_code:
+
+    from unfccc_ghg_data.helper.functions import (get_country_datasets,
+                                                  get_country_submissions)
+    country = 'COL'
+    # print available submissions
+    print("="*15 + " Available submissions " + "="*15)
+    get_country_submissions(country, True)
+    print("")
+
+    # print available datasets
+    print("="*15 + " Available datasets " + "="*15)
+    get_country_datasets(country, True)
+
+You can also obtain this information running
+
+.. code-block:: bash
+
+    poetry run doit country_info country=COL
+
+See below for a listing of scripts for BUR/NIR reading including links.
+
+"""

+ 104 - 84
src/unfccc_ghg_data/unfccc_reader/Colombia/read_COL_BUR3_from_xlsx.py

@@ -1,6 +1,10 @@
-# this script reads data from Colombia's BUR3
-# Data is read from the xlsx file which has been exported from the google docs
-# spreadsheet which is linked in the BUR
+"""
+Read Colombia's BUR3 from xlsx
+
+This script reads data from Colombia's BUR3.
+Data is read from the xlsx file which has been exported from the Google Docs
+spreadsheet which is linked in the BUR.
+"""

 import pandas as pd
 import primap2 as pm2
@@ -14,17 +18,17 @@ if __name__ == "__main__":
     # ###
     # configuration
     # ###
-    input_folder = downloaded_data_path / 'UNFCCC' / 'Colombia' / 'BUR3'
-    output_folder = extracted_data_path / 'UNFCCC' / 'Colombia'
+    input_folder = downloaded_data_path / "UNFCCC" / "Colombia" / "BUR3"
+    output_folder = extracted_data_path / "UNFCCC" / "Colombia"
     if not output_folder.exists():
         output_folder.mkdir()

-    output_filename = 'COL_BUR3_2022_'
+    output_filename = "COL_BUR3_2022_"
 
-    inventory_file = 'TR_1990-2018_BUR3-AR5_VF.xlsx'
+    inventory_file = "TR_1990-2018_BUR3-AR5_VF.xlsx"
     years_to_read = range(1990, 2018 + 1)

-    sheet_to_read = 'TR 1990-2018'
+    sheet_to_read = "TR 1990-2018"
     cols_to_read = range(0, 47)

     compression = dict(zlib=True, complevel=9)
@@ -37,7 +41,6 @@ if __name__ == "__main__":
         "unit": "unit",
         "unit": "unit",
     }
     }
 
 
-
     coords_terminologies = {
         "area": "ISO3",
         "category": "IPCC2006",
@@ -54,53 +57,52 @@ if __name__ == "__main__":
     coords_value_mapping = {
         "unit": "PRIMAP1",
         "entity": {
-            'Absorciones CO2': 'CO2 Absorptions',
-            'Emisiones CO2': 'CO2 Emissions',
-            'Emisiones netas (AR5GWP100)': 'KYOTOGHG (AR5GWP100)',
-            'HFC-23': 'HFC23',
-            'HFC-32': 'HFC32',
+            "Absorciones CO2": "CO2 Absorptions",
+            "Emisiones CO2": "CO2 Emissions",
+            "Emisiones netas (AR5GWP100)": "KYOTOGHG (AR5GWP100)",
+            "HFC-23": "HFC23",
+            "HFC-32": "HFC32",
             #'HFC-41': 'HFC41',
-            'HFC-43-10mee': 'HFC4310mee',
-            'HFC-125': 'HFC125',
+            "HFC-43-10mee": "HFC4310mee",
+            "HFC-125": "HFC125",
             #'HFC-134': 'HFC134',
-            'HFC-134a': 'HFC134a',
-            'HFC-152a': 'HFC152a',
+            "HFC-134a": "HFC134a",
+            "HFC-152a": "HFC152a",
             #'HFC-143': 'HFC143',
-            'HFC-143a': 'HFC143a',
-            'HFC-227ea': 'HFC227ea',
-            'HFC-236fa': 'HFC236fa',
+            "HFC-143a": "HFC143a",
+            "HFC-227ea": "HFC227ea",
+            "HFC-236fa": "HFC236fa",
             #'HFC-245ca': 'HFC245ca',
-            'HFC-245fa': 'HFC245fa',
-            'HFC-365mfc': 'HFC365mfc',
-            'PFC-116': 'C2F6',
-            'PFC-14': 'CF4',
+            "HFC-245fa": "HFC245fa",
+            "HFC-365mfc": "HFC365mfc",
+            "PFC-116": "C2F6",
+            "PFC-14": "CF4",
         },
     }

-
     filter_remove = {
         "fGWP": {
             "entity": [
-                'Absorciones CO2 (AR5GWP100)',
-                'Absorciones totales (AR5GWP100)',
-                'CH4 (AR5GWP100)',
-                'Emisiones CO2 (AR5GWP100)',
-                'Total emisiones (AR5GWP100)',
-                'HFC-125 (AR5GWP100)',
-                'HFC-134a (AR5GWP100)',
-                'HFC-143a (AR5GWP100)',
-                'HFC-152a (AR5GWP100)',
-                'HFC-227ea (AR5GWP100)',
-                'HFC-23 (AR5GWP100)',
-                'HFC-236fa (AR5GWP100)',
-                'HFC-245fa (AR5GWP100)',
-                'HFC-32 (AR5GWP100)',
-                'HFC-365mfc (AR5GWP100)',
-                'HFC-43-10mee (AR5GWP100)',
-                'N2O (AR5GWP100)',
-                'PFC-116 (AR5GWP100)',
-                'PFC-14 (AR5GWP100)',
-                'SF6 (AR5GWP100)',
+                "Absorciones CO2 (AR5GWP100)",
+                "Absorciones totales (AR5GWP100)",
+                "CH4 (AR5GWP100)",
+                "Emisiones CO2 (AR5GWP100)",
+                "Total emisiones (AR5GWP100)",
+                "HFC-125 (AR5GWP100)",
+                "HFC-134a (AR5GWP100)",
+                "HFC-143a (AR5GWP100)",
+                "HFC-152a (AR5GWP100)",
+                "HFC-227ea (AR5GWP100)",
+                "HFC-23 (AR5GWP100)",
+                "HFC-236fa (AR5GWP100)",
+                "HFC-245fa (AR5GWP100)",
+                "HFC-32 (AR5GWP100)",
+                "HFC-365mfc (AR5GWP100)",
+                "HFC-43-10mee (AR5GWP100)",
+                "N2O (AR5GWP100)",
+                "PFC-116 (AR5GWP100)",
+                "PFC-14 (AR5GWP100)",
+                "SF6 (AR5GWP100)",
             ],
         },
     }
@@ -116,25 +118,33 @@ if __name__ == "__main__":
         "institution": "UNFCCC",
         "institution": "UNFCCC",
     }
     }
 
 
-
     # read the data
-    data_raw = pd.read_excel(input_folder / inventory_file, sheet_name=sheet_to_read,
-                             skiprows=0, nrows=15025, usecols=cols_to_read,
-                             engine="openpyxl", header=None)
+    data_raw = pd.read_excel(
+        input_folder / inventory_file,
+        sheet_name=sheet_to_read,
+        skiprows=0,
+        nrows=15025,
+        usecols=cols_to_read,
+        engine="openpyxl",
+        header=None,
+    )

     # fill the units to the right as for merged cells the unit is only in the first cell
-    data_raw.iloc[unit_row] = data_raw.iloc[unit_row].fillna(axis=0, method="ffill")
+    data_raw.iloc[unit_row] = data_raw.iloc[unit_row].ffill(axis=0)
     merge_rows = [1, 2]
     for row in merge_rows:
         data_raw.iloc[row] = data_raw.iloc[row].astype(str).str.replace("nan", "")
     data_raw.iloc[merge_rows[0]] = (
-    data_raw.iloc[merge_rows[0]].astype(str) + " " + data_raw.iloc[
-            merge_rows[1]].astype(str))
+        data_raw.iloc[merge_rows[0]].astype(str)
+        + " "
+        + data_raw.iloc[merge_rows[1]].astype(str)
+    )
     data_raw.iloc[merge_rows[0]] = data_raw.iloc[merge_rows[0]].str.strip()
     data_raw = data_raw.drop(index=data_raw.index[merge_rows[1]])

     # merge the category cols
     def join_code_parts(series):
+        """Create a code from the data in the individual columns"""
         code = series.iloc[0]
         for part in series.iloc[1:]:
             if part != "nan":
@@ -143,10 +153,11 @@ if __name__ == "__main__":
             code = "0"
             code = "0"
         return code
         return code
 
 
-    cat_columns = [0, 1, 2, 3, 4, 5] # xlsx cols are ["MOD","CAP","CAT","SCAT","NROM",
+    cat_columns = [0, 1, 2, 3, 4, 5]  # xlsx cols are ["MOD","CAP","CAT","SCAT","NROM",
     # "NUM"]
     # "NUM"]
-    data_raw["category"] = data_raw[cat_columns].astype(str).agg(func=join_code_parts,
-                                                                 axis=1)
+    data_raw["category"] = (
+        data_raw[cat_columns].astype(str).agg(func=join_code_parts, axis=1)
+    )
     data_raw = data_raw.drop(columns=cat_columns)
     data_raw = data_raw.drop(columns=cat_columns)

     # prepare the dataframe for processing with primap2 functions
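The merged-cell fix a few hunks up (`.ffill(axis=0)`, replacing the deprecated `fillna(method="ffill")`) forward-fills the unit row, because merged cells in the exported xlsx leave the unit only in the first cell of each block:

    import pandas as pd

    units = pd.Series(["Gg", None, None, "GgCO2eq", None])
    units.ffill()  # -> ["Gg", "Gg", "Gg", "GgCO2eq", "GgCO2eq"]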
@@ -162,27 +173,29 @@ if __name__ == "__main__":
     for year in years:
         data_year = data_raw[data_raw["ANO"] == year]
         data_year = data_year.drop(columns=["ANO", "Categorías de fuente y sumideros"])
-        df_long_new = pm2.pm2io.nir_convert_df_to_long(data_year, year,
-                                                       ["category", "unit", "entity",
-                                                        "time", "data"])
+        df_long_new = pm2.pm2io.nir_convert_df_to_long(
+            data_year, year, ["category", "unit", "entity", "time", "data"]
+        )
         if df_all is None:
             df_all = df_long_new
         else:
-            df_all = pd.concat([df_all, df_long_new], axis=0, join='outer')
+            df_all = pd.concat([df_all, df_long_new], axis=0, join="outer")

     df_all["category"] = df_all["category"].str[0]

     # map units
-    df_all["unit"] = df_all["unit"].replace({
-        'GEI DIRECTOS - Gg ': 'Gg',
-        'GEI DIRECTOS - Gg CO2 equivalente': 'GgCO2eq',
-    }
+    df_all["unit"] = df_all["unit"].replace(
+        {
+            "GEI DIRECTOS - Gg ": "Gg",
+            "GEI DIRECTOS - Gg CO2 equivalente": "GgCO2eq",
+        }
     )
     )

     # add GWP information to entity
     for entity in df_all["entity"].unique():
-        df_all["entity"][(df_all["entity"] == entity) & (
-                    df_all["unit"] == "GgCO2eq")] = f"{entity} (AR5GWP100)"
+        df_all["entity"][
+            (df_all["entity"] == entity) & (df_all["unit"] == "GgCO2eq")
+        ] = f"{entity} (AR5GWP100)"

     # reset index before conversion to pm2 IF
     df_all = df_all.reset_index(drop=True)
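The entity loop above still assigns through chained indexing (`df_all["entity"][mask] = ...`), which pandas flags with a SettingWithCopyWarning; an equivalent single-step form would be:

    mask = (df_all["entity"] == entity) & (df_all["unit"] == "GgCO2eq")
    df_all.loc[mask, "entity"] = f"{entity} (AR5GWP100)"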
@@ -196,26 +209,25 @@ if __name__ == "__main__":
     data_if = pm2.pm2io.convert_long_dataframe_if(
         df_all,
         coords_cols=coords_cols,
-        #add_coords_cols=add_coords_cols,
+        # add_coords_cols=add_coords_cols,
         coords_defaults=coords_defaults,
         coords_terminologies=coords_terminologies,
         coords_value_mapping=coords_value_mapping,
-        #coords_value_filling=coords_value_filling,
+        # coords_value_filling=coords_value_filling,
         filter_remove=filter_remove,
-        #filter_keep=filter_keep,
+        # filter_keep=filter_keep,
         meta_data=meta_data,
-        convert_str=True
-        )
-
+        convert_str=True,
+        time_format="%Y",
+    )

     # combine CO2 emissions and absorptions
-    data_CO2 = data_if[data_if["entity"].isin([
-        'CO2 Absorptions', 'CO2 Emissions'])]
+    data_CO2 = data_if[data_if["entity"].isin(["CO2 Absorptions", "CO2 Emissions"])]
 
-    time_format = '%Y'
+    time_format = "%Y"
     time_columns = [
         col
-        for col in data_CO2.columns.values
+        for col in data_CO2.columns.to_numpy()
         if matches_time_format(col, time_format)
     ]

@@ -223,20 +235,23 @@ if __name__ == "__main__":
         data_CO2[col] = pd.to_numeric(data_CO2[col], errors="coerce")

     data_CO2 = data_CO2.groupby(
-        by=['source', 'scenario (PRIMAP)', 'provenance', 'area (ISO3)',
+        by=[
+            "source",
+            "scenario (PRIMAP)",
+            "provenance",
+            "area (ISO3)",
             f"category ({coords_terminologies['category']})",
             f"category ({coords_terminologies['category']})",
-            'unit']).sum(min_count = 1)
+            "unit",
+        ]
+    ).sum(min_count=1)
 
-    data_CO2.insert(0, 'entity', 'CO2')
+    data_CO2.insert(0, "entity", "CO2")
     data_CO2 = data_CO2.reset_index()
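`min_count=1` on the sum above keeps groups that are all-NaN as NaN instead of collapsing them to 0, so missing CO2 data does not turn into a spurious zero:

    import pandas as pd

    s = pd.Series([float("nan"), float("nan")])
    s.sum()             # -> 0.0
    s.sum(min_count=1)  # -> nan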

     data_if = pd.concat([data_if, data_CO2])

-
-
     data_pm2 = pm2.pm2io.from_interchange_format(data_if)

-
     # convert back to IF to have units in the fixed format
     data_if = data_pm2.pr.to_interchange_format()

@@ -245,7 +260,12 @@ if __name__ == "__main__":
     # ###
     if not output_folder.exists():
         output_folder.mkdir()
-    pm2.pm2io.write_interchange_format(output_folder / (output_filename + coords_terminologies["category"]), data_if)
+    pm2.pm2io.write_interchange_format(
+        output_folder / (output_filename + coords_terminologies["category"]), data_if
+    )

     encoding = {var: compression for var in data_pm2.data_vars}
-    data_pm2.pr.to_netcdf(output_folder / (output_filename + coords_terminologies["category"] + ".nc"), encoding=encoding)
+    data_pm2.pr.to_netcdf(
+        output_folder / (output_filename + coords_terminologies["category"] + ".nc"),
+        encoding=encoding,
+    )

+ 30 - 0
src/unfccc_ghg_data/unfccc_reader/Indonesia/__init__.py

@@ -0,0 +1,30 @@
+"""Read Indonesia's BURs, NIRs, NCs
+
+Scripts and configurations to read Indonesia's submissions to the UNFCCC.
+Currently, the following submissions and datasets are available (all datasets
+including DI (read using the DI-reader) and legacy BUR/NIR (no code)):
+
+.. exec_code::
+    :hide_code:
+
+    from unfccc_ghg_data.helper.functions import (get_country_datasets,
+                                                  get_country_submissions)
+    country = 'IDN'
+    # print available submissions
+    print("="*15 + " Available submissions " + "="*15)
+    get_country_submissions(country, True)
+    print("")
+
+    # print available datasets
+    print("="*15 + " Available datasets " + "="*15)
+    get_country_datasets(country, True)
+
+You can also obtain this information running
+
+.. code-block:: bash
+
+    poetry run doit country_info country=IDN
+
+See below for a listing of scripts for BUR/NIR reading including links.
+
+"""

+ 167 - 100
src/unfccc_ghg_data/unfccc_reader/Indonesia/read_IDN_BUR3_from_pdf.py

@@ -1,6 +1,11 @@
-# this script reads data from Indonesia's BUR3
-# Data is read from pdf
-# only the 2019 inventory is read as the BUR refers to BUR2 for earlier years
+"""
+Read Indonesia's BUR3 from pdf
+
+This script reads data from Indonesia's BUR3.
+Data are read from the pdf using camelot.
+Only the 2019 inventory is read, as the BUR refers to BUR2 for earlier years.
+
+"""
 
 import camelot
 import numpy as np
@@ -14,18 +19,19 @@ if __name__ == "__main__":
     # ###
     # configuration
     # ###
-    input_folder = downloaded_data_path / 'UNFCCC' / 'Indonesia' / 'BUR3'
-    output_folder = extracted_data_path / 'UNFCCC' / 'Indonesia'
+    input_folder = downloaded_data_path / "UNFCCC" / "Indonesia" / "BUR3"
+    output_folder = extracted_data_path / "UNFCCC" / "Indonesia"
     if not output_folder.exists():
         output_folder.mkdir()
 
-    output_filename = 'IDN_BUR3_2021_'
+    output_filename = "IDN_BUR3_2021_"
 
-    inventory_file = 'IndonesiaBUR_3_FINAL_REPORT_2.pdf'
+    inventory_file = "IndonesiaBUR_3_FINAL_REPORT_2.pdf"
 
-    gwp_to_use = 'SARGWP100'
+    gwp_to_use = "SARGWP100"
 
-    pages_to_read = range(61,65) # 65 is not read properly but contains almost no data anyway, so add it by hand '61-65'
+    pages_to_read = range(61, 65)  # 65 is not read properly but contains almost no
+    # data anyway, so add it by hand
 
     compression = dict(zlib=True, complevel=9)
 
@@ -36,17 +42,18 @@ if __name__ == "__main__":
     # special header as category code and name in one column
     header_long = ["orig_cat_name", "entity", "unit", "time", "data"]
 
+    time_format = "%Y"
 
     # manual category codes
     cat_codes_manual = {
-        'Total National Emissions and Removals': '0',
-        'Peat Decomposition': 'M.3.B.4.APD',
-        'Peat Fire': 'M.3.B.4.APF',
-        '4A1.2 Industrial Solid Waste Disposal': 'M.4.A.Ind',
+        "Total National Emissions and Removals": "0",
+        "Peat Decomposition": "M.3.B.4.APD",
+        "Peat Fire": "M.3.B.4.APF",
+        "4A1.2 Industrial Solid Waste Disposal": "M.4.A.Ind",
         #'3A2b Direct N2O Emissions from Manure Management': '3.A.2',
     }
 
-    cat_code_regexp = r'(?P<code>^[a-zA-Z0-9]{1,4})\s.*'
+    cat_code_regexp = r"(?P<code>^[a-zA-Z0-9]{1,4})\s.*"
 
     coords_cols = {
         "category": "category",
@@ -75,24 +82,26 @@ if __name__ == "__main__":
         "unit": "PRIMAP1",
         "unit": "PRIMAP1",
         "category": "PRIMAP1",
         "category": "PRIMAP1",
         "entity": {
         "entity": {
-            'Total 3 Gases': f"CO2CH4N2O ({gwp_to_use})",
-            'Net CO2 (1) (2)': 'CO2',
-            'CH4': f"CH4 ({gwp_to_use})",
-            'N2O': f"N2O ({gwp_to_use})",
-            'HFCs': f"HFCS ({gwp_to_use})",
-            'PFCs': f"PFCS ({gwp_to_use})",
-            'SF6': f"SF6 ({gwp_to_use})",
-            'NOx': 'NOX',
-            'CO': 'CO', # no mapping, just added for completeness here
-            'NMVOCs': 'NMVOC',
-            'SO2': 'SO2', # no mapping, just added for completeness here
-            'Other halogenated gases with CO2 equivalent conversion factors (3)': f"OTHERHFCS ({gwp_to_use})",
+            "Total 3 Gases": f"CO2CH4N2O ({gwp_to_use})",
+            "Net CO2 (1) (2)": "CO2",
+            "CH4": f"CH4 ({gwp_to_use})",
+            "N2O": f"N2O ({gwp_to_use})",
+            "HFCs": f"HFCS ({gwp_to_use})",
+            "PFCs": f"PFCS ({gwp_to_use})",
+            "SF6": f"SF6 ({gwp_to_use})",
+            "NOx": "NOX",
+            "CO": "CO",  # no mapping, just added for completeness here
+            "NMVOCs": "NMVOC",
+            "SO2": "SO2",  # no mapping, just added for completeness here
+            "Other halogenated gases with CO2 equivalent conversion factors (3)": f"OTHERHFCS ({gwp_to_use})",
         },
     }
 
-
     filter_remove = {
-        "fHFC": {"entity": 'Other halogenated gases without CO2 equivalent conversion factors (4)'}
+        "fHFC": {
+            "entity": "Other halogenated gases without CO2 equivalent conversion "
+            "factors (4)"
+        }
     }
 
     filter_keep = {}
@@ -107,84 +116,113 @@ if __name__ == "__main__":
     }
 
     # convert to mass units where possible
-    entities_to_convert_to_mass = [
-        'CH4', 'N2O', 'SF6'
-    ]
+    entities_to_convert_to_mass = ["CH4", "N2O", "SF6"]
 
-    # CO2 equivalents don't make sense for these substances, so unit has to be Gg instead of Gg CO2 equivalents as indicated in the table
-    entities_to_fix_unit = [
-        'NOx', 'CO', 'NMVOCs', 'SO2'
-    ]
+    # CO2 equivalents don't make sense for these substances, so unit has to be Gg
+    # instead of Gg CO2 equivalents as indicated in the table
+    entities_to_fix_unit = ["NOx", "CO", "NMVOCs", "SO2"]
 
     # add the data for the last page by hand as it's only one row
     data_last_page = [
-        ['5B Other (please specify)', 'Total 3 Gases', 'GgCO2eq', '2019', 'NE'],
-        ['5B Other (please specify)', 'Net CO2 (1) (2)', 'GgCO2eq', '2019', np.nan],
-        ['5B Other (please specify)', 'CH4', 'GgCO2eq', '2019', np.nan],
-        ['5B Other (please specify)', 'N2O', 'GgCO2eq', '2019', np.nan],
-        ['5B Other (please specify)', 'HFCs', 'GgCO2eq', '2019', np.nan],
-        ['5B Other (please specify)', 'PFCs', 'GgCO2eq', '2019', np.nan],
-        ['5B Other (please specify)', 'SF6', 'GgCO2eq', '2019', np.nan],
-        ['5B Other (please specify)', 'Other halogenated gases with CO2 equivalent conversion factors (3)', 'GgCO2eq', '2019', np.nan],
-        ['5B Other (please specify)', 'Other halogenated gases without CO2 equivalent conversion factors (4)', 'GgCO2eq', '2019', np.nan],
-        ['5B Other (please specify)', 'NOx', 'GgCO2eq', '2019', np.nan],
-        ['5B Other (please specify)', 'CO', 'GgCO2eq', '2019', np.nan],
-        ['5B Other (please specify)', 'NMVOCs', 'GgCO2eq', '2019', np.nan],
-        ['5B Other (please specify)', 'SO2', 'GgCO2eq', '2019', np.nan],
+        ["5B Other (please specify)", "Total 3 Gases", "GgCO2eq", "2019", "NE"],
+        ["5B Other (please specify)", "Net CO2 (1) (2)", "GgCO2eq", "2019", np.nan],
+        ["5B Other (please specify)", "CH4", "GgCO2eq", "2019", np.nan],
+        ["5B Other (please specify)", "N2O", "GgCO2eq", "2019", np.nan],
+        ["5B Other (please specify)", "HFCs", "GgCO2eq", "2019", np.nan],
+        ["5B Other (please specify)", "PFCs", "GgCO2eq", "2019", np.nan],
+        ["5B Other (please specify)", "SF6", "GgCO2eq", "2019", np.nan],
+        [
+            "5B Other (please specify)",
+            "Other halogenated gases with CO2 equivalent conversion factors (3)",
+            "GgCO2eq",
+            "2019",
+            np.nan,
+        ],
+        [
+            "5B Other (please specify)",
+            "Other halogenated gases without CO2 equivalent conversion factors (4)",
+            "GgCO2eq",
+            "2019",
+            np.nan,
+        ],
+        ["5B Other (please specify)", "NOx", "GgCO2eq", "2019", np.nan],
+        ["5B Other (please specify)", "CO", "GgCO2eq", "2019", np.nan],
+        ["5B Other (please specify)", "NMVOCs", "GgCO2eq", "2019", np.nan],
+        ["5B Other (please specify)", "SO2", "GgCO2eq", "2019", np.nan],
     ]
 
     df_last_page = pd.DataFrame(data_last_page, columns=header_long)
 
     aggregate_cats = {
-        '1.A.4': {'sources': ['1.A.4.a', '1.A.4.b'], 'name': 'Other Sectors (calculated)'},
-        '2.A.4': {'sources': ['2.A.4.a', '2.A.4.b', '2.A.4.d'], 'name': 'Other Process uses of Carbonates (calculated)'},
-        '2.B.8': {'sources': ['2.B.8.a', '2.B.8.b', '2.B.8.c', '2.B.8.f'], 'name': 'Petrochemical and Carbon Black production (calculated)'},
-        '4.A': {'sources': ['4.A.2', 'M.4.A.Ind'], 'name': 'Solid Waste Disposal (calculated)'},
+        "1.A.4": {
+            "sources": ["1.A.4.a", "1.A.4.b"],
+            "name": "Other Sectors (calculated)",
+        },
+        "2.A.4": {
+            "sources": ["2.A.4.a", "2.A.4.b", "2.A.4.d"],
+            "name": "Other Process uses of Carbonates (calculated)",
+        },
+        "2.B.8": {
+            "sources": ["2.B.8.a", "2.B.8.b", "2.B.8.c", "2.B.8.f"],
+            "name": "Petrochemical and Carbon Black production (calculated)",
+        },
+        "4.A": {
+            "sources": ["4.A.2", "M.4.A.Ind"],
+            "name": "Solid Waste Disposal (calculated)",
+        },
     }
 
     aggregate_cats_N2O = {
-        '3.A.2': {'sources': ['3.A.2.b'], 'name': '3A2 Manure Management'},
-        '3.A': {'sources': ['3.A.2'], 'name': '3A Livestock'},
+        "3.A.2": {"sources": ["3.A.2.b"], "name": "3A2 Manure Management"},
+        "3.A": {"sources": ["3.A.2"], "name": "3A Livestock"},
     }
 
     aggregate_cats_CO2CH4N2O = {
-        '3.A.2': {'sources': ['3.A.2', '3.A.2.b'], 'name': '3A2 Manure Management'},
+        "3.A.2": {"sources": ["3.A.2", "3.A.2.b"], "name": "3A2 Manure Management"},
     }
 
     df_all = None
 
     for page in pages_to_read:
-        tables = camelot.read_pdf(str(input_folder / inventory_file), pages=str(page),
-                                  flavor='lattice')
+        tables = camelot.read_pdf(
+            str(input_folder / inventory_file), pages=str(page), flavor="lattice"
+        )
         df_this_table = tables[0].df
         # replace line breaks, double, and triple spaces in category names
         df_this_table.iloc[:, 0] = df_this_table.iloc[:, 0].str.replace("\n", " ")
         df_this_table.iloc[:, 0] = df_this_table.iloc[:, 0].str.replace("   ", " ")
         df_this_table.iloc[:, 0] = df_this_table.iloc[:, 0].str.replace("  ", " ")
         # replace line breaks in units and entities
-        df_this_table.iloc[entity_row] = df_this_table.iloc[entity_row].str.replace('\n',
-                                                                                    '')
-        df_this_table.iloc[unit_row] = df_this_table.iloc[unit_row].str.replace('\n', '')
+        df_this_table.iloc[entity_row] = df_this_table.iloc[entity_row].str.replace(
+            "\n", ""
+        )
+        df_this_table.iloc[unit_row] = df_this_table.iloc[unit_row].str.replace(
+            "\n", ""
+        )
 
-        df_this_table = pm2.pm2io.nir_add_unit_information(df_this_table, unit_row=unit_row,
-                                                           entity_row=entity_row,
-                                                           regexp_entity=".*",
-                                                           default_unit="GgCO2eq")  # , **unit_info)
+        df_this_table = pm2.pm2io.nir_add_unit_information(
+            df_this_table,
+            unit_row=unit_row,
+            entity_row=entity_row,
+            regexp_entity=".*",
+            default_unit="GgCO2eq",
+        )
 
         # set index and convert to long format
         df_this_table = df_this_table.set_index(index_cols)
-        df_this_table_long = pm2.pm2io.nir_convert_df_to_long(df_this_table, year,
-                                                              header_long)
+        df_this_table_long = pm2.pm2io.nir_convert_df_to_long(
+            df_this_table, year, header_long
+        )
         df_this_table_long["orig_cat_name"] = df_this_table_long["orig_cat_name"].str[0]
 
         # combine with tables for other sectors (merge not append)
         if df_all is None:
             df_all = df_this_table_long
         else:
-            df_all = pd.concat([df_all, df_this_table_long], axis=0, join='outer')
+            df_all = pd.concat([df_all, df_this_table_long], axis=0, join="outer")
 
     # add the last page manually
-    df_all = pd.concat([df_all, df_last_page], axis=0, join='outer')
+    df_all = pd.concat([df_all, df_last_page], axis=0, join="outer")
 
     # fix the units of aerosols and precursors
     for entity in entities_to_fix_unit:
@@ -196,22 +234,24 @@ if __name__ == "__main__":
     # replace cat names by codes in col "category"
     # first the manual replacements
     df_all["category"] = df_all["category"].replace(cat_codes_manual)
+
     # then the regex replacements
-    def repl(m):
-        return m.group('code')
-    df_all["category"] = df_all["category"].str.replace(cat_code_regexp, repl, regex=True)
+    def repl(m):  # noqa: D103
+        return m.group("code")
+
+    df_all["category"] = df_all["category"].str.replace(
+        cat_code_regexp, repl, regex=True
+    )
     df_all = df_all.reset_index(drop=True)
 
     ###### convert to primap2 IF
 
     # replace "," with "" in data
-    df_all.loc[:, "data"] = df_all.loc[:, "data"].str.replace(',','', regex=False)
+    df_all.loc[:, "data"] = df_all.loc[:, "data"].str.replace(",", "", regex=False)
 
     # make sure all col headers are str
     df_all.columns = df_all.columns.map(str)
 
-
-
     # ###
     # convert to PRIMAP2 interchange format
     # ###
@@ -222,12 +262,13 @@ if __name__ == "__main__":
         coords_defaults=coords_defaults,
         coords_terminologies=coords_terminologies,
         coords_value_mapping=coords_value_mapping,
-        #coords_value_filling=coords_value_filling,
+        # coords_value_filling=coords_value_filling,
         filter_remove=filter_remove,
-        #filter_keep=filter_keep,
+        # filter_keep=filter_keep,
         meta_data=meta_data,
-        convert_str=True
-        )
+        convert_str=True,
+        time_format=time_format,
+    )
 
     cat_label = "category (IPCC2006)"
 
@@ -244,10 +285,9 @@ if __name__ == "__main__":
             print(f"Aggregating category {cat_to_agg}")
             df_combine = df_test.copy(deep=True)
 
-            time_format = '%Y'
             time_columns = [
                 col
-                for col in df_combine.columns.values
+                for col in df_combine.columns.to_numpy()
                 if matches_time_format(col, time_format)
             ]
 
@@ -255,8 +295,15 @@ if __name__ == "__main__":
                 df_combine[col] = pd.to_numeric(df_combine[col], errors="coerce")
 
             df_combine = df_combine.groupby(
-                by=['source', 'scenario (PRIMAP)', 'provenance', 'area (ISO3)', 'entity',
-                    'unit']).sum(min_count=1)
+                by=[
+                    "source",
+                    "scenario (PRIMAP)",
+                    "provenance",
+                    "area (ISO3)",
+                    "entity",
+                    "unit",
+                ]
+            ).sum(min_count=1)
 
             df_combine.insert(0, cat_label, cat_to_agg)
             df_combine.insert(1, "orig_cat_name", aggregate_cats[cat_to_agg]["name"])
@@ -267,10 +314,10 @@ if __name__ == "__main__":
         else:
             print(f"no data to aggregate category {cat_to_agg}")
 
-
     # delete cat 3 for N2O as it's wrong
-    index_3A_N2O = data_if[(data_if[cat_label] == '3') &
-                           (data_if['entity'] == 'N2O')].index
+    index_3A_N2O = data_if[
+        (data_if[cat_label] == "3") & (data_if["entity"] == "N2O")
+    ].index
     data_if = data_if.drop(index_3A_N2O)
 
     # aggregate cat 3 for N2O
@@ -283,10 +330,10 @@ if __name__ == "__main__":
             print(f"Aggregating category {cat_to_agg}")
             df_combine = df_test.copy(deep=True)
 
-            time_format = '%Y'
+            time_format = "%Y"
             time_columns = [
                 col
-                for col in df_combine.columns.values
+                for col in df_combine.columns.to_numpy()
                 if matches_time_format(col, time_format)
             ]
 
@@ -294,11 +341,20 @@ if __name__ == "__main__":
                 df_combine[col] = pd.to_numeric(df_combine[col], errors="coerce")
 
             df_combine = df_combine.groupby(
-                by=['source', 'scenario (PRIMAP)', 'provenance', 'area (ISO3)', 'entity',
-                    'unit']).sum(min_count=1)
+                by=[
+                    "source",
+                    "scenario (PRIMAP)",
+                    "provenance",
+                    "area (ISO3)",
+                    "entity",
+                    "unit",
+                ]
+            ).sum(min_count=1)
 
             df_combine.insert(0, cat_label, cat_to_agg)
-            df_combine.insert(1, "orig_cat_name", aggregate_cats_N2O[cat_to_agg]["name"])
+            df_combine.insert(
+                1, "orig_cat_name", aggregate_cats_N2O[cat_to_agg]["name"]
+            )
 
             df_combine = df_combine.reset_index()
 
@@ -307,8 +363,9 @@ if __name__ == "__main__":
             print(f"no data to aggregate category {cat_to_agg}")
 
     # delete cat 3.A.2 for CO2CH4N2O as it's wrong
-    index_3A2_CO2CH4N2O = data_if[(data_if[cat_label] == '3.A.2') &
-                           (data_if['entity'] == 'CH4CO2N2O (SARGWP100)')].index
+    index_3A2_CO2CH4N2O = data_if[
+        (data_if[cat_label] == "3.A.2") & (data_if["entity"] == "CH4CO2N2O (SARGWP100)")
+    ].index
     data_if = data_if.drop(index_3A2_CO2CH4N2O)
 
     # aggregate cat 3.A.2 for CO2CH4N2O
@@ -321,10 +378,10 @@ if __name__ == "__main__":
             print(f"Aggregating category {cat_to_agg}")
             df_combine = df_test.copy(deep=True)
 
-            time_format = '%Y'
+            time_format = "%Y"
             time_columns = [
                 col
-                for col in df_combine.columns.values
+                for col in df_combine.columns.to_numpy()
                 if matches_time_format(col, time_format)
             ]
 
@@ -332,11 +389,20 @@ if __name__ == "__main__":
                 df_combine[col] = pd.to_numeric(df_combine[col], errors="coerce")
 
             df_combine = df_combine.groupby(
-                by=['source', 'scenario (PRIMAP)', 'provenance', 'area (ISO3)', 'entity',
-                    'unit']).sum(min_count=1)
+                by=[
+                    "source",
+                    "scenario (PRIMAP)",
+                    "provenance",
+                    "area (ISO3)",
+                    "entity",
+                    "unit",
+                ]
+            ).sum(min_count=1)
 
             df_combine.insert(0, cat_label, cat_to_agg)
-            df_combine.insert(1, "orig_cat_name", aggregate_cats_CO2CH4N2O[cat_to_agg]["name"])
+            df_combine.insert(
+                1, "orig_cat_name", aggregate_cats_CO2CH4N2O[cat_to_agg]["name"]
+            )
 
             df_combine = df_combine.reset_index()
 
@@ -344,7 +410,6 @@ if __name__ == "__main__":
         else:
             print(f"no data to aggregate category {cat_to_agg}")
 
-
     data_if.attrs = attrs
 
     data_pm2 = pm2.pm2io.from_interchange_format(data_if)
@@ -372,9 +437,11 @@ if __name__ == "__main__":
     if not output_folder.exists():
         output_folder.mkdir()
     pm2.pm2io.write_interchange_format(
-        output_folder / (output_filename + coords_terminologies["category"]), data_if)
+        output_folder / (output_filename + coords_terminologies["category"]), data_if
+    )
 
     encoding = {var: compression for var in data_pm2.data_vars}
     data_pm2.pr.to_netcdf(
         output_folder / (output_filename + coords_terminologies["category"] + ".nc"),
-        encoding=encoding)
+        encoding=encoding,
+    )
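
The aggregation recipe that repeats three times in this script (select the year
columns, coerce them to numeric, group over the metadata columns, sum with
min_count=1) can be read in isolation. A condensed sketch; matches_time_format
is re-implemented inline as a stand-in for the helper of the same name so the
snippet is self-contained:

.. code-block:: python

    from datetime import datetime

    import pandas as pd

    def matches_time_format(value: str, time_format: str) -> bool:
        """Stand-in for unfccc_ghg_data.helper.matches_time_format."""
        try:
            datetime.strptime(value, time_format)
        except ValueError:
            return False
        return True

    df = pd.DataFrame({"unit": ["Gg", "Gg"], "2019": ["1234.5", "NE"]})
    time_columns = [col for col in df.columns if matches_time_format(col, "%Y")]
    for col in time_columns:
        # notation keys such as "NE" (not estimated) become NaN
        df[col] = pd.to_numeric(df[col], errors="coerce")
    df_agg = df.groupby(by=["unit"]).sum(min_count=1)  # all-NaN groups stay NaN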

+ 30 - 0
src/unfccc_ghg_data/unfccc_reader/Israel/__init__.py

@@ -0,0 +1,30 @@
+"""Read Israel's BURs, NIRs, NCs
+
+Scripts and configurations to read Israel's submissions to the UNFCCC.
+Currently, the following submissions and datasets are available (all datasets
+including DI (read using the DI-reader) and legacy BUR/NIR (no code)):
+
+.. exec_code::
+    :hide_code:
+
+    from unfccc_ghg_data.helper.functions import (get_country_datasets,
+                                                  get_country_submissions)
+    country = 'ISR'
+    # print available submissions
+    print("="*15 + " Available submissions " + "="*15)
+    get_country_submissions(country, True)
+    print("")
+
+    # print available datasets
+    print("="*15 + " Available datasets " + "="*15)
+    get_country_datasets(country, True)
+
+You can also obtain this information running
+
+.. code-block:: bash
+
+    poetry run doit country_info country=ISR
+
+See below for a listing of scripts for BUR/NIR reading including links.
+
+"""

+ 409 - 314
src/unfccc_ghg_data/unfccc_reader/Israel/config_isr_bur2.py

@@ -1,73 +1,91 @@
+"""Config for Israel's BUR3
+
+Full configuration including PRIMAP2 conversion config and metadata
+
+"""
+
 #### configuration for trend tables
 import locale
 
-gwp_to_use = 'SARGWP100'
-terminology_proc = 'IPCC2006_PRIMAP'
+gwp_to_use = "SARGWP100"
+terminology_proc = "IPCC2006_PRIMAP"
 # bunkers [0,1] need different specs
 trend_table_def = {
     # only GHG read, rest dropped
-    'GHG': {
-        'tables': [2],
-        'cols_add': {
-            'unit': 'ktCO2eq',
-            'category': '0',
+    "GHG": {
+        "tables": [2],
+        "cols_add": {
+            "unit": "ktCO2eq",
+            "category": "0",
         },
-        'given_col': 'entity',
-        'take_only': ['Total GHG'],
+        "given_col": "entity",
+        "take_only": ["Total GHG"],
     },
-    'CO2': {
-        'tables': [3],
-        'cols_add': {
-            'unit': 'kt',
-            'entity': 'CO2',
+    "CO2": {
+        "tables": [3],
+        "cols_add": {
+            "unit": "kt",
+            "entity": "CO2",
         },
-        'given_col': 'category',
+        "given_col": "category",
     },
-    'CH4': {
-        'tables': [5],
-        'cols_add': {
-            'unit': 'kt',
-            'entity': 'CH4',
+    "CH4": {
+        "tables": [5],
+        "cols_add": {
+            "unit": "kt",
+            "entity": "CH4",
         },
-        'given_col': 'category',
-        'take_only': [
-            'Total emissions', 'From fuel combustion',
-            'From Industrial processes', 'From Agriculture'
-        ], # ignore the waste time series as they don't cover the full sector
+        "given_col": "category",
+        "take_only": [
+            "Total emissions",
+            "From fuel combustion",
+            "From Industrial processes",
+            "From Agriculture",
+        ],  # ignore the waste time series as they don't cover the full sector
         # and lead to problems because of the methodology change in the inventory
     },
-    'N2O': {
-        'tables': [6],
-        'cols_add': {
-            'unit': 'kt',
-            'entity': 'N2O',
+    "N2O": {
+        "tables": [6],
+        "cols_add": {
+            "unit": "kt",
+            "entity": "N2O",
         },
-        'given_col': 'category',
+        "given_col": "category",
     },
-    'FGases': {
-        'tables': [7],
-        'cols_add': {
-            'unit': 'ktCO2eq',
-            'category': '0',
+    "FGases": {
+        "tables": [7],
+        "cols_add": {
+            "unit": "ktCO2eq",
+            "category": "0",
         },
-        'given_col': 'entity',
+        "given_col": "entity",
     },
 }
 
 #### configuration for inventory tables
 inv_tab_conf = {
-    'unit_row': 0,
-    'entity_row': 0,
-    'regex_unit': r"\((.*)\)",
-    'regex_entity': r"^(.*)\s\(",
-    'index_cols': 'category',
-    'cat_pos': (0, 0),
-    'header_long': ["category", "entity", "unit", "time", "data"],
-    'header_2010': ["2010", "CO2 emissions (Gg)", "CO2 removals (Gg)",
-                  "CH4 (Gg)", "N2O (Gg)", "CO (Gg)", "NOx (Gg)",
-                  "NMVOCs (Gg)", "SOx (Gg)", "SF6 (CO2eq Gg)",
-                  "HFCs (CO2eq Gg)", "PFCs (CO2eq Gg)"],
-    'unit_repl': {
+    "unit_row": 0,
+    "entity_row": 0,
+    "regex_unit": r"\((.*)\)",
+    "regex_entity": r"^(.*)\s\(",
+    "index_cols": "category",
+    "cat_pos": (0, 0),
+    "header_long": ["category", "entity", "unit", "time", "data"],
+    "header_2010": [
+        "2010",
+        "CO2 emissions (Gg)",
+        "CO2 removals (Gg)",
+        "CH4 (Gg)",
+        "N2O (Gg)",
+        "CO (Gg)",
+        "NOx (Gg)",
+        "NMVOCs (Gg)",
+        "SOx (Gg)",
+        "SF6 (CO2eq Gg)",
+        "HFCs (CO2eq Gg)",
+        "PFCs (CO2eq Gg)",
+    ],
+    "unit_repl": {
         "SF6 (CO2e Gg)": "GgCO2eq",
         "SF6 (CO2e Gg)": "GgCO2eq",
         "HFCs (CO2eGg)": "GgCO2eq",
         "HFCs (CO2eGg)": "GgCO2eq",
         "PFCs (CO2e Gg)": "GgCO2eq",
         "PFCs (CO2e Gg)": "GgCO2eq",
@@ -78,13 +96,13 @@ inv_tab_conf = {
 }
 
 inv_table_def = {
-    '1996': {'tables': [1, 2]},
-    '2000': {'tables': [3, 4]},
-    '2005': {'tables': [5, 6]},
-    '2010': {'tables': [7, 8]},
-    '2015': {'tables': [9, 10, 11]},
-    '2019': {'tables': [12, 13, 14]},
-    '2020': {'tables': [15, 16]},
+    "1996": {"tables": [1, 2]},
+    "2000": {"tables": [3, 4]},
+    "2005": {"tables": [5, 6]},
+    "2010": {"tables": [7, 8]},
+    "2015": {"tables": [9, 10, 11]},
+    "2019": {"tables": [12, 13, 14]},
+    "2020": {"tables": [15, 16]},
 }
 
 #### configuration for PM2 format
@@ -110,114 +128,114 @@ coords_defaults = {
 coords_value_mapping = {
     "unit": "PRIMAP1",
     "category": {
-        'Total national emissions and removals': '24540',
-        '0': '24540', # no mapping, just for completeness
-        'Total emissions and removals': '24540',
-        'Total emissions': '24540',
-        '1. Energy': '1',
-        'A. Fuel combustion (sectoral approach)': '1.A',
-        'A. From fuel combustion': '1.A',
-        'From fuel combustion': '1.A',
-        '1. Energy industries': '1.A.1',
-        '2. Manufacturing industries and construction': '1.A.2',
-        '2. Manufacturing, industries and construction': '1.A.2',
-        '3. Transport': '1.A.3',
-        '4. Other sectors': '1.A.4',
-        '4. Other': '1.A.4',
-        'Commercial, institutional residential sectors': '1.A.4.ab', # not BURDI
-        'Commercial, institutional': '1.A.4.a', #not BURDI
-        'residential sectors': '1.A.4.b', #not BURDI
-        'Agriculture, forestry and fishing': '1.A.4.c', # not BURDI
-        '5. Other (please specify)': '1.A.5',
-        'B. Fugitive emissions from fuels': '1.B',
-        '1. Solid fuels': '1.B.1',
-        '2. Oil and natural gas': '1.B.2',
-        '2. Industrial processes': '2',
-        'B. industrial processes': '2',
-        'From Industrial processes': '2',
-        'A. Mineral products': '2.A',
-        'CEMENT PRODUCTION': '2.A.1',
-        'PRODUCTION OF LIME': '2.A.2',
-        'SODA ASH USE': '2.A.4.b',
-        'ROAD PAVING WITH ASPHALT': '2.A.6',
-        'Container Glass': '2.A.7.a',
-        'B. Chemical industry': '2.B',
-        'NITRIC ACID PRODUCTION': '2.B.2',
-        'Ethylene': '2.B.5.b',
-        'PRODUCTION OF OTHER CHEMICALS': '2.B.5.g', #not BURDI
-        'Sulphuric Acid': '2.B.5.f', #not BURDI
-        'C. Metal production': '2.C',
-        'D. Other production': '2.D',
-        'E. Production of halocarbons and sulphur hexafluoride': '2.E',
-        'F. Consumption of halocarbons and sulphur hexafluoride': '2.F',
-        'G. Other (IPPU)': '2.G',
-        '3. Solvent and other product use': '3',
-        '4. Agriculture': '4',
-        'From Agriculture': '4',
-        'From agriculture': '4',
-        'A. Enteric fermentation': '4.A',
-        'B. Manure management': '4.B',
-        'C. Rice cultivation': '4.C',
-        'D. Agricultural soils': '4.D',
-        'E. Prescribed burning of savannahs': '4.E',
-        'F. Field burning of agricultural residues': '4.F',
-        'G. Other (Agri)': '4.G',
-        '5. Land-use change and forestry': '5',
-        'C. Land-use change and forestry': '5',
-        'A. Changes in forest and other woody biomass stocks': '5.A',
-        '2. Changes in forest and other woody biomass stocks': '5.A',
-        'B. Forest and grassland conversion': '5.B',
-        'C. Abandonment of managed lands': '5.C',
-        'D. CO2 emissions and removals from soil': '5.D',
-        '1. CO2 emissions and removals from soil': '5.D',
-        'E. Other (LULUCF)': '5.E',
+        "Total national emissions and removals": "24540",
+        "0": "24540",  # no mapping, just for completeness
+        "Total emissions and removals": "24540",
+        "Total emissions": "24540",
+        "1. Energy": "1",
+        "A. Fuel combustion (sectoral approach)": "1.A",
+        "A. From fuel combustion": "1.A",
+        "From fuel combustion": "1.A",
+        "1. Energy industries": "1.A.1",
+        "2. Manufacturing industries and construction": "1.A.2",
+        "2. Manufacturing, industries and construction": "1.A.2",
+        "3. Transport": "1.A.3",
+        "4. Other sectors": "1.A.4",
+        "4. Other": "1.A.4",
+        "Commercial, institutional residential sectors": "1.A.4.ab",  # not BURDI
+        "Commercial, institutional": "1.A.4.a",  # not BURDI
+        "residential sectors": "1.A.4.b",  # not BURDI
+        "Agriculture, forestry and fishing": "1.A.4.c",  # not BURDI
+        "5. Other (please specify)": "1.A.5",
+        "B. Fugitive emissions from fuels": "1.B",
+        "1. Solid fuels": "1.B.1",
+        "2. Oil and natural gas": "1.B.2",
+        "2. Industrial processes": "2",
+        "B. industrial processes": "2",
+        "From Industrial processes": "2",
+        "A. Mineral products": "2.A",
+        "CEMENT PRODUCTION": "2.A.1",
+        "PRODUCTION OF LIME": "2.A.2",
+        "SODA ASH USE": "2.A.4.b",
+        "ROAD PAVING WITH ASPHALT": "2.A.6",
+        "Container Glass": "2.A.7.a",
+        "B. Chemical industry": "2.B",
+        "NITRIC ACID PRODUCTION": "2.B.2",
+        "Ethylene": "2.B.5.b",
+        "PRODUCTION OF OTHER CHEMICALS": "2.B.5.g",  # not BURDI
+        "Sulphuric Acid": "2.B.5.f",  # not BURDI
+        "C. Metal production": "2.C",
+        "D. Other production": "2.D",
+        "E. Production of halocarbons and sulphur hexafluoride": "2.E",
+        "F. Consumption of halocarbons and sulphur hexafluoride": "2.F",
+        "G. Other (IPPU)": "2.G",
+        "3. Solvent and other product use": "3",
+        "4. Agriculture": "4",
+        "From Agriculture": "4",
+        "From agriculture": "4",
+        "A. Enteric fermentation": "4.A",
+        "B. Manure management": "4.B",
+        "C. Rice cultivation": "4.C",
+        "D. Agricultural soils": "4.D",
+        "E. Prescribed burning of savannahs": "4.E",
+        "F. Field burning of agricultural residues": "4.F",
+        "G. Other (Agri)": "4.G",
+        "5. Land-use change and forestry": "5",
+        "C. Land-use change and forestry": "5",
+        "A. Changes in forest and other woody biomass stocks": "5.A",
+        "2. Changes in forest and other woody biomass stocks": "5.A",
+        "B. Forest and grassland conversion": "5.B",
+        "C. Abandonment of managed lands": "5.C",
+        "D. CO2 emissions and removals from soil": "5.D",
+        "1. CO2 emissions and removals from soil": "5.D",
+        "E. Other (LULUCF)": "5.E",
         # waste in 2006 categories, not BURDI as we will lose info if we map to BURDI and back
-        '6. Waste': '6',
-        'A. Solid waste disposal on land': '6.A',
-        'From solid waste disposal on land': '6.A',
-        'B. Waste-water handling': '6X.B', # combine with 6.D
-        'From waste-water treatment': '6X.B', # not BURDI
-        'C. Waste incineration': '6.C',
-        'D. Other (please specify)': '6X.D', # combine with 6.E
-        'B. Biological Treatment of Solid Waste': '6.B', # not BURDI
-        'D.Waste-water handling': '6.D', # not BURDI
-        'D. Waste-water handling': '6.D', # not BURDI
-        'E. Other (Waste)': '6.E', # not BURDI
-        '7. Other (please specify)': '7',
-        'International bunkers': '14637',
-        'Aviation': '14424',
-        'Marine': '14423',
-        'CO2 emissions from biomass': '14638',
+        "6. Waste": "6",
+        "A. Solid waste disposal on land": "6.A",
+        "From solid waste disposal on land": "6.A",
+        "B. Waste-water handling": "6X.B",  # combine with 6.D
+        "From waste-water treatment": "6X.B",  # not BURDI
+        "C. Waste incineration": "6.C",
+        "D. Other (please specify)": "6X.D",  # combine with 6.E
+        "B. Biological Treatment of Solid Waste": "6.B",  # not BURDI
+        "D.Waste-water handling": "6.D",  # not BURDI
+        "D. Waste-water handling": "6.D",  # not BURDI
+        "E. Other (Waste)": "6.E",  # not BURDI
+        "7. Other (please specify)": "7",
+        "International bunkers": "14637",
+        "Aviation": "14424",
+        "Marine": "14423",
+        "CO2 emissions from biomass": "14638",
     },
     "entity": {
-        'Total GHG': f'KYOTOGHG ({gwp_to_use})',
-        'Carbon Dioxide (CO2)': 'CO2',
-        'CO2': 'CO2', # no mapping, just added for completeness here
-        'CO2 emissions': 'CO2 emissions', # no mapping, just added for completeness here
-        'CO2 removals': 'CO2 removals', # no mapping, just added for completeness here
-        'CO2 Emissions': 'CO2 emissions',
-        'CO2 Removals': 'CO2 removals',
-        'Methane (CH4)': 'CH4',
-        'CH4': 'CH4', # no mapping, just added for completeness here
-        'Nitrous Oxides (N2O)': 'N2O',
-        'NO2': 'NO2', # no mapping, just added for completeness here
-        'Sulfur hexafluoride (SF6)': f'SF6 ({gwp_to_use})',
-        'SF6': f'SF6 ({gwp_to_use})',
-        "Hydrofluorocarbons (HFC'S)": f'HFCS ({gwp_to_use})',
-        "HFCs": f'HFCS ({gwp_to_use})',
-        "Perfluorocarbons (PFC'S)": f'PFCS ({gwp_to_use})',
-        "PFCs": f'PFCS ({gwp_to_use})',
-        'NOx': 'NOX',
-        'Nox': 'NOX',
-        'Co': 'CO',
-        'CO': 'CO', # no mapping, just added for completeness here
-        'NMVOCs': 'NMVOC',
-        'SOx': 'SOX', # no mapping, just added for completeness here
+        "Total GHG": f"KYOTOGHG ({gwp_to_use})",
+        "Carbon Dioxide (CO2)": "CO2",
+        "CO2": "CO2",  # no mapping, just added for completeness here
+        "CO2 emissions": "CO2 emissions",  # no mapping, just added for completeness here
+        "CO2 removals": "CO2 removals",  # no mapping, just added for completeness here
+        "CO2 Emissions": "CO2 emissions",
+        "CO2 Removals": "CO2 removals",
+        "Methane (CH4)": "CH4",
+        "CH4": "CH4",  # no mapping, just added for completeness here
+        "Nitrous Oxides (N2O)": "N2O",
+        "NO2": "NO2",  # no mapping, just added for completeness here
+        "Sulfur hexafluoride (SF6)": f"SF6 ({gwp_to_use})",
+        "SF6": f"SF6 ({gwp_to_use})",
+        "Hydrofluorocarbons (HFC'S)": f"HFCS ({gwp_to_use})",
+        "HFCs": f"HFCS ({gwp_to_use})",
+        "Perfluorocarbons (PFC'S)": f"PFCS ({gwp_to_use})",
+        "PFCs": f"PFCS ({gwp_to_use})",
+        "NOx": "NOX",
+        "Nox": "NOX",
+        "Co": "CO",
+        "CO": "CO",  # no mapping, just added for completeness here
+        "NMVOCs": "NMVOC",
+        "SOx": "SOX",  # no mapping, just added for completeness here
     },
 }
 
 filter_remove = {
-    'rem_cat': {'category': ['Memo items', 'G. Other (please specify)']},
+    "rem_cat": {"category": ["Memo items", "G. Other (please specify)"]},
     #'rem_ent': {'entity': ['GHG per capita', 'GHG per GDP (2015 prices)']},
 }
 
@@ -235,76 +253,88 @@ meta_data = {
 #### for processing
 # aggregate categories
 cats_to_agg = {
-    '1': {'sources': ['1.A'], 'name': 'Energy'}, # for trends
-    '1.A.4': {'sources': ['1.A.4.a', '1.A.4.b', '1.A.4.c', '1.A.4.ab'],
-              'name': 'Other sectors'},
-    '2.A.4': {'sources': ['2.A.4.b'], 'name': 'Soda Ash'},
-    '2.A.7': {'sources': ['2.A.7.a'], 'name': 'Other'},
-    '2.A': {'sources': ['2.A.1', '2.A.2', '2.A.4', '2.A.6', '2.A.7'], 'name': 'Mineral Products'},
-    '2.B.5': {'sources': ['2.B.5.f', '2.B.5.g'], 'name': 'Other'},
-    '2.B': {'sources': ['2.B.2', '2.B.5'], 'name': 'Chemical Industry'},
-    '6.D': {'sources': ['6.D', '6X.B'], 'name': 'Wastewater Treatment and Discharge'},
+    "1": {"sources": ["1.A"], "name": "Energy"},  # for trends
+    "1.A.4": {
+        "sources": ["1.A.4.a", "1.A.4.b", "1.A.4.c", "1.A.4.ab"],
+        "name": "Other sectors",
+    },
+    "2.A.4": {"sources": ["2.A.4.b"], "name": "Soda Ash"},
+    "2.A.7": {"sources": ["2.A.7.a"], "name": "Other"},
+    "2.A": {
+        "sources": ["2.A.1", "2.A.2", "2.A.4", "2.A.6", "2.A.7"],
+        "name": "Mineral Products",
+    },
+    "2.B.5": {"sources": ["2.B.5.f", "2.B.5.g"], "name": "Other"},
+    "2.B": {"sources": ["2.B.2", "2.B.5"], "name": "Chemical Industry"},
+    "6.D": {"sources": ["6.D", "6X.B"], "name": "Wastewater Treatment and Discharge"},
     #'6.E': {'sources': ['6.E', '6X.D'], 'Other'}, # currently empty
 }
 
 # downscale
 # 1.A.4.ab
 downscaling = {
-    'sectors': {
-        '24540': {
-            'basket': '24540',
-            'basket_contents': ['2'],
-            'entities': ['SF6', 'HFCS (SARGWP100)', 'PFCS (SARGWP100)'],
-            'dim': f"category ({coords_terminologies['category']})",
+    "sectors": {
+        "24540": {
+            "basket": "24540",
+            "basket_contents": ["2"],
+            "entities": ["SF6", "HFCS (SARGWP100)", "PFCS (SARGWP100)"],
+            "dim": f"category ({coords_terminologies['category']})",
         },
-        '1.A': {
-            'basket': '1.A',
-            'basket_contents': ['1.A.1', '1.A.2', '1.A.3', '1.A.4'],
-            'entities': ['CO2', 'CH4', 'N2O'],
-            'dim': f"category ({coords_terminologies['category']})",
-            'tolerance': 0.05, # some inconsistencies (rounding?)
+        "1.A": {
+            "basket": "1.A",
+            "basket_contents": ["1.A.1", "1.A.2", "1.A.3", "1.A.4"],
+            "entities": ["CO2", "CH4", "N2O"],
+            "dim": f"category ({coords_terminologies['category']})",
+            "tolerance": 0.05,  # some inconsistencies (rounding?)
         },
-        '1.A.4.ab': {
-            'basket': '1.A.4.ab',
-            'basket_contents': ['1.A.4.a', '1.A.4.b'],
-            'entities': ['CO2', 'CH4', 'N2O', 'SOX', 'NOX', 'CO'],
-            'dim': f"category ({coords_terminologies['category']})",
+        "1.A.4.ab": {
+            "basket": "1.A.4.ab",
+            "basket_contents": ["1.A.4.a", "1.A.4.b"],
+            "entities": ["CO2", "CH4", "N2O", "SOX", "NOX", "CO"],
+            "dim": f"category ({coords_terminologies['category']})",
         },
-        '1.A.4': {
-            'basket': '1.A.4',
-            'basket_contents': ['1.A.4.a', '1.A.4.b', '1.A.4.c'],
-            'entities': ['CO2', 'CH4', 'N2O'],
-            'dim': f"category ({coords_terminologies['category']})",
+        "1.A.4": {
+            "basket": "1.A.4",
+            "basket_contents": ["1.A.4.a", "1.A.4.b", "1.A.4.c"],
+            "entities": ["CO2", "CH4", "N2O"],
+            "dim": f"category ({coords_terminologies['category']})",
         },
-        '2': {
-            'basket': '2',
-            'basket_contents': ['2.A', '2.B', '2.F'],
-            'entities': ['CO2', 'CH4', 'N2O', 'SF6', 'PFCS (SARGWP100)', 'HFCS (SARGWP100)'],
-            'dim': f"category ({coords_terminologies['category']})",
+        "2": {
+            "basket": "2",
+            "basket_contents": ["2.A", "2.B", "2.F"],
+            "entities": [
+                "CO2",
+                "CH4",
+                "N2O",
+                "SF6",
+                "PFCS (SARGWP100)",
+                "HFCS (SARGWP100)",
+            ],
+            "dim": f"category ({coords_terminologies['category']})",
         },
-        '2.A': {
-            'basket': '2.A',
-            'basket_contents': ['2.A.1', '2.A.2', '2.A.4', '2.A.7'],
-            'entities': ['CO2', 'CH4', 'N2O'],
-            'dim': f"category ({coords_terminologies['category']})",
+        "2.A": {
+            "basket": "2.A",
+            "basket_contents": ["2.A.1", "2.A.2", "2.A.4", "2.A.7"],
+            "entities": ["CO2", "CH4", "N2O"],
+            "dim": f"category ({coords_terminologies['category']})",
         },
-        '2.B': {
-            'basket': '2.B',
-            'basket_contents': ['2.B.2', '2.B.5'],
-            'entities': ['CO2', 'CH4', 'N2O'],
-            'dim': f"category ({coords_terminologies['category']})",
+        "2.B": {
+            "basket": "2.B",
+            "basket_contents": ["2.B.2", "2.B.5"],
+            "entities": ["CO2", "CH4", "N2O"],
+            "dim": f"category ({coords_terminologies['category']})",
         },
-        '4': {
-            'basket': '4',
-            'basket_contents': ['4.A', '4.B', '4.C', '4.D', '4.E', '4.F', '4.G'],
-            'entities': ['CH4', 'N2O'],
-            'dim': f"category ({coords_terminologies['category']})",
+        "4": {
+            "basket": "4",
+            "basket_contents": ["4.A", "4.B", "4.C", "4.D", "4.E", "4.F", "4.G"],
+            "entities": ["CH4", "N2O"],
+            "dim": f"category ({coords_terminologies['category']})",
         },
-        '5': {
-            'basket': '5',
-            'basket_contents': ['5.A', '5.D'], # the other sectors are 0
-            'entities': ['CO2'],
-            'dim': f"category ({coords_terminologies['category']})",
+        "5": {
+            "basket": "5",
+            "basket_contents": ["5.A", "5.D"],  # the other sectors are 0
+            "entities": ["CO2"],
+            "dim": f"category ({coords_terminologies['category']})",
         },
     },
 }
@@ -312,125 +342,190 @@ downscaling = {
 # map to IPCC2006
 cat_conversion = {
     # ANNEXI to come (low priority as we read from CRF files)
-    'mapping': {
-        '1': '1',
-        '1.A': '1.A',
-        '1.A.1': '1.A.1',
-        '1.A.2': '1.A.2',
-        '1.A.3': '1.A.3',
-        '1.A.4': '1.A.4',
-        '1.A.4.a': '1.A.4.a',
-        '1.A.4.b': '1.A.4.b',
-        '1.A.4.c': '1.A.4.c',
-        '1.A.5': '1.A.5', # currently not needed
-        '1.B': '1.B', # currently not needed
-        '1.B.1': '1.B.1', # currently not needed
-        '1.B.2': '1.B.2', # currently not needed
-        '2': '2',
-        '2.A': '2.A',
-        '2.A.1': '2.A.1', # cement
-        '2.A.2': '2.A.2', # lime
-        '2.A.4': '2.A.4.b', # soda ash
-        '2.A.6': '2.A.5', # road paving with asphalt -> other
-        '2.A.7.a': '2.A.3', # glass
-        '2.B': 'M.2.B_2.B',
-        '2.B.2': '2.B.2', # nitric acid
-        '2.B.5.b': '2.B.8.b', # Ethylene
-        '2.B.5.f': 'M.2.B.10.a', # sulphuric acid
-        '2.B.5.g': 'M.2.B.10.b', # other chemicals
-        '2.C': '2.C',
-        '2.D': 'M.2.H.1_2',
-        '2.E': '2.B.9',
-        '2.F': '2.F',
-        '2.G': '2.H.3',
-        '4': 'M.AG',
-        '4.A': '3.A.1',
-        '4.B': '3.A.2',
-        '4.C': '3.C.7',
-        '4.D': 'M.3.C.45.AG',
-        '4.E': '3.C.1.c',
-        '4.F': '3.C.1.b',
-        '4.G': '3.C.8',
-        '5': 'M.LULUCF',
-        '6': '4',
-        '6.A': '4.A',
-        '6.B': '4.B',
-        '6.C': '4.C',
-        '6.D': '4.D',
-        '24540': '0',
-        '15163': 'M.0.EL',
-        '14637': 'M.BK',
-        '14424': 'M.BK.A',
-        '14423': 'M.BK.M',
-        '14638': 'M.BIO',
-        '7': '5',
-    }, #5.A-D ignored as not fitting 2006 cats
-
-    'aggregate': {
-        '2.A.4': {'sources': ['2.A.4.b'], 'name': 'Other uses of soda ashes'},
-        '2.B.8': {'sources': ['2.B.8.b'], 'name': 'Petrochemical and Carbon Black production'},
-        '2.B.10': {'sources': ['M.2.B.10.a', 'M.2.B.10.b'], 'name': 'Other'},
-        '2.B': {'sources': ['2.B.2', '2.B.8', '2.B.9', '2.B.10'], 'name': 'Chemical Industry'},
-        '2.H': {'sources': ['M.2.H.1_2', '2.H.3'], 'name': 'Other'},
+    "mapping": {
+        "1": "1",
+        "1.A": "1.A",
+        "1.A.1": "1.A.1",
+        "1.A.2": "1.A.2",
+        "1.A.3": "1.A.3",
+        "1.A.4": "1.A.4",
+        "1.A.4.a": "1.A.4.a",
+        "1.A.4.b": "1.A.4.b",
+        "1.A.4.c": "1.A.4.c",
+        "1.A.5": "1.A.5",  # currently not needed
+        "1.B": "1.B",  # currently not needed
+        "1.B.1": "1.B.1",  # currently not needed
+        "1.B.2": "1.B.2",  # currently not needed
+        "2": "2",
+        "2.A": "2.A",
+        "2.A.1": "2.A.1",  # cement
+        "2.A.2": "2.A.2",  # lime
+        "2.A.4": "2.A.4.b",  # soda ash
+        "2.A.6": "2.A.5",  # road paving with asphalt -> other
+        "2.A.7.a": "2.A.3",  # glass
+        "2.B": "M.2.B_2.B",
+        "2.B.2": "2.B.2",  # nitric acid
+        "2.B.5.b": "2.B.8.b",  # Ethylene
+        "2.B.5.f": "M.2.B.10.a",  # sulphuric acid
+        "2.B.5.g": "M.2.B.10.b",  # other chemicals
+        "2.C": "2.C",
+        "2.D": "M.2.H.1_2",
+        "2.E": "2.B.9",
+        "2.F": "2.F",
+        "2.G": "2.H.3",
+        "4": "M.AG",
+        "4.A": "3.A.1",
+        "4.B": "3.A.2",
+        "4.C": "3.C.7",
+        "4.D": "M.3.C.45.AG",
+        "4.E": "3.C.1.c",
+        "4.F": "3.C.1.b",
+        "4.G": "3.C.8",
+        "5": "M.LULUCF",
+        "6": "4",
+        "6.A": "4.A",
+        "6.B": "4.B",
+        "6.C": "4.C",
+        "6.D": "4.D",
+        "24540": "0",
+        "15163": "M.0.EL",
+        "14637": "M.BK",
+        "14424": "M.BK.A",
+        "14423": "M.BK.M",
+        "14638": "M.BIO",
+        "7": "5",
+    },  # 5.A-D ignored as not fitting 2006 cats
+    "aggregate": {
+        "2.A.4": {"sources": ["2.A.4.b"], "name": "Other uses of soda ashes"},
+        "2.B.8": {
+            "sources": ["2.B.8.b"],
+            "name": "Petrochemical and Carbon Black production",
+        },
+        "2.B.10": {"sources": ["M.2.B.10.a", "M.2.B.10.b"], "name": "Other"},
+        "2.B": {
+            "sources": ["2.B.2", "2.B.8", "2.B.9", "2.B.10"],
+            "name": "Chemical Industry",
+        },
+        "2.H": {"sources": ["M.2.H.1_2", "2.H.3"], "name": "Other"},
         # '2': {'sources': ['2.A', '2.B', '2.C', '2.F', '2.H'],
         #       'name': 'Industrial Processes and Product Use'},
-        '3.A': {'sources': ['3.A.1', '3.A.2'], 'name': 'Livestock'},
-        '3.C.1': {'sources': ['3.C.1.b', '3.C.1.c'],
-                     'name': 'Emissions from biomass burning'},
-        'M.3.C.1.AG': {'sources': ['3.C.1.b', '3.C.1.c'],
-                     'name': 'Emissions from biomass burning (Agriculture)'},
-        '3.C': {'sources': ['3.C.1', 'M.3.C.45.AG', '3.C.7', '3.C.8'],
-                     'name': 'Aggregate sources and non-CO2 emissions sources on land'},
-        'M.3.C.AG': {'sources': ['M.3.C.1.AG', 'M.3.C.45.AG', '3.C.7', '3.C.8'],
-                     'name': 'Aggregate sources and non-CO2 emissions sources on land ('
-                             'Agriculture)'},
-        'M.AG.ELV': {'sources': ['M.3.C.AG'], 'name': 'Agriculture excluding livestock'},
-        '3': {'sources': ['M.AG', 'M.LULUCF'], 'name': 'AFOLU'},
-        'M.0.EL': {'sources': ['1', '2', 'M.AG', '4', '5'], 'name': 'National total '
-                                                                    'excluding LULUCF'},
+        "3.A": {"sources": ["3.A.1", "3.A.2"], "name": "Livestock"},
+        "3.C.1": {
+            "sources": ["3.C.1.b", "3.C.1.c"],
+            "name": "Emissions from biomass burning",
+        },
+        "M.3.C.1.AG": {
+            "sources": ["3.C.1.b", "3.C.1.c"],
+            "name": "Emissions from biomass burning (Agriculture)",
+        },
+        "3.C": {
+            "sources": ["3.C.1", "M.3.C.45.AG", "3.C.7", "3.C.8"],
+            "name": "Aggregate sources and non-CO2 emissions sources on land",
+        },
+        "M.3.C.AG": {
+            "sources": ["M.3.C.1.AG", "M.3.C.45.AG", "3.C.7", "3.C.8"],
+            "name": "Aggregate sources and non-CO2 emissions sources on land ("
+            "Agriculture)",
+        },
+        "M.AG.ELV": {
+            "sources": ["M.3.C.AG"],
+            "name": "Agriculture excluding livestock",
+        },
+        "3": {"sources": ["M.AG", "M.LULUCF"], "name": "AFOLU"},
+        "M.0.EL": {
+            "sources": ["1", "2", "M.AG", "4", "5"],
+            "name": "National total " "excluding LULUCF",
+        },
     },
-    'basket_copy': {
-        'GWPs_to_add': ["AR4GWP100", "AR5GWP100", "AR6GWP100"],
-        'entities': ["HFCS", "PFCS"],
-        'source_GWP': 'SARGWP100',
+    "basket_copy": {
+        "GWPs_to_add": ["AR4GWP100", "AR5GWP100", "AR6GWP100"],
+        "entities": ["HFCS", "PFCS"],
+        "source_GWP": "SARGWP100",
     },
 }
 
 sectors_to_save = [
-    '1', '1.A', '1.A.1', '1.A.2', '1.A.3', '1.A.4', '1.A.4.a', '1.A.4.b', '1.A.4.c',
-    '1.A.5',
-    '1.B', '1.B.1', '1.B.2',
-    '2', '2.A', '2.A.1', '2.A.2', '2.A.3', '2.A.4', '2.A.5',
-    '2.B', '2.B.2', '2.B.8', '2.B.9', '2.B.10', '2.C', '2.F', '2.H',
-    '3', 'M.AG', '3.A', '3.A.1', '3.A.2',
-    '3.C', '3.C.1', 'M.3.C.1.AG', '3.C.7', 'M.3.C.45.AG', '3.C.8', 'M.3.C.AG',
-    'M.LULUCF', 'M.AG.ELV',
-    '4', '4.A', '4.B', '4.C', '4.D',
-    '0', 'M.0.EL', 'M.BK', 'M.BK.A', 'M.BK.M', 'M.BIO', '5']
+    "1",
+    "1.A",
+    "1.A.1",
+    "1.A.2",
+    "1.A.3",
+    "1.A.4",
+    "1.A.4.a",
+    "1.A.4.b",
+    "1.A.4.c",
+    "1.A.5",
+    "1.B",
+    "1.B.1",
+    "1.B.2",
+    "2",
+    "2.A",
+    "2.A.1",
+    "2.A.2",
+    "2.A.3",
+    "2.A.4",
+    "2.A.5",
+    "2.B",
+    "2.B.2",
+    "2.B.8",
+    "2.B.9",
+    "2.B.10",
+    "2.C",
+    "2.F",
+    "2.H",
+    "3",
+    "M.AG",
+    "3.A",
+    "3.A.1",
+    "3.A.2",
+    "3.C",
+    "3.C.1",
+    "M.3.C.1.AG",
+    "3.C.7",
+    "M.3.C.45.AG",
+    "3.C.8",
+    "M.3.C.AG",
+    "M.LULUCF",
+    "M.AG.ELV",
+    "4",
+    "4.A",
+    "4.B",
+    "4.C",
+    "4.D",
+    "0",
+    "M.0.EL",
+    "M.BK",
+    "M.BK.A",
+    "M.BK.M",
+    "M.BIO",
+    "5",
+]
 
 
 # gas baskets
 gas_baskets = {
-    'FGASES (SARGWP100)': ['HFCS (SARGWP100)', 'PFCS (SARGWP100)', 'SF6', 'NF3'],
-    'FGASES (AR4GWP100)': ['HFCS (AR4GWP100)', 'PFCS (AR4GWP100)', 'SF6', 'NF3'],
-    'FGASES (AR5GWP100)': ['HFCS (AR5GWP100)', 'PFCS (AR5GWP100)', 'SF6', 'NF3'],
-    'FGASES (AR6GWP100)': ['HFCS (AR6GWP100)', 'PFCS (AR6GWP100)', 'SF6', 'NF3'],
-    'KYOTOGHG (SARGWP100)': ['CO2', 'CH4', 'N2O', 'FGASES (SARGWP100)'],
-    'KYOTOGHG (AR4GWP100)': ['CO2', 'CH4', 'N2O', 'FGASES (AR4GWP100)'],
-    'KYOTOGHG (AR5GWP100)': ['CO2', 'CH4', 'N2O', 'FGASES (AR5GWP100)'],
-    'KYOTOGHG (AR6GWP100)': ['CO2', 'CH4', 'N2O', 'FGASES (AR6GWP100)'],
+    "FGASES (SARGWP100)": ["HFCS (SARGWP100)", "PFCS (SARGWP100)", "SF6", "NF3"],
+    "FGASES (AR4GWP100)": ["HFCS (AR4GWP100)", "PFCS (AR4GWP100)", "SF6", "NF3"],
+    "FGASES (AR5GWP100)": ["HFCS (AR5GWP100)", "PFCS (AR5GWP100)", "SF6", "NF3"],
+    "FGASES (AR6GWP100)": ["HFCS (AR6GWP100)", "PFCS (AR6GWP100)", "SF6", "NF3"],
+    "KYOTOGHG (SARGWP100)": ["CO2", "CH4", "N2O", "FGASES (SARGWP100)"],
+    "KYOTOGHG (AR4GWP100)": ["CO2", "CH4", "N2O", "FGASES (AR4GWP100)"],
+    "KYOTOGHG (AR5GWP100)": ["CO2", "CH4", "N2O", "FGASES (AR5GWP100)"],
+    "KYOTOGHG (AR6GWP100)": ["CO2", "CH4", "N2O", "FGASES (AR6GWP100)"],
}
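
For illustration: a basket defined above is the sum of its member time series
once every member is expressed in the basket's GWP (the actual aggregation is
done by process_data_for_country further down). A minimal sketch with made-up
values:

    # hypothetical, already GWP-converted values in Gg CO2e
    members = {
        "HFCS (AR6GWP100)": 120.0,
        "PFCS (AR6GWP100)": 15.0,
        "SF6": 2.5,
        "NF3": 0.1,
    }
    print(f"FGASES (AR6GWP100): {sum(members.values()):.1f} Gg CO2e")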
 
 
basket_copy = {
-    'GWPs_to_add': ["AR4GWP100", "AR5GWP100", "AR6GWP100"],
-    'entities': ["HFCS", "PFCS"],
-    'source_GWP': gwp_to_use,
+    "GWPs_to_add": ["AR4GWP100", "AR5GWP100", "AR6GWP100"],
+    "entities": ["HFCS", "PFCS"],
+    "source_GWP": gwp_to_use,
}
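
basket_copy asks the processing step to derive the HFCS and PFCS baskets in
the listed GWPs from the SARGWP100 values reported in the BUR. A rough sketch
of the idea, assuming a basket dominated by a single gas (HFC-134a: GWP 1300
in SAR, 1430 in AR4); the real conversion happens inside
process_data_for_country:

    hfcs_sar = 650.0                   # Gg CO2e (SARGWP100), made-up value
    hfcs_ar4 = hfcs_sar / 1300 * 1430  # approximate AR4GWP100 equivalent
    print(round(hfcs_ar4, 1))          # 715.0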
 
 
+
#### functions
def is_int(input: str) -> bool:
+    """Check if a string evaluates to an integer under a defined locale"""
    try:
        locale.atoi(input)
-        return True
-    except:
+        return True  # noqa: TRY300
+    except Exception:
        return False
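
is_int() is used in the reader below to pick the year columns out of table
headers. A short sketch of its behaviour under a locale that uses ',' as
thousands separator (en_US.UTF-8 stands in for en_IL.UTF-8; which locales are
installed depends on the system):

    import locale

    locale.setlocale(locale.LC_NUMERIC, "en_US.UTF-8")
    assert locale.atoi("12,345") == 12345  # grouped numbers parse
    assert is_int("2019")                  # year column headers -> True
    assert not is_int("CO2")               # entity headers -> False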

+ 121 - 77
src/unfccc_ghg_data/unfccc_reader/Israel/read_ISR_BUR2_from_pdf.py

@@ -1,4 +1,12 @@
-# read Israel's BUR2 from pdf
+"""
+Read Israel's BUR2 from pdf
+
+This script reads data from Israel's BUR2
+Data are read from pdf using camelot
+only the 2019 inventory is read as the BUR refers to BUR2 for earlier years
+
+"""
+

# TODO: bunkers trend tables not read because of special format

@@ -9,7 +17,7 @@ import pandas as pd
import primap2 as pm2

# configuration import
-from .config_isr_bur2 import (
+from config_isr_bur2 import (
    basket_copy,
    cat_conversion,
    cats_to_agg,
@@ -29,23 +37,27 @@ from .config_isr_bur2 import (
    trend_table_def,
)

-from unfccc_ghg_data.helper import downloaded_data_path, extracted_data_path, process_data_for_country
+from unfccc_ghg_data.helper import (
+    downloaded_data_path,
+    extracted_data_path,
+    process_data_for_country,
+)

if __name__ == "__main__":
    ### general configuration
-    input_folder = downloaded_data_path / 'UNFCCC' / 'Israel' / 'BUR2'
-    output_folder = extracted_data_path / 'UNFCCC' / 'Israel'
+    input_folder = downloaded_data_path / "UNFCCC" / "Israel" / "BUR2"
+    output_folder = extracted_data_path / "UNFCCC" / "Israel"
    if not output_folder.exists():
        output_folder.mkdir()

-    output_filename = 'ISR_BUR2_2021_'
-    inventory_file_pdf = '2nd_Biennial_Update_Report_2021_final.pdf'
-    #years_to_read = range(1990, 2018 + 1)
+    output_filename = "ISR_BUR2_2021_"
+    inventory_file_pdf = "2nd_Biennial_Update_Report_2021_final.pdf"
+    # years_to_read = range(1990, 2018 + 1)
    pages_to_read_trends = range(48, 54)
    pages_to_read_inventory = range(54, 66)

    # define locale to use for str to float conversion
-    locale_to_use = 'en_IL.UTF-8'
+    locale_to_use = "en_IL.UTF-8"
    locale.setlocale(locale.LC_NUMERIC, locale_to_use)

    compression = dict(zlib=True, complevel=9)
@@ -53,40 +65,44 @@ if __name__ == "__main__":
    #### trend tables

    # read
-    tables_trends = camelot.read_pdf(str(input_folder / inventory_file_pdf), pages=','.join(
-        [str(page) for page in pages_to_read_trends]), flavor='lattice')
+    tables_trends = camelot.read_pdf(
+        str(input_folder / inventory_file_pdf),
+        pages=",".join([str(page) for page in pages_to_read_trends]),
+        flavor="lattice",
+    )

    # convert to pm2
    table_trends = None
    for table in trend_table_def.keys():
        current_def = trend_table_def[table]
        new_table = None
-        for subtable in current_def['tables']:
+        for subtable in current_def["tables"]:
            if new_table is None:
                new_table = tables_trends[subtable].df
            else:
                new_table = pd.concat([new_table, tables_trends[subtable].df])

-        for col in new_table.columns.values:
+        for col in new_table.columns.to_numpy():
            new_table[col] = new_table[col].str.replace("\n", "")

-        new_table.iloc[0, 0] = current_def['given_col']
+        new_table.iloc[0, 0] = current_def["given_col"]
        new_table.columns = new_table.iloc[0]
        new_table = new_table.drop(labels=[0])
        new_table = new_table.reset_index(drop=True)

-        if 'take_only' in current_def.keys():
+        if "take_only" in current_def.keys():
            new_table = new_table[
-                new_table[current_def['given_col']].isin(current_def['take_only'])]
+                new_table[current_def["given_col"]].isin(current_def["take_only"])
+            ]

-        time_cols = [col for col in new_table.columns.values if is_int(col)]
+        time_cols = [col for col in new_table.columns.to_numpy() if is_int(col)]
        for col in time_cols:
            # no NE,NA etc, just numbers, so we can just remove the ','
-            new_table[col] = new_table[col].str.replace(',', '')
-            new_table[col] = new_table[col].str.replace(' ', '')
+            new_table[col] = new_table[col].str.replace(",", "")
+            new_table[col] = new_table[col].str.replace(" ", "")

-        for col in current_def['cols_add']:
-            new_table[col] = current_def['cols_add'][col]
+        for col in current_def["cols_add"]:
+            new_table[col] = current_def["cols_add"][col]

        if table_trends is None:
            table_trends = new_table
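
The stitching pattern above works because camelot returns a TableList whose
elements each expose their raw pandas DataFrame as .df. A self-contained
sketch (file name and pages are placeholders):

    import camelot
    import pandas as pd

    tables = camelot.read_pdf("report.pdf", pages="48,49", flavor="lattice")
    stitched = pd.concat([t.df for t in tables], ignore_index=True)
    print(stitched.shape)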
@@ -108,31 +124,32 @@ if __name__ == "__main__":
        # filter_keep=filter_keep,
        meta_data=meta_data,
        convert_str=True,
-        time_format='%Y'
+        time_format="%Y",
    )

-
    data_pm2_trends = pm2.pm2io.from_interchange_format(data_if_trends)
 
 
    #### inventory tables
    # read inventory tables
    tables_inv = camelot.read_pdf(
        str(input_folder / inventory_file_pdf),
-        pages=','.join([str(page) for page in pages_to_read_inventory]),
-        flavor='lattice')
+        pages=",".join([str(page) for page in pages_to_read_inventory]),
+        flavor="lattice",
+    )
 
 
    # process
    table_inv = None
    for table in inv_table_def.keys():
        new_table = None
        print(f"working on year {table}")
-        for subtable in inv_table_def[table]['tables']:
+        for subtable in inv_table_def[table]["tables"]:
            print(f"adding table {subtable}")
            if new_table is None:
                new_table = tables_inv[subtable].df
            else:
-                new_table = pd.concat([new_table, tables_inv[subtable].df], axis=0,
-                                      join='outer')
+                new_table = pd.concat(
+                    [new_table, tables_inv[subtable].df], axis=0, join="outer"
+                )
                new_table = new_table.reset_index(drop=True)
 
 
            # replace line breaks, double, and triple spaces in category names
@@ -146,75 +163,97 @@ if __name__ == "__main__":
        else:
            # replace line breaks in units and entities
            new_table.iloc[inv_tab_conf["entity_row"]] = new_table.iloc[
-                inv_tab_conf["entity_row"]].str.replace('\n', '')
+                inv_tab_conf["entity_row"]
+            ].str.replace("\n", "")

        # get_year
        year = new_table.iloc[inv_tab_conf["cat_pos"][0], inv_tab_conf["cat_pos"][1]]

        # set category col label
-        new_table.iloc[inv_tab_conf["cat_pos"][0], inv_tab_conf["cat_pos"][1]] = 'category'
+        new_table.iloc[
+            inv_tab_conf["cat_pos"][0], inv_tab_conf["cat_pos"][1]
+        ] = "category"
 
 
        new_table = pm2.pm2io.nir_add_unit_information(
            new_table,
-            unit_row=inv_tab_conf["unit_row"], entity_row=inv_tab_conf["entity_row"],
-            regexp_entity=inv_tab_conf["regex_entity"], regexp_unit=inv_tab_conf[
-                "regex_unit"],
-            default_unit="", manual_repl_unit=inv_tab_conf["unit_repl"])
+            unit_row=inv_tab_conf["unit_row"],
+            entity_row=inv_tab_conf["entity_row"],
+            regexp_entity=inv_tab_conf["regex_entity"],
+            regexp_unit=inv_tab_conf["regex_unit"],
+            default_unit="",
+            manual_repl_unit=inv_tab_conf["unit_repl"],
+        )
 
 
        # fix individual values
-        if table == '1996':
+        if table == "1996":
            loc = new_table[new_table["category"] == "NITRIC ACID PRODUCTION"].index
-            value = new_table.loc[loc, "CH4"].values
+            value = new_table.loc[loc, "CH4"].to_numpy()
            new_table.loc[loc, "N2O"] = value[0, 0]
-            new_table.loc[loc, "CH4"] = ''
-        if table == '2015':
+            new_table.loc[loc, "CH4"] = ""
+        if table == "2015":
            loc_total = new_table[
-                new_table["category"] == "Total national emissions and removals"].index
-            loc_IPPU = new_table[new_table["category"] == "2. Industrial processes"].index
-            value = new_table.loc[loc_IPPU, "PFCs"].values
+                new_table["category"] == "Total national emissions and removals"
+            ].index
+            loc_IPPU = new_table[
+                new_table["category"] == "2. Industrial processes"
+            ].index
+            value = new_table.loc[loc_IPPU, "PFCs"].to_numpy()
            new_table.loc[loc_total, "PFCs"] = value[0, 0]

        # remove lines with empty category
        new_table = new_table.drop(new_table[new_table["category"] == ""].index)

        # rename E. Other (please specify) according to row above
-        e_locs = list(new_table[new_table["category"] == "E. Other (please specify)"].index)
+        e_locs = list(
+            new_table[new_table["category"] == "E. Other (please specify)"].index
+        )
        for loc in e_locs:
            iloc = new_table.index.get_loc(loc)
-            if new_table.iloc[iloc - 1]["category"][
-                0] == "D. CO2 emissions and removals from soil":
+            if (
+                new_table.iloc[iloc - 1]["category"][0]
+                == "D. CO2 emissions and removals from soil"
+            ):
                new_table.loc[loc]["category"] = "E. Other (LULUCF)"
-            elif new_table.iloc[iloc - 1]["category"][0] in ["D.Waste-water handling",
-                                                             'D. Waste-water handling']:
+            elif new_table.iloc[iloc - 1]["category"][0] in [
+                "D.Waste-water handling",
+                "D. Waste-water handling",
+            ]:
                new_table.loc[loc]["category"] = "E. Other (Waste)"

        # rename G. Other (please specify) according to row above
-        g_locs = list(new_table[new_table["category"] == "G. Other (please specify)"].index)
+        g_locs = list(
+            new_table[new_table["category"] == "G. Other (please specify)"].index
+        )
        for loc in g_locs:
            iloc = new_table.index.get_loc(loc)
-            if new_table.iloc[iloc - 1]["category"][
-                0] == "F. Field burning of agricultural residues":
+            if (
+                new_table.iloc[iloc - 1]["category"][0]
+                == "F. Field burning of agricultural residues"
+            ):
                new_table.loc[loc]["category"] = "G. Other (Agri)"
-            elif new_table.iloc[iloc - 1]["category"][
-                0] == "F. Consumption of halocarbons and sulphur hexafluoride":
+            elif (
+                new_table.iloc[iloc - 1]["category"][0]
+                == "F. Consumption of halocarbons and sulphur hexafluoride"
+            ):
                new_table.loc[loc]["category"] = "G. Other (IPPU)"

        # set index and convert to long format
        new_table = new_table.set_index(inv_tab_conf["index_cols"])
-        new_table_long = pm2.pm2io.nir_convert_df_to_long(new_table, year,
-                                                          inv_tab_conf["header_long"])
+        new_table_long = pm2.pm2io.nir_convert_df_to_long(
+            new_table, year, inv_tab_conf["header_long"]
+        )
        # remove line breaks in values
        new_table_long["data"] = new_table_long["data"].str.replace("\n", "")

        if table_inv is None:
            table_inv = new_table_long
        else:
-            table_inv = pd.concat([table_inv, new_table_long], axis=0, join='outer')
+            table_inv = pd.concat([table_inv, new_table_long], axis=0, join="outer")
            table_inv = table_inv.reset_index(drop=True)

    # no NE,NA etc, just numbers, so we can just remove the ','
-    table_inv["data"] = table_inv["data"].str.replace(',', '')
-    table_inv["data"] = table_inv["data"].str.replace(' ', '')
+    table_inv["data"] = table_inv["data"].str.replace(",", "")
+    table_inv["data"] = table_inv["data"].str.replace(" ", "")
 
 
    # ###
    # convert to PRIMAP2 interchange format
@@ -231,14 +270,14 @@ if __name__ == "__main__":
        # filter_keep=filter_keep,
        meta_data=meta_data,
        convert_str=True,
-        time_format='%Y',
+        time_format="%Y",
    )

    data_pm2_inv = pm2.pm2io.from_interchange_format(data_if_inv)

    #### combine
    # tolerance needs to be high as rounding in trend tables leads to inconsistent data
-    data_pm2 = data_pm2_inv.pr.merge(data_pm2_trends,tolerance=0.11)
+    data_pm2 = data_pm2_inv.pr.merge(data_pm2_trends, tolerance=0.11)
    # convert back to IF to have units in the fixed format
    data_if = data_pm2.pr.to_interchange_format()
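
The tolerance argument of pr.merge() sets the relative deviation allowed
where both datasets provide a value for the same data point. With
tolerance=0.11 the following (made-up) pair would still merge:

    inv, trend = 105.0, 100.0                # same data point, two tables
    assert abs(inv - trend) / trend <= 0.11  # within 11 %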
 
 
@@ -248,40 +287,44 @@ if __name__ == "__main__":
    if not output_folder.exists():
        output_folder.mkdir()
    pm2.pm2io.write_interchange_format(
-        output_folder / (output_filename + coords_terminologies["category"] + "_raw"), data_if)
+        output_folder / (output_filename + coords_terminologies["category"] + "_raw"),
+        data_if,
+    )
 
 
    encoding = {var: compression for var in data_pm2.data_vars}
    data_pm2.pr.to_netcdf(
-        output_folder / (output_filename + coords_terminologies["category"] + "_raw.nc"),
-        encoding=encoding)
-
+        output_folder
+        / (output_filename + coords_terminologies["category"] + "_raw.nc"),
+        encoding=encoding,
+    )
 
 
    #### processing
    data_proc_pm2 = data_pm2

    # combine CO2 emissions and removals
    temp_CO2 = data_proc_pm2["CO2"].copy()
-    #data_proc_pm2["CO2"] = data_proc_pm2[["CO2 emissions", "CO2 removals"]].to_array()
+    # data_proc_pm2["CO2"] = data_proc_pm2[["CO2 emissions", "CO2 removals"]].to_array()
    # .pr.sum(dim="variable", skipna=True, min_count=1)
-    data_proc_pm2["CO2"] = data_proc_pm2[["CO2 emissions", "CO2 removals"]].pr.sum\
-        (dim="entity", skipna=True, min_count=1)
+    data_proc_pm2["CO2"] = data_proc_pm2[["CO2 emissions", "CO2 removals"]].pr.sum(
+        dim="entity", skipna=True, min_count=1
+    )
    data_proc_pm2["CO2"].attrs = temp_CO2.attrs
    data_proc_pm2["CO2"] = data_proc_pm2["CO2"].fillna(temp_CO2)

    # actual processing
    country_processing_step1 = {
-        'aggregate_cats': cats_to_agg,
+        "aggregate_cats": cats_to_agg,
    }
    data_proc_pm2 = process_data_for_country(
        data_proc_pm2,
-        entities_to_ignore=['CO2 emissions', 'CO2 removals'],
+        entities_to_ignore=["CO2 emissions", "CO2 removals"],
        gas_baskets={},
        processing_info_country=country_processing_step1,
    )

    country_processing_step2 = {
-        'downscale': downscaling,
-        'basket_copy': basket_copy,
+        "downscale": downscaling,
+        "basket_copy": basket_copy,
    }

    data_proc_pm2 = process_data_for_country(
@@ -289,16 +332,16 @@ if __name__ == "__main__":
        entities_to_ignore=[],
        gas_baskets=gas_baskets,
        processing_info_country=country_processing_step2,
-        cat_terminology_out = terminology_proc,
-        category_conversion = cat_conversion,
-        sectors_out = sectors_to_save,
+        cat_terminology_out=terminology_proc,
+        category_conversion=cat_conversion,
+        sectors_out=sectors_to_save,
    )

    # adapt source and metadata
    # TODO: processing info is present twice
-    current_source = data_proc_pm2.coords["source"].values[0]
+    current_source = data_proc_pm2.coords["source"].to_numpy()[0]
    data_temp = data_proc_pm2.pr.loc[{"source": current_source}]
-    data_proc_pm2 = data_proc_pm2.pr.set("source", 'BUR_NIR', data_temp)
+    data_proc_pm2 = data_proc_pm2.pr.set("source", "BUR_NIR", data_temp)
 
 
    # ###
    # save data to IF and native format
@@ -307,9 +350,10 @@ if __name__ == "__main__":
    if not output_folder.exists():
        output_folder.mkdir()
    pm2.pm2io.write_interchange_format(
-        output_folder / (output_filename + terminology_proc), data_proc_if)
+        output_folder / (output_filename + terminology_proc), data_proc_if
+    )
 
 
    encoding = {var: compression for var in data_proc_pm2.data_vars}
    data_proc_pm2.pr.to_netcdf(
-        output_folder / (output_filename + terminology_proc + ".nc"),
-        encoding=encoding)
+        output_folder / (output_filename + terminology_proc + ".nc"), encoding=encoding
+    )

+ 30 - 0
src/unfccc_ghg_data/unfccc_reader/Malaysia/__init__.py

@@ -0,0 +1,30 @@
+"""Read Malaysia's BURs, NIRs, NCs
+
+Scripts and configurations to read Malaysia's submissions to the UNFCCC.
+Currently, the following submissions and datasets are available (all datasets
+including DI (read using the DI-reader) and legacy BUR/NIR (no code)):
+
+.. exec_code::
+    :hide_code:
+
+    from unfccc_ghg_data.helper.functions import (get_country_datasets,
+                                                  get_country_submissions)
+    country = 'MYS'
+    # print available submissions
+    print("="*15 + " Available submissions " + "="*15)
+    get_country_submissions(country, True)
+    print("")
+
+    #print available datasets
+    print("="*15 + " Available datasets " + "="*15)
+    get_country_datasets(country, True)
+
+You can also obtain this information running
+
+.. code-block:: bash
+
+    poetry run doit country_info country=MYS
+
+See below for a listing of scripts for BUR/NIR reading including links.
+
+"""

+ 922 - 602
src/unfccc_ghg_data/unfccc_reader/Malaysia/config_mys_bur3.py

@@ -1,16 +1,22 @@
+"""Config for Malaysia's BUR3
+
+Full configuration including PRIMAP2 conversion config and metadata
+
+"""
+
gwp_to_use = "AR4GWP100"


cat_names_fix = {
-    '2A3 Glass Prod.': '2A3 Glass Production',
-    '2F6 Other Applications': '2F6 Other Applications (please specify)',
-    '3A2 Manure Mngmt': '3A2 Manure Mngmt.',
-    '3C7 Rice Cultivations': '3C7 Rice Cultivation',
+    "2A3 Glass Prod.": "2A3 Glass Production",
+    "2F6 Other Applications": "2F6 Other Applications (please specify)",
+    "3A2 Manure Mngmt": "3A2 Manure Mngmt.",
+    "3C7 Rice Cultivations": "3C7 Rice Cultivation",
}

values_replacement = {
-    '': '-',
-    ' ': '-',
+    "": "-",
+    " ": "-",
}

cols_for_space_stripping = ["Categories"]
@@ -18,25 +24,25 @@ cols_for_space_stripping = ["Categories"]
index_cols = ["Categories", "entity", "unit"]

# parameters part 2: conversion to interchange format
-cats_remove = ['Memo items', 'Information items']
+cats_remove = ["Memo items", "Information items"]
 
 
cat_codes_manual = {
-    'Annual change in long-term storage of carbon in HWP waste': 'M.LTS.AC.HWP',
-    'Annual change in total long-term storage of carbon stored': 'M.LTS.AC.TOT',
-    'CO2 captured': 'M.CCS',
-    'CO2 from Biomass Burning for Energy Production': 'M.BIO',
-    'For domestic storage': 'M.CCS.DOM',
-    'For storage in other countries': 'M.CCS.OCT',
-    'International Aviation (International Bunkers)': 'M.BK.A',
-    'International Bunkers': 'M.BK',
-    'International Water-borne Transport (International Bunkers)': 'M.BK.M',
-    'Long-term storage of carbon in waste disposal sites': 'M.LTS.WASTE',
-    'Multilateral Operations': 'M.MULTIOP',
-    'Other (please specify)': 'M.OTHER',
-    'Total National Emissions and Removals': '0',
+    "Annual change in long-term storage of carbon in HWP waste": "M.LTS.AC.HWP",
+    "Annual change in total long-term storage of carbon stored": "M.LTS.AC.TOT",
+    "CO2 captured": "M.CCS",
+    "CO2 from Biomass Burning for Energy Production": "M.BIO",
+    "For domestic storage": "M.CCS.DOM",
+    "For storage in other countries": "M.CCS.OCT",
+    "International Aviation (International Bunkers)": "M.BK.A",
+    "International Bunkers": "M.BK",
+    "International Water-borne Transport (International Bunkers)": "M.BK.M",
+    "Long-term storage of carbon in waste disposal sites": "M.LTS.WASTE",
+    "Multilateral Operations": "M.MULTIOP",
+    "Other (please specify)": "M.OTHER",
+    "Total National Emissions and Removals": "0",
}

-cat_code_regexp = r'(?P<code>^[A-Z0-9]{1,4})\s.*'
+cat_code_regexp = r"(?P<code>^[A-Z0-9]{1,4})\s.*"
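
cat_code_regexp extracts the leading category code from each category name;
a quick check against names from cat_names_fix above:

    import re

    cat_code_regexp = r"(?P<code>^[A-Z0-9]{1,4})\s.*"
    for name in ["2A3 Glass Production", "3C7 Rice Cultivation"]:
        print(re.match(cat_code_regexp, name).group("code"))  # 2A3, 3C7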
 
 
coords_terminologies = {
    "area": "ISO3",
@@ -48,17 +54,12 @@ coords_defaults = {
     "source": "MYS-GHG-inventory",
     "source": "MYS-GHG-inventory",
     "provenance": "measured",
     "provenance": "measured",
     "area": "MYS",
     "area": "MYS",
-    "scenario": "BUR3"
+    "scenario": "BUR3",
}
 
 
-coords_value_mapping = {
-}
+coords_value_mapping = {}
 
 
-coords_cols = {
-    "category": "Categories",
-    "entity": "entity",
-    "unit": "unit"
-}
+coords_cols = {"category": "Categories", "entity": "entity", "unit": "unit"}
 
 
add_coords_cols = {
    "orig_cat_name": ["orig_cat_name", "category"],
@@ -76,600 +77,919 @@ meta_data = {
terminology_proc = coords_terminologies["category"]

table_def_templates = {
-    '184': { #184
-        "area": ['54,498,793,100'],
-        "cols": ['150,197,250,296,346,394,444,493,540,587,637,685,738'],
-        "rows_to_fix": {
-            3: ['Total National', '1A Fuel Combustion', '1A1 Energy', '1A2 Manufacturing',
-                '1B Fugitive', '1B2 Oil and Natural', '1B3 Other emissions',
-                '1C Carbon Dioxide', '2 INDUSTRIAL', '2A1 Cement',
-               ],
-        },
-    },
-    '185': { #184
-        "area": ['34,504,813,99'],
-        "cols": ['128,177,224,273,321,373,425,473,519,564,611,661,713,765'],
-        "rows_to_fix": {
-            3: ['Total National', '1A Fuel', '1A1 Energy', '1A2 Manufacturing',
-                '1B Fugitive', '1B2 Oil and Natural', '1B3 Other',
-                '1C Carbon Dioxide', '2 INDUSTRIAL', '2A Mineral',
-                '2A1 Cement', '2A2 Lime',
-               ],
-        },
-    },
-    '186': { #also 200
-        "area": ['53,498,786,104'],
-        "cols": ['150,197,238,296,347,396,444,489,540,587,634,686,739'],
-        "rows_to_fix": {
-            3: ['2A3 Glass', '2A4 Other Process', '2A5 Other (please',
-                '2B Chemical', '2B1 Ammonia', '2B2 Nitric Acid',
-                '2B3 Adipic Acid', '2B4 Caprolactam,', '2B5 Carbide',
-                '2B6 Titanium', '2B7 Soda Ash', '2B8 Petrochemical',
-                '2B10 Other (Please', '2C1 Iron and Steel', '2C2 Ferroalloys'
-               ],
-            2: ['2B9 Fluorochemical'],
-        },
-    },
-    '187': { # also 201
-        "area": ['39,499,807,91'],
-        "cols": ['132,185,232,280,327,375,425,470,522,568,613,664,713,763'],
-        "rows_to_fix": {
-            3: ['2A3 Glass', '2A4 Other Process', '2A5 Other (please',
-                '2B Chemical', '2B1 Ammonia', '2B2 Nitric Acid',
-                '2B3 Adipic Acid', '2B5 Carbide',
-                '2B6 Titanium', '2B7 Soda Ash', '2B8 Petrochemical',
-                '2B10 Other (Please', '2C1 Iron and Steel', '2C2 Ferroalloys',
-               ],
-            2: ['2B9 Fluorochemical'],
-            5: ['2B4 Caprolactam,'],
-        },
-    },
-    '188': {
-        "area": ['48,503,802,92'],
-        "cols": ['146,194,245,295,346,400,452,500,549,596,642,695,746'],
-        "rows_to_fix": {
-            3: ['2C3 Aluminium', '2C4 Magnesium', '2C7 Other (please',
-                '2D Non-Energy', '2D2 Paraffin Wax', '2D4 Other (please',
-                '2E Electronics', '2E1 Integrated', '2E5 Other (please',
-                '2F1 Refrigeration',
-               ],
-            2: ['2E2 TFT Flat Panel', '2E4 Heat Transfer'],
-            5: ['2F Product Uses as'],
-        },
-    },
-    '189': {
-        "area": ['41,499,806,95'],
-        "cols": ['141,184,233,282,331,376,427,472,520,567,618,665,717,760'],
-        "rows_to_fix": {
-            3: ['2C3 Aluminium', '2C4 Magnesium', '2C7 Other (please',
-                '2D Non-Energy', '2D2 Paraffin Wax', '2D4 Other (please',
-                '2E Electronics', '2E1 Integrated', '2E5 Other (please',
-                '2F1 Refrigeration',
-               ],
-            2: ['2E2 TFT Flat Panel', '2E4 Heat Transfer'],
-            5: ['2F Product Uses as'],
-        },
-    },
-    '190': {
-        "area": ['45,500,802,125'],
-        "cols": ['146,193,243,295,349,400,453,501,549,595,644,696,748'],
-        "rows_to_fix": {
-            3: ['2F2 Foam Blowing', '2F6 Other', '2G Other Product',
-                '2G2 SF6 and PFCs', '2G4 Other (Please', '2H1 Pulp and Paper',
-                '2H2 Food and', '2H3 Other (please', '3 AGRICULTURE,',
-               ],
-            2: ['2G1 Electrical', '2G3 N2O from', '3A1 Enteric'],
-        },
-    },
-    '191': {
-        "area": ['38,498,814,120'],
-        "cols": ['130,180,229,277,326,381,429,477,526,570,620,669,717,765'],
-        "rows_to_fix": {
-            3: ['2F2 Foam Blowing', '2F6 Other', '2G Other Product',
-                '2G2 SF6 and PFCs', '2G4 Other (Please', '2H1 Pulp and Paper',
-                '2H2 Food and', '2H3 Other (please', '3 AGRICULTURE,',
-               ],
-            2: ['2G1 Electrical', '2G3 N2O from', '3A1 Enteric'],
-        },
-    },
-    '192': {
-        "area": ['39,502,807,106'],
-        "cols": ['134,193,245,296,346,400,455,507,556,602,650,701,755'],
-        "rows_to_fix": {
-            3: ['3C1 Emissions from', '3C4 Direct N2O', '3C5 Indirect N2O',
-                '3C6 Indirect N2O', '3C8 Other (please', '3D1 Harvested Wood',
-                '3D2 Other (please',
-               ],
-            5: ['3C Aggregate',],
-        },
-    },
-    '193': {
-        "area": ['36,508,815,119'],
-        "cols": ['128,179,228,278,327,379,428,476,525,571,622,670,717,766'],
-        "rows_to_fix": {
-            3: ['3C1 Emissions from', '3C4 Direct N2O', '3C5 Indirect N2O',
-                '3C6 Indirect N2O', '3C8 Other (please', '3D1 Harvested',
-                '3D2 Other (please',
-               ],
-            5: ['3C Aggregate',],
-        },
-    },
-    '194': {
-        "area": ['80,502,762,151'],
-        "cols": ['201,243,285,329,376,419,462,502,551,591,635,679,724'],
-        "rows_to_fix": {
-            3: ['4C Incineration and', '4C2 Open Burning of', '4E Other',],
-            2: ['4A1 Managed Waste', '4A2 Unmanaged Waste', '4A3 Uncategorised Waste',
-                '4B Biological Treatment', '4D Wastewater', '4D1 Domestic Wastewater',
-                '4D2 Industrial Wastewater',
-               ],
-            5: ['5A Indirect N2O'],
-        },
-    },
-    '195': {
-        "area": ['78,508,765,103'],
-        "cols": ['191,230,271,314,352,400,438,475,519,566,600,645,686,730'],
-        "rows_to_fix": {
-            3: ['4C Incineration and', '4C2 Open Burning of', '4E Other',
-                '4B Biological', '4D Wastewater', '4D1 Domestic',
-                '4D2 Industrial', '5B Other (please'
-               ],
-            2: ['4A1 Managed Waste', '4A2 Unmanaged Waste', '4A3 Uncategorised',
-                '4A Solid Waste',
-               ],
-            5: ['5A Indirect N2O'],
-        },
-    },
-    '196': {
-        "area": ['80,502,762,151'],
-        "cols": ['201,243,285,329,376,419,462,502,551,591,635,679,724'],
-        "rows_to_fix": {
-            3: ['International Aviation', 'International Water-borne',
-                'CO2 from Biomass Burning', 'For storage in other',
-                'Long-term storage of', 'Annual change in total',
-                'Annual change in long-',
-               ],
-        },
-    },
-    '197': {
-        "area": ['74,507,779,201'],
-        "cols": ['182,226,268,311,354,398,444,482,524,565,610,654,693,733'],
-        "rows_to_fix": {
-            3: ['International Aviation', 'International Water-',
-                'CO2 from Biomass', 'For storage in other',
-                'Long-term storage of', 'Annual change in total',
-                'Annual change in long-',
-               ],
-        },
-    },
-    '198': { # first CH4 table
-        "area": ['54,498,793,100'],
-        "cols": ['140,197,250,296,346,394,444,493,540,587,637,685,738'],
-        "rows_to_fix": {
-            3: ['Total National', '1A Fuel Combustion', '1A1 Energy', '1A2 Manufacturing',
-                '1B Fugitive', '1B2 Oil and Natural', '1B3 Other emissions',
-                '1C Carbon Dioxide', '2 INDUSTRIAL', '2A1 Cement',
-               ],
-            -3: ['2A Mineral Industry'],
-        },
-    },
-    '199': {
-        "area": ['34,506,818,97'],
-        "cols": ['132,177,228,276,329,377,432,479,528,574,618,667,722,774'],
-        "rows_to_fix": {
-            3: ['Total National', '1A Fuel', '1A1 Energy', '1A2 Manufacturing',
-                '1B Fugitive', '1B2 Oil and Natural', '1B3 Other',
-                '1C Carbon Dioxide', '2 INDUSTRIAL', '2A1 Cement',
-                '2A Mineral', '2A2 Lime',
-               ],
-        },
-    },
-    '202': {
-        "area": ['48,503,802,92'],
-        "cols": ['146,194,245,295,346,400,452,500,549,596,642,695,746'],
-        "rows_to_fix": {
-            3: ['2C3 Aluminium', '2C7 Other (please',
-                '2D Non-Energy', '2D2 Paraffin Wax', '2D4 Other (please',
-                '2E Electronics', '2E1 Integrated', '2E5 Other (please',
-               ],
-            2: ['2C4 Magnesium', '2E2 TFT Flat Panel', '2E4 Heat Transfer',
-                '2F1 Refrigeration',
-               ],
-            5: ['2F Product Uses as'],
-        },
-    },
-    '203': {
-        "area": ['41,499,806,95'],
-        "cols": ['141,184,233,282,331,376,427,472,520,567,618,665,717,760'],
-        "rows_to_fix": {
-            3: ['2C3 Aluminium', '2C7 Other (please',
-                '2D Non-Energy', '2D2 Paraffin Wax', '2D4 Other (please',
-                '2E Electronics', '2E1 Integrated', '2E5 Other (please',
-               ],
-            2: ['2C4 Magnesium', '2E2 TFT Flat Panel', '2E4 Heat Transfer',
-                '2F1 Refrigeration'
-               ],
-            5: ['2F Product Uses as'],
-        },
-    },
-    '204': {
-        "area": ['45,500,802,125'],
-        "cols": ['146,193,243,295,349,400,455,501,549,595,644,696,748'],
-        "rows_to_fix": {
-            3: ['2F6 Other', '2G Other Product',
-                '2G2 SF6 and PFCs', '2G4 Other (Please', '2H1 Pulp and Paper',
-                '2H2 Food and', '2H3 Other (please', '3 AGRICULTURE,',
-                '3A1 Enteric',
-               ],
-            2: ['2F2 Foam Blowing', '2G1 Electrical', '2G3 N2O from'],
-        },
-    },
-    '205': {
-        "area": ['38,498,814,120'],
-        "cols": ['130,180,229,277,326,381,429,477,526,570,620,669,717,765'],
-        "rows_to_fix": {
-            3: ['2F6 Other', '2G Other Product',
-                '2G2 SF6 and PFCs', '2G4 Other (Please', '2H1 Pulp and Paper',
-                '2H2 Food and', '2H3 Other (please', '3 AGRICULTURE,',
-                '3A1 Enteric',
-               ],
-            2: ['2F2 Foam Blowing', '2G1 Electrical', '2G3 N2O from'],
-        },
-    },
-    '206': { #also 220
-        "area": ['39,502,807,106'],
-        "cols": ['134,193,245,296,346,400,455,507,556,602,650,701,755'],
-        "rows_to_fix": {
-            3: ['3C1 Emissions from', '3C4 Direct N2O', '3C5 Indirect N2O',
-                '3C6 Indirect N2O', '3C8 Other (please',
-                '3D2 Other (please',
-               ],
-            2: ['3D1 Harvested Wood',],
-            5: ['3C Aggregate',],
-        },
-    },
-    '207': { # also 221
-        "area": ['36,508,815,110'],
-        "cols": ['128,179,228,278,327,379,428,476,527,571,622,670,717,766'],
-        "rows_to_fix": {
-            3: ['3C1 Emissions from', '3C4 Direct N2O', '3C5 Indirect N2O',
-                '3C6 Indirect N2O', '3C8 Other (please',
-                '3D2 Other (please',
-               ],
-            2: ['3D1 Harvested',],
-            5: ['3C Aggregate',],
-        },
-    },
-    '208': { # also 222
-        "area": ['80,502,762,151'],
-        "cols": ['201,243,285,329,376,419,462,502,551,591,635,679,724'],
-        "rows_to_fix": {
-            3: ['4C Incineration and', '4C2 Open Burning of', '4E Other',
-                '4A1 Managed Waste', '4A2 Unmanaged Waste', '4A3 Uncategorised Waste',
-                '4B Biological Treatment', '4D Wastewater', '4D1 Domestic Wastewater',
-                '4D2 Industrial Wastewater'
-               ],
-            5: ['5A Indirect N2O'],
-        },
-    },
-    '209': { # also 223
-        "area": ['78,508,765,103'],
-        "cols": ['191,230,271,314,352,400,438,475,519,560,600,645,686,730'],
-        "rows_to_fix": {
-            3: ['4C Incineration and', '4C2 Open Burning of', '4E Other',
-                '4B Biological', '4D Wastewater', '4D1 Domestic',
-                '4D2 Industrial', '5B Other (please',
-                '4A1 Managed Waste', '4A2 Unmanaged Waste', '4A3 Uncategorised',
-                '4A Solid Waste'
-               ],
-            5: ['5A Indirect N2O'],
-        },
-    },
-    '210': { # also 224
-        "area": ['80,502,762,151'],
-        "cols": ['201,243,285,329,376,419,462,502,551,591,635,679,724'],
-        "rows_to_fix": {
-            3: ['International Aviation', 'International Water-borne',
-                'Long-term storage of', 'Annual change in total',
-                'Annual change in long-',
-               ],
-            2: ['CO2 from Biomass Burning', 'For storage in other',],
-        },
-    },
-    '211': { # also 225
-        "area": ['74,507,779,201'],
-        "cols": ['182,226,268,311,354,398,444,482,524,565,610,654,693,733'],
-        "rows_to_fix": {
-            3: ['International Aviation', 'International Water-',
-                'Long-term storage of', 'Annual change in total',
-                'Annual change in long-', 'CO2 from Biomass',
-               ],
-            2: ['For storage in other',],
-        },
-    },
-    '212': {
-        "area": ['54,498,793,100'],
-        "cols": ['150,197,250,296,346,394,444,493,540,587,637,685,738'],
-        "rows_to_fix": {
-            3: ['Total National', '1A Fuel Combustion', '1A1 Energy', '1A2 Manufacturing',
-                '1B Fugitive', '1B2 Oil and Natural', '1B3 Other emissions',
-                '1C Carbon Dioxide', '2 INDUSTRIAL',
-               ],
-            2: ['2A1 Cement',],
-        },
-    },
-    '213': {
-        "area": ['34,504,813,99'],
-        "cols": ['128,177,224,273,321,373,425,473,519,564,611,661,713,765'],
-        "rows_to_fix": {
-            3: ['Total National', '1A Fuel', '1A1 Energy', '1A2 Manufacturing',
-                '1B Fugitive', '1B2 Oil and Natural', '1B3 Other',
-                '1C Carbon Dioxide', '2 INDUSTRIAL', '2A Mineral',
-               ],
-            2: ['2A1 Cement', '2A2 Lime',],
-        },
-    },
-    '214': {
-        "area": ['47,499,801,93'],
-        "cols": ['141,197,246,297,350,396,453,502,550,595,642,692,748'],
-        "rows_to_fix": {
-            3: ['2A5 Other (please',
-                '2B Chemical', '2B1 Ammonia', '2B2 Nitric Acid',
-                '2B3 Adipic Acid', '2B4 Caprolactam,', '2B5 Carbide',
-                '2B6 Titanium', '2B7 Soda Ash', '2B8 Petrochemical',
-                '2B10 Other (Please', '2C1 Iron and Steel', '2C2 Ferroalloys'
-               ],
-            2: ['2A3 Glass', '2A4 Other Process', '2B9 Fluorochemical'],
-            -3: ['2C Metal Industry'],
-        },
-    },
-    '215': {
-        "area": ['39,499,807,91'],
-        "cols": ['132,180,232,280,327,375,425,470,522,568,613,664,713,763'],
-        "rows_to_fix": {
-            3: ['2A5 Other (please',
-                '2B Chemical', '2B1 Ammonia', '2B2 Nitric Acid',
-                '2B3 Adipic Acid', '2B4 Caprolactam,', '2B5 Carbide',
-                '2B6 Titanium Dioxide', '2B7 Soda Ash', '2B8 Petrochemical',
-                '2B10 Other (Please', '2C1 Iron and Steel', '2C2 Ferroalloys'
-               ],
-            2: ['2A4 Other Process', '2B9 Fluorochemical'],
-            -3: ['2C Metal Industry'],
-        },
-    },
-    '216': {
-        "area": ['48,503,802,92'],
-        "cols": ['146,194,245,295,346,400,452,500,549,596,642,695,746'],
-        "rows_to_fix": {
-            3: ['2C7 Other (please', '2D Non-Energy', '2D2 Paraffin Wax',
-                '2D4 Other (please', '2E Electronics', '2E1 Integrated',
-                '2E5 Other (please',
-               ],
-            2: ['2C3 Aluminium', '2C4 Magnesium', '2E2 TFT Flat Panel',
-                '2E4 Heat Transfer', '2F1 Refrigeration',
-               ],
-            5: ['2F Product Uses as'],
-        },
-    },
-    '217': {
-        "area": ['41,499,806,95'],
-        "cols": ['141,184,233,282,331,376,427,472,520,567,618,665,717,760'],
-        "rows_to_fix": {
-            3: ['2C7 Other (please', '2D Non-Energy', '2D2 Paraffin Wax',
-                '2D4 Other (please', '2E Electronics', '2E1 Integrated',
-                '2E5 Other (please',
-               ],
-            2: ['2C3 Aluminium', '2C4 Magnesium', '2E2 TFT Flat Panel',
-                '2E4 Heat Transfer', '2F1 Refrigeration',
-               ],
-            5: ['2F Product Uses as'],
-        },
-    },
-    '218': {
-        "area": ['45,500,802,125'],
-        "cols": ['146,193,243,295,349,400,455,501,549,595,644,696,748'],
-        "rows_to_fix": {
-            3: ['2F6 Other', '2G Other Product', '2G2 SF6 and PFCs',
-                '2G3 N2O from', '2H3 Other (please', '3 AGRICULTURE,',
-               ],
-            2: ['2F2 Foam Blowing', '2G1 Electrical', '2G4 Other (Please',
-                '2H1 Pulp and Paper', '2H2 Food and', '3A1 Enteric',],
-        },
-    },
-    '219': {
-        "area": ['38,498,814,120'],
-        "cols": ['130,180,229,277,326,381,429,477,526,570,620,669,717,765'],
-        "rows_to_fix": {
-            3: ['2F6 Other', '2G Other Product', '2G2 SF6 and PFCs',
-                '2G3 N2O from', '2H3 Other (please', '3 AGRICULTURE,',
-               ],
-            2: ['2F2 Foam Blowing', '2G1 Electrical', '2G4 Other (Please',
-                '2H1 Pulp and Paper', '2H2 Food and', '3A1 Enteric',],
-        },
-    },
-    '226': { # also 334, 238
-        "area": ['48,510,797,99'],
-        "cols": ['271,310,350,393,435,475,514,557,594,640,678,719,760'],
-        "rows_to_fix": {
-            2: ['2B4 Caprolactam, Glyoxal and Glyoxylic Acid'],
-        }
-    },
-    '227': { # also 331, 335, 339
-        "area": ['27,510,818,99'],
-        "cols": ['250,290,333,372,413,452,494,536,576,616,656,699,739,781'],
-        "rows_to_fix": {
-            2: ['2B4 Caprolactam, Glyoxal and Glyoxylic Acid'],
-        }
-    },
-    '228': {
-        "area": ['48,510,797,99'],
-        "cols": ['271,310,350,393,435,475,514,557,594,640,678,719,760'],
-        "rows_to_fix": {
-            3: ['2F Product Uses as Substitutes for Ozone'],
-            2: ['2D Non-Energy Products from Fuels and Solvent'],
-        },
-    },
-    '229': {
-        "area": ['25,512,819,86'],
-        "cols": ['246,291,331,370,412,454,495,536,577,619,656,699,740,777'],
-        "rows_to_fix": {
-            3: ['2F Product Uses as Substitutes for Ozone'],
-            2: ['2D Non-Energy Products from Fuels and Solvent'],
+    "184": {  # 184
+        "area": ["54,498,793,100"],
+        "cols": ["150,197,250,296,346,394,444,493,540,587,637,685,738"],
+        "rows_to_fix": {
+            3: [
+                "Total National",
+                "1A Fuel Combustion",
+                "1A1 Energy",
+                "1A2 Manufacturing",
+                "1B Fugitive",
+                "1B2 Oil and Natural",
+                "1B3 Other emissions",
+                "1C Carbon Dioxide",
+                "2 INDUSTRIAL",
+                "2A1 Cement",
+            ],
+        },
+    },
+    "185": {  # 184
+        "area": ["34,504,813,99"],
+        "cols": ["128,177,224,273,321,373,425,473,519,564,611,661,713,765"],
+        "rows_to_fix": {
+            3: [
+                "Total National",
+                "1A Fuel",
+                "1A1 Energy",
+                "1A2 Manufacturing",
+                "1B Fugitive",
+                "1B2 Oil and Natural",
+                "1B3 Other",
+                "1C Carbon Dioxide",
+                "2 INDUSTRIAL",
+                "2A Mineral",
+                "2A1 Cement",
+                "2A2 Lime",
+            ],
+        },
+    },
+    "186": {  # also 200
+        "area": ["53,498,786,104"],
+        "cols": ["150,197,238,296,347,396,444,489,540,587,634,686,739"],
+        "rows_to_fix": {
+            3: [
+                "2A3 Glass",
+                "2A4 Other Process",
+                "2A5 Other (please",
+                "2B Chemical",
+                "2B1 Ammonia",
+                "2B2 Nitric Acid",
+                "2B3 Adipic Acid",
+                "2B4 Caprolactam,",
+                "2B5 Carbide",
+                "2B6 Titanium",
+                "2B7 Soda Ash",
+                "2B8 Petrochemical",
+                "2B10 Other (Please",
+                "2C1 Iron and Steel",
+                "2C2 Ferroalloys",
+            ],
+            2: ["2B9 Fluorochemical"],
+        },
+    },
+    "187": {  # also 201
+        "area": ["39,499,807,91"],
+        "cols": ["132,185,232,280,327,375,425,470,522,568,613,664,713,763"],
+        "rows_to_fix": {
+            3: [
+                "2A3 Glass",
+                "2A4 Other Process",
+                "2A5 Other (please",
+                "2B Chemical",
+                "2B1 Ammonia",
+                "2B2 Nitric Acid",
+                "2B3 Adipic Acid",
+                "2B5 Carbide",
+                "2B6 Titanium",
+                "2B7 Soda Ash",
+                "2B8 Petrochemical",
+                "2B10 Other (Please",
+                "2C1 Iron and Steel",
+                "2C2 Ferroalloys",
+            ],
+            2: ["2B9 Fluorochemical"],
+            5: ["2B4 Caprolactam,"],
+        },
+    },
+    "188": {
+        "area": ["48,503,802,92"],
+        "cols": ["146,194,245,295,346,400,452,500,549,596,642,695,746"],
+        "rows_to_fix": {
+            3: [
+                "2C3 Aluminium",
+                "2C4 Magnesium",
+                "2C7 Other (please",
+                "2D Non-Energy",
+                "2D2 Paraffin Wax",
+                "2D4 Other (please",
+                "2E Electronics",
+                "2E1 Integrated",
+                "2E5 Other (please",
+                "2F1 Refrigeration",
+            ],
+            2: ["2E2 TFT Flat Panel", "2E4 Heat Transfer"],
+            5: ["2F Product Uses as"],
+        },
+    },
+    "189": {
+        "area": ["41,499,806,95"],
+        "cols": ["141,184,233,282,331,376,427,472,520,567,618,665,717,760"],
+        "rows_to_fix": {
+            3: [
+                "2C3 Aluminium",
+                "2C4 Magnesium",
+                "2C7 Other (please",
+                "2D Non-Energy",
+                "2D2 Paraffin Wax",
+                "2D4 Other (please",
+                "2E Electronics",
+                "2E1 Integrated",
+                "2E5 Other (please",
+                "2F1 Refrigeration",
+            ],
+            2: ["2E2 TFT Flat Panel", "2E4 Heat Transfer"],
+            5: ["2F Product Uses as"],
+        },
+    },
+    "190": {
+        "area": ["45,500,802,125"],
+        "cols": ["146,193,243,295,349,400,453,501,549,595,644,696,748"],
+        "rows_to_fix": {
+            3: [
+                "2F2 Foam Blowing",
+                "2F6 Other",
+                "2G Other Product",
+                "2G2 SF6 and PFCs",
+                "2G4 Other (Please",
+                "2H1 Pulp and Paper",
+                "2H2 Food and",
+                "2H3 Other (please",
+                "3 AGRICULTURE,",
+            ],
+            2: ["2G1 Electrical", "2G3 N2O from", "3A1 Enteric"],
+        },
+    },
+    "191": {
+        "area": ["38,498,814,120"],
+        "cols": ["130,180,229,277,326,381,429,477,526,570,620,669,717,765"],
+        "rows_to_fix": {
+            3: [
+                "2F2 Foam Blowing",
+                "2F6 Other",
+                "2G Other Product",
+                "2G2 SF6 and PFCs",
+                "2G4 Other (Please",
+                "2H1 Pulp and Paper",
+                "2H2 Food and",
+                "2H3 Other (please",
+                "3 AGRICULTURE,",
+            ],
+            2: ["2G1 Electrical", "2G3 N2O from", "3A1 Enteric"],
+        },
+    },
+    "192": {
+        "area": ["39,502,807,106"],
+        "cols": ["134,193,245,296,346,400,455,507,556,602,650,701,755"],
+        "rows_to_fix": {
+            3: [
+                "3C1 Emissions from",
+                "3C4 Direct N2O",
+                "3C5 Indirect N2O",
+                "3C6 Indirect N2O",
+                "3C8 Other (please",
+                "3D1 Harvested Wood",
+                "3D2 Other (please",
+            ],
+            5: [
+                "3C Aggregate",
+            ],
+        },
+    },
+    "193": {
+        "area": ["36,508,815,119"],
+        "cols": ["128,179,228,278,327,379,428,476,525,571,622,670,717,766"],
+        "rows_to_fix": {
+            3: [
+                "3C1 Emissions from",
+                "3C4 Direct N2O",
+                "3C5 Indirect N2O",
+                "3C6 Indirect N2O",
+                "3C8 Other (please",
+                "3D1 Harvested",
+                "3D2 Other (please",
+            ],
+            5: [
+                "3C Aggregate",
+            ],
+        },
+    },
+    "194": {
+        "area": ["80,502,762,151"],
+        "cols": ["201,243,285,329,376,419,462,502,551,591,635,679,724"],
+        "rows_to_fix": {
+            3: [
+                "4C Incineration and",
+                "4C2 Open Burning of",
+                "4E Other",
+            ],
+            2: [
+                "4A1 Managed Waste",
+                "4A2 Unmanaged Waste",
+                "4A3 Uncategorised Waste",
+                "4B Biological Treatment",
+                "4D Wastewater",
+                "4D1 Domestic Wastewater",
+                "4D2 Industrial Wastewater",
+            ],
+            5: ["5A Indirect N2O"],
+        },
+    },
+    "195": {
+        "area": ["78,508,765,103"],
+        "cols": ["191,230,271,314,352,400,438,475,519,566,600,645,686,730"],
+        "rows_to_fix": {
+            3: [
+                "4C Incineration and",
+                "4C2 Open Burning of",
+                "4E Other",
+                "4B Biological",
+                "4D Wastewater",
+                "4D1 Domestic",
+                "4D2 Industrial",
+                "5B Other (please",
+            ],
+            2: [
+                "4A1 Managed Waste",
+                "4A2 Unmanaged Waste",
+                "4A3 Uncategorised",
+                "4A Solid Waste",
+            ],
+            5: ["5A Indirect N2O"],
+        },
+    },
+    "196": {
+        "area": ["80,502,762,151"],
+        "cols": ["201,243,285,329,376,419,462,502,551,591,635,679,724"],
+        "rows_to_fix": {
+            3: [
+                "International Aviation",
+                "International Water-borne",
+                "CO2 from Biomass Burning",
+                "For storage in other",
+                "Long-term storage of",
+                "Annual change in total",
+                "Annual change in long-",
+            ],
+        },
+    },
+    "197": {
+        "area": ["74,507,779,201"],
+        "cols": ["182,226,268,311,354,398,444,482,524,565,610,654,693,733"],
+        "rows_to_fix": {
+            3: [
+                "International Aviation",
+                "International Water-",
+                "CO2 from Biomass",
+                "For storage in other",
+                "Long-term storage of",
+                "Annual change in total",
+                "Annual change in long-",
+            ],
+        },
+    },
+    "198": {  # first CH4 table
+        "area": ["54,498,793,100"],
+        "cols": ["140,197,250,296,346,394,444,493,540,587,637,685,738"],
+        "rows_to_fix": {
+            3: [
+                "Total National",
+                "1A Fuel Combustion",
+                "1A1 Energy",
+                "1A2 Manufacturing",
+                "1B Fugitive",
+                "1B2 Oil and Natural",
+                "1B3 Other emissions",
+                "1C Carbon Dioxide",
+                "2 INDUSTRIAL",
+                "2A1 Cement",
+            ],
+            -3: ["2A Mineral Industry"],
+        },
+    },
+    "199": {
+        "area": ["34,506,818,97"],
+        "cols": ["132,177,228,276,329,377,432,479,528,574,618,667,722,774"],
+        "rows_to_fix": {
+            3: [
+                "Total National",
+                "1A Fuel",
+                "1A1 Energy",
+                "1A2 Manufacturing",
+                "1B Fugitive",
+                "1B2 Oil and Natural",
+                "1B3 Other",
+                "1C Carbon Dioxide",
+                "2 INDUSTRIAL",
+                "2A1 Cement",
+                "2A Mineral",
+                "2A2 Lime",
+            ],
+        },
+    },
+    "202": {
+        "area": ["48,503,802,92"],
+        "cols": ["146,194,245,295,346,400,452,500,549,596,642,695,746"],
+        "rows_to_fix": {
+            3: [
+                "2C3 Aluminium",
+                "2C7 Other (please",
+                "2D Non-Energy",
+                "2D2 Paraffin Wax",
+                "2D4 Other (please",
+                "2E Electronics",
+                "2E1 Integrated",
+                "2E5 Other (please",
+            ],
+            2: [
+                "2C4 Magnesium",
+                "2E2 TFT Flat Panel",
+                "2E4 Heat Transfer",
+                "2F1 Refrigeration",
+            ],
+            5: ["2F Product Uses as"],
+        },
+    },
+    "203": {
+        "area": ["41,499,806,95"],
+        "cols": ["141,184,233,282,331,376,427,472,520,567,618,665,717,760"],
+        "rows_to_fix": {
+            3: [
+                "2C3 Aluminium",
+                "2C7 Other (please",
+                "2D Non-Energy",
+                "2D2 Paraffin Wax",
+                "2D4 Other (please",
+                "2E Electronics",
+                "2E1 Integrated",
+                "2E5 Other (please",
+            ],
+            2: [
+                "2C4 Magnesium",
+                "2E2 TFT Flat Panel",
+                "2E4 Heat Transfer",
+                "2F1 Refrigeration",
+            ],
+            5: ["2F Product Uses as"],
+        },
+    },
+    "204": {
+        "area": ["45,500,802,125"],
+        "cols": ["146,193,243,295,349,400,455,501,549,595,644,696,748"],
+        "rows_to_fix": {
+            3: [
+                "2F6 Other",
+                "2G Other Product",
+                "2G2 SF6 and PFCs",
+                "2G4 Other (Please",
+                "2H1 Pulp and Paper",
+                "2H2 Food and",
+                "2H3 Other (please",
+                "3 AGRICULTURE,",
+                "3A1 Enteric",
+            ],
+            2: ["2F2 Foam Blowing", "2G1 Electrical", "2G3 N2O from"],
+        },
+    },
+    "205": {
+        "area": ["38,498,814,120"],
+        "cols": ["130,180,229,277,326,381,429,477,526,570,620,669,717,765"],
+        "rows_to_fix": {
+            3: [
+                "2F6 Other",
+                "2G Other Product",
+                "2G2 SF6 and PFCs",
+                "2G4 Other (Please",
+                "2H1 Pulp and Paper",
+                "2H2 Food and",
+                "2H3 Other (please",
+                "3 AGRICULTURE,",
+                "3A1 Enteric",
+            ],
+            2: ["2F2 Foam Blowing", "2G1 Electrical", "2G3 N2O from"],
+        },
+    },
+    "206": {  # also 220
+        "area": ["39,502,807,106"],
+        "cols": ["134,193,245,296,346,400,455,507,556,602,650,701,755"],
+        "rows_to_fix": {
+            3: [
+                "3C1 Emissions from",
+                "3C4 Direct N2O",
+                "3C5 Indirect N2O",
+                "3C6 Indirect N2O",
+                "3C8 Other (please",
+                "3D2 Other (please",
+            ],
+            2: [
+                "3D1 Harvested Wood",
+            ],
+            5: [
+                "3C Aggregate",
+            ],
+        },
+    },
+    "207": {  # also 221
+        "area": ["36,508,815,110"],
+        "cols": ["128,179,228,278,327,379,428,476,527,571,622,670,717,766"],
+        "rows_to_fix": {
+            3: [
+                "3C1 Emissions from",
+                "3C4 Direct N2O",
+                "3C5 Indirect N2O",
+                "3C6 Indirect N2O",
+                "3C8 Other (please",
+                "3D2 Other (please",
+            ],
+            2: [
+                "3D1 Harvested",
+            ],
+            5: [
+                "3C Aggregate",
+            ],
+        },
+    },
+    "208": {  # also 222
+        "area": ["80,502,762,151"],
+        "cols": ["201,243,285,329,376,419,462,502,551,591,635,679,724"],
+        "rows_to_fix": {
+            3: [
+                "4C Incineration and",
+                "4C2 Open Burning of",
+                "4E Other",
+                "4A1 Managed Waste",
+                "4A2 Unmanaged Waste",
+                "4A3 Uncategorised Waste",
+                "4B Biological Treatment",
+                "4D Wastewater",
+                "4D1 Domestic Wastewater",
+                "4D2 Industrial Wastewater",
+            ],
+            5: ["5A Indirect N2O"],
+        },
+    },
+    "209": {  # also 223
+        "area": ["78,508,765,103"],
+        "cols": ["191,230,271,314,352,400,438,475,519,560,600,645,686,730"],
+        "rows_to_fix": {
+            3: [
+                "4C Incineration and",
+                "4C2 Open Burning of",
+                "4E Other",
+                "4B Biological",
+                "4D Wastewater",
+                "4D1 Domestic",
+                "4D2 Industrial",
+                "5B Other (please",
+                "4A1 Managed Waste",
+                "4A2 Unmanaged Waste",
+                "4A3 Uncategorised",
+                "4A Solid Waste",
+            ],
+            5: ["5A Indirect N2O"],
+        },
+    },
+    "210": {  # also 224
+        "area": ["80,502,762,151"],
+        "cols": ["201,243,285,329,376,419,462,502,551,591,635,679,724"],
+        "rows_to_fix": {
+            3: [
+                "International Aviation",
+                "International Water-borne",
+                "Long-term storage of",
+                "Annual change in total",
+                "Annual change in long-",
+            ],
+            2: [
+                "CO2 from Biomass Burning",
+                "For storage in other",
+            ],
+        },
+    },
+    "211": {  # also 225
+        "area": ["74,507,779,201"],
+        "cols": ["182,226,268,311,354,398,444,482,524,565,610,654,693,733"],
+        "rows_to_fix": {
+            3: [
+                "International Aviation",
+                "International Water-",
+                "Long-term storage of",
+                "Annual change in total",
+                "Annual change in long-",
+                "CO2 from Biomass",
+            ],
+            2: [
+                "For storage in other",
+            ],
+        },
+    },
+    "212": {
+        "area": ["54,498,793,100"],
+        "cols": ["150,197,250,296,346,394,444,493,540,587,637,685,738"],
+        "rows_to_fix": {
+            3: [
+                "Total National",
+                "1A Fuel Combustion",
+                "1A1 Energy",
+                "1A2 Manufacturing",
+                "1B Fugitive",
+                "1B2 Oil and Natural",
+                "1B3 Other emissions",
+                "1C Carbon Dioxide",
+                "2 INDUSTRIAL",
+            ],
+            2: [
+                "2A1 Cement",
+            ],
+        },
+    },
+    "213": {
+        "area": ["34,504,813,99"],
+        "cols": ["128,177,224,273,321,373,425,473,519,564,611,661,713,765"],
+        "rows_to_fix": {
+            3: [
+                "Total National",
+                "1A Fuel",
+                "1A1 Energy",
+                "1A2 Manufacturing",
+                "1B Fugitive",
+                "1B2 Oil and Natural",
+                "1B3 Other",
+                "1C Carbon Dioxide",
+                "2 INDUSTRIAL",
+                "2A Mineral",
+            ],
+            2: [
+                "2A1 Cement",
+                "2A2 Lime",
+            ],
+        },
+    },
+    "214": {
+        "area": ["47,499,801,93"],
+        "cols": ["141,197,246,297,350,396,453,502,550,595,642,692,748"],
+        "rows_to_fix": {
+            3: [
+                "2A5 Other (please",
+                "2B Chemical",
+                "2B1 Ammonia",
+                "2B2 Nitric Acid",
+                "2B3 Adipic Acid",
+                "2B4 Caprolactam,",
+                "2B5 Carbide",
+                "2B6 Titanium",
+                "2B7 Soda Ash",
+                "2B8 Petrochemical",
+                "2B10 Other (Please",
+                "2C1 Iron and Steel",
+                "2C2 Ferroalloys",
+            ],
+            2: ["2A3 Glass", "2A4 Other Process", "2B9 Fluorochemical"],
+            -3: ["2C Metal Industry"],
+        },
+    },
+    "215": {
+        "area": ["39,499,807,91"],
+        "cols": ["132,180,232,280,327,375,425,470,522,568,613,664,713,763"],
+        "rows_to_fix": {
+            3: [
+                "2A5 Other (please",
+                "2B Chemical",
+                "2B1 Ammonia",
+                "2B2 Nitric Acid",
+                "2B3 Adipic Acid",
+                "2B4 Caprolactam,",
+                "2B5 Carbide",
+                "2B6 Titanium Dioxide",
+                "2B7 Soda Ash",
+                "2B8 Petrochemical",
+                "2B10 Other (Please",
+                "2C1 Iron and Steel",
+                "2C2 Ferroalloys",
+            ],
+            2: ["2A4 Other Process", "2B9 Fluorochemical"],
+            -3: ["2C Metal Industry"],
+        },
+    },
+    "216": {
+        "area": ["48,503,802,92"],
+        "cols": ["146,194,245,295,346,400,452,500,549,596,642,695,746"],
+        "rows_to_fix": {
+            3: [
+                "2C7 Other (please",
+                "2D Non-Energy",
+                "2D2 Paraffin Wax",
+                "2D4 Other (please",
+                "2E Electronics",
+                "2E1 Integrated",
+                "2E5 Other (please",
+            ],
+            2: [
+                "2C3 Aluminium",
+                "2C4 Magnesium",
+                "2E2 TFT Flat Panel",
+                "2E4 Heat Transfer",
+                "2F1 Refrigeration",
+            ],
+            5: ["2F Product Uses as"],
+        },
+    },
+    "217": {
+        "area": ["41,499,806,95"],
+        "cols": ["141,184,233,282,331,376,427,472,520,567,618,665,717,760"],
+        "rows_to_fix": {
+            3: [
+                "2C7 Other (please",
+                "2D Non-Energy",
+                "2D2 Paraffin Wax",
+                "2D4 Other (please",
+                "2E Electronics",
+                "2E1 Integrated",
+                "2E5 Other (please",
+            ],
+            2: [
+                "2C3 Aluminium",
+                "2C4 Magnesium",
+                "2E2 TFT Flat Panel",
+                "2E4 Heat Transfer",
+                "2F1 Refrigeration",
+            ],
+            5: ["2F Product Uses as"],
+        },
+    },
+    "218": {
+        "area": ["45,500,802,125"],
+        "cols": ["146,193,243,295,349,400,455,501,549,595,644,696,748"],
+        "rows_to_fix": {
+            3: [
+                "2F6 Other",
+                "2G Other Product",
+                "2G2 SF6 and PFCs",
+                "2G3 N2O from",
+                "2H3 Other (please",
+                "3 AGRICULTURE,",
+            ],
+            2: [
+                "2F2 Foam Blowing",
+                "2G1 Electrical",
+                "2G4 Other (Please",
+                "2H1 Pulp and Paper",
+                "2H2 Food and",
+                "3A1 Enteric",
+            ],
+        },
+    },
+    "219": {
+        "area": ["38,498,814,120"],
+        "cols": ["130,180,229,277,326,381,429,477,526,570,620,669,717,765"],
+        "rows_to_fix": {
+            3: [
+                "2F6 Other",
+                "2G Other Product",
+                "2G2 SF6 and PFCs",
+                "2G3 N2O from",
+                "2H3 Other (please",
+                "3 AGRICULTURE,",
+            ],
+            2: [
+                "2F2 Foam Blowing",
+                "2G1 Electrical",
+                "2G4 Other (Please",
+                "2H1 Pulp and Paper",
+                "2H2 Food and",
+                "3A1 Enteric",
+            ],
+        },
+    },
+    "226": {  # also 334, 238
+        "area": ["48,510,797,99"],
+        "cols": ["271,310,350,393,435,475,514,557,594,640,678,719,760"],
+        "rows_to_fix": {
+            2: ["2B4 Caprolactam, Glyoxal and Glyoxylic Acid"],
+        },
+    },
+    "227": {  # also 331, 335, 339
+        "area": ["27,510,818,99"],
+        "cols": ["250,290,333,372,413,452,494,536,576,616,656,699,739,781"],
+        "rows_to_fix": {
+            2: ["2B4 Caprolactam, Glyoxal and Glyoxylic Acid"],
+        },
+    },
+    "228": {
+        "area": ["48,510,797,99"],
+        "cols": ["271,310,350,393,435,475,514,557,594,640,678,719,760"],
+        "rows_to_fix": {
+            3: ["2F Product Uses as Substitutes for Ozone"],
+            2: ["2D Non-Energy Products from Fuels and Solvent"],
+        },
+    },
+    "229": {
+        "area": ["25,512,819,86"],
+        "cols": ["246,291,331,370,412,454,495,536,577,619,656,699,740,777"],
+        "rows_to_fix": {
+            3: ["2F Product Uses as Substitutes for Ozone"],
+            2: ["2D Non-Energy Products from Fuels and Solvent"],
+        },
+    },
+    "230": {
+        "area": ["48,510,797,99"],
+        "cols": ["271,310,350,393,435,475,514,557,594,640,678,719,760"],
+        "rows_to_fix": {
+            -3: [
+                "Total National Emissions and Removals",
+                "2 INDUSTRIAL PROCESSES AND PRODUCT USE",
+            ],
+            2: ["2B4 Caprolactam, Glyoxal and Glyoxylic Acid"],
+        },
+    },
+    "232": {  # also 236
+        "area": ["48,510,797,99"],
+        "cols": ["271,310,350,393,435,475,514,557,594,640,678,719,760"],
+        "rows_to_fix": {
+            -3: [
+                "2G2 SF6 and PFCs from Other Product Uses",
+            ],
+            2: [
+                "2D Non-Energy Products from Fuels and Solvent",
+                "2F Product Uses as Substitutes for Ozone",
+            ],
        },
    },
-    '230': {
-        "area": ['48,510,797,99'],
-        "cols": ['271,310,350,393,435,475,514,557,594,640,678,719,760'],
-        "rows_to_fix": {
-            -3: ['Total National Emissions and Removals', '2 INDUSTRIAL PROCESSES AND PRODUCT USE'],
-            2: ['2B4 Caprolactam, Glyoxal and Glyoxylic Acid'],
-        }
-    },
-    '232': { # also 236
-        "area": ['48,510,797,99'],
-        "cols": ['271,310,350,393,435,475,514,557,594,640,678,719,760'],
-        "rows_to_fix": {
-            -3: ['2G2 SF6 and PFCs from Other Product Uses',],
-            2: ['2D Non-Energy Products from Fuels and Solvent',
-                '2F Product Uses as Substitutes for Ozone',]
-        },
-    },
-    '233': {
-        "area": ['25,512,819,86'],
-        "cols": ['246,291,331,370,412,454,495,536,577,619,656,699,740,777'],
-        "rows_to_fix": {
-            -5: ['2F Product Uses as Substitutes for Ozone'],
-            2: ['2D Non-Energy Products from Fuels and Solvent'],
-            -3: ['2G Other Product Manufacture and Use',
-                 '2G2 SF6 and PFCs from Other Product Uses',]
+    "233": {
+        "area": ["25,512,819,86"],
+        "cols": ["246,291,331,370,412,454,495,536,577,619,656,699,740,777"],
+        "rows_to_fix": {
+            -5: ["2F Product Uses as Substitutes for Ozone"],
+            2: ["2D Non-Energy Products from Fuels and Solvent"],
+            -3: [
+                "2G Other Product Manufacture and Use",
+                "2G2 SF6 and PFCs from Other Product Uses",
+            ],
        },
    },
-    '237': {
-        "area": ['25,512,819,86'],
-        "cols": ['246,291,331,370,412,454,495,536,577,619,656,699,740,777'],
+    "237": {
+        "area": ["25,512,819,86"],
+        "cols": ["246,291,331,370,412,454,495,536,577,619,656,699,740,777"],
         "rows_to_fix": {
         "rows_to_fix": {
-            2: ['2D Non-Energy Products from Fuels and Solvent',
-                '2F Product Uses as Substitutes for Ozone'],
+            2: [
+                "2D Non-Energy Products from Fuels and Solvent",
+                "2F Product Uses as Substitutes for Ozone",
+            ],
        },
    },
-    '240': {
-        "area": ['48,510,797,99'],
-        "cols": ['271,310,350,393,435,475,514,557,594,640,678,719,760'],
+    "240": {
+        "area": ["48,510,797,99"],
+        "cols": ["271,310,350,393,435,475,514,557,594,640,678,719,760"],
         "rows_to_fix": {
         "rows_to_fix": {
-            2: ['2D Non-Energy Products from Fuels and Solvent',
-                '2F Product Uses as Substitutes for Ozone'],
-            -3: ['2E Electronics Industry',
-                 '2F1 Refrigeration and Air Conditioning',
-                 '2G2 SF6 and PFCs from Other Product Uses',],
+            2: [
+                "2D Non-Energy Products from Fuels and Solvent",
+                "2F Product Uses as Substitutes for Ozone",
+            ],
+            -3: [
+                "2E Electronics Industry",
+                "2F1 Refrigeration and Air Conditioning",
+                "2G2 SF6 and PFCs from Other Product Uses",
+            ],
        },
    },
-    '241': {
-        "area": ['25,512,819,86'],
-        "cols": ['246,291,331,370,412,454,495,536,577,619,656,699,740,777'],
+    "241": {
+        "area": ["25,512,819,86"],
+        "cols": ["246,291,331,370,412,454,495,536,577,619,656,699,740,777"],
         "rows_to_fix": {
         "rows_to_fix": {
-            2: ['2D Non-Energy Products from Fuels and Solvent',
-                '2F Product Uses as Substitutes for Ozone',
-                '2E1 Integrated Circuit or Semiconductor',],
-            -3: ['2F1 Refrigeration and Air Conditioning',
-                 '2G2 SF6 and PFCs from Other Product Uses',],
+            2: [
+                "2D Non-Energy Products from Fuels and Solvent",
+                "2F Product Uses as Substitutes for Ozone",
+                "2E1 Integrated Circuit or Semiconductor",
+            ],
+            -3: [
+                "2F1 Refrigeration and Air Conditioning",
+                "2G2 SF6 and PFCs from Other Product Uses",
+            ],
        },
    },
}
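
The rows_to_fix keys above give the number of extracted PDF rows that together form one logical table row, keyed off the leading text of the first fragment; negative keys presumably count the rows to merge upwards from the matched line instead of downwards. A minimal sketch of how such an entry could be applied to a camelot-style DataFrame (merge_split_rows is illustrative, not the repository's actual helper):

import pandas as pd

def merge_split_rows(
    df: pd.DataFrame, rows_to_fix: dict[int, list[str]], col: str
) -> pd.DataFrame:
    """Join category names that the PDF extraction split over several rows."""
    drop = []
    for n_rows, prefixes in rows_to_fix.items():
        if n_rows <= 1:
            continue  # negative-key (merge upwards) handling omitted in this sketch
        for idx in df.index[df[col].str.startswith(tuple(prefixes), na=False)]:
            pos = df.index.get_loc(idx)
            following = df.index[pos + 1 : pos + n_rows]
            # glue the split name fragments back onto the first row
            df.loc[idx, col] = " ".join([df.loc[idx, col], *df.loc[following, col]])
            drop.extend(following)
    return df.drop(index=drop)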

table_defs = {
-    '184': {"template": '184', "entity": "CO2", "unit": "Gg CO2 / yr"}, #CO2
-    '185': {"template": '185', "entity": "CO2", "unit": "Gg CO2 / yr"},
-    '186': {"template": '186', "entity": "CO2", "unit": "Gg CO2 / yr"},
-    '187': {"template": '187', "entity": "CO2", "unit": "Gg CO2 / yr"},
-    '188': {"template": '188', "entity": "CO2", "unit": "Gg CO2 / yr"},
-    '189': {"template": '189', "entity": "CO2", "unit": "Gg CO2 / yr"},
-    '190': {"template": '190', "entity": "CO2", "unit": "Gg CO2 / yr"},
-    '191': {"template": '191', "entity": "CO2", "unit": "Gg CO2 / yr"},
-    '192': {"template": '192', "entity": "CO2", "unit": "Gg CO2 / yr"},
-    '193': {"template": '193', "entity": "CO2", "unit": "Gg CO2 / yr"},
-    '194': {"template": '194', "entity": "CO2", "unit": "Gg CO2 / yr"},
-    '195': {"template": '195', "entity": "CO2", "unit": "Gg CO2 / yr"},
-    '196': {"template": '196', "entity": "CO2", "unit": "Gg CO2 / yr"},
-    '197': {"template": '197', "entity": "CO2", "unit": "Gg CO2 / yr"},
-    '198': {"template": '198', "entity": "CH4", "unit": "Gg CH4 / yr"}, #CH4
-    '199': {"template": '199', "entity": "CH4", "unit": "Gg CH4 / yr"},
-    '200': {"template": '186', "entity": "CH4", "unit": "Gg CH4 / yr"},
-    '201': {"template": '187', "entity": "CH4", "unit": "Gg CH4 / yr"},
-    '202': {"template": '202', "entity": "CH4", "unit": "Gg CH4 / yr"},
-    '203': {"template": '203', "entity": "CH4", "unit": "Gg CH4 / yr"},
-    '204': {"template": '204', "entity": "CH4", "unit": "Gg CH4 / yr"},
-    '205': {"template": '205', "entity": "CH4", "unit": "Gg CH4 / yr"},
-    '206': {"template": '206', "entity": "CH4", "unit": "Gg CH4 / yr"},
-    '207': {"template": '207', "entity": "CH4", "unit": "Gg CH4 / yr"},
-    '208': {"template": '208', "entity": "CH4", "unit": "Gg CH4 / yr"},
-    '209': {"template": '209', "entity": "CH4", "unit": "Gg CH4 / yr"},
-    '210': {"template": '210', "entity": "CH4", "unit": "Gg CH4 / yr"},
-    '211': {"template": '211', "entity": "CH4", "unit": "Gg CH4 / yr"},
-    '212': {"template": '212', "entity": "N2O", "unit": "Gg N2O / yr"}, #N2O
-    '213': {"template": '213', "entity": "N2O", "unit": "Gg N2O / yr"},
-    '214': {"template": '214', "entity": "N2O", "unit": "Gg N2O / yr"},
-    '215': {"template": '215', "entity": "N2O", "unit": "Gg N2O / yr"},
-    '216': {"template": '216', "entity": "N2O", "unit": "Gg N2O / yr"},
-    '217': {"template": '217', "entity": "N2O", "unit": "Gg N2O / yr"},
-    '218': {"template": '218', "entity": "N2O", "unit": "Gg N2O / yr"},
-    '219': {"template": '219', "entity": "N2O", "unit": "Gg N2O / yr"},
-    '220': {"template": '206', "entity": "N2O", "unit": "Gg N2O / yr"},
-    '221': {"template": '207', "entity": "N2O", "unit": "Gg N2O / yr"},
-    '222': {"template": '208', "entity": "N2O", "unit": "Gg N2O / yr"},
-    '223': {"template": '209', "entity": "N2O", "unit": "Gg N2O / yr"},
-    '224': {"template": '210', "entity": "N2O", "unit": "Gg N2O / yr"},
-    '225': {"template": '211', "entity": "N2O", "unit": "Gg N2O / yr"},
-    '226': {"template": '226', "entity": "HFCS (AR4GWP100)", "unit": "Gg CO2 / yr"}, #HFCs
-    '227': {"template": '227', "entity": "HFCS (AR4GWP100)", "unit": "Gg CO2 / yr"},
-    '228': {"template": '228', "entity": "HFCS (AR4GWP100)", "unit": "Gg CO2 / yr"},
-    '229': {"template": '229', "entity": "HFCS (AR4GWP100)", "unit": "Gg CO2 / yr"},
-    '230': {"template": '230', "entity": "PFCS (AR4GWP100)", "unit": "Gg CO2 / yr"}, #PFCs
-    '231': {"template": '227', "entity": "PFCS (AR4GWP100)", "unit": "Gg CO2 / yr"},
-    '232': {"template": '232', "entity": "PFCS (AR4GWP100)", "unit": "Gg CO2 / yr"},
-    '233': {"template": '233', "entity": "PFCS (AR4GWP100)", "unit": "Gg CO2 / yr"},
-    '234': {"template": '226', "entity": "SF6 (AR4GWP100)", "unit": "Gg CO2 / yr"}, #SF6
-    '235': {"template": '227', "entity": "SF6 (AR4GWP100)", "unit": "Gg CO2 / yr"},
-    '236': {"template": '232', "entity": "SF6 (AR4GWP100)", "unit": "Gg CO2 / yr"},
-    '237': {"template": '237', "entity": "SF6 (AR4GWP100)", "unit": "Gg CO2 / yr"},
-    '238': {"template": '226', "entity": "NF3 (AR4GWP100)", "unit": "Gg CO2 / yr"}, #NF3
-    '239': {"template": '227', "entity": "NF3 (AR4GWP100)", "unit": "Gg CO2 / yr"},
-    '240': {"template": '240', "entity": "NF3 (AR4GWP100)", "unit": "Gg CO2 / yr"},
-    '241': {"template": '241', "entity": "NF3 (AR4GWP100)", "unit": "Gg CO2 / yr"},
+    "184": {"template": "184", "entity": "CO2", "unit": "Gg CO2 / yr"},  # CO2
+    "185": {"template": "185", "entity": "CO2", "unit": "Gg CO2 / yr"},
+    "186": {"template": "186", "entity": "CO2", "unit": "Gg CO2 / yr"},
+    "187": {"template": "187", "entity": "CO2", "unit": "Gg CO2 / yr"},
+    "188": {"template": "188", "entity": "CO2", "unit": "Gg CO2 / yr"},
+    "189": {"template": "189", "entity": "CO2", "unit": "Gg CO2 / yr"},
+    "190": {"template": "190", "entity": "CO2", "unit": "Gg CO2 / yr"},
+    "191": {"template": "191", "entity": "CO2", "unit": "Gg CO2 / yr"},
+    "192": {"template": "192", "entity": "CO2", "unit": "Gg CO2 / yr"},
+    "193": {"template": "193", "entity": "CO2", "unit": "Gg CO2 / yr"},
+    "194": {"template": "194", "entity": "CO2", "unit": "Gg CO2 / yr"},
+    "195": {"template": "195", "entity": "CO2", "unit": "Gg CO2 / yr"},
+    "196": {"template": "196", "entity": "CO2", "unit": "Gg CO2 / yr"},
+    "197": {"template": "197", "entity": "CO2", "unit": "Gg CO2 / yr"},
+    "198": {"template": "198", "entity": "CH4", "unit": "Gg CH4 / yr"},  # CH4
+    "199": {"template": "199", "entity": "CH4", "unit": "Gg CH4 / yr"},
+    "200": {"template": "186", "entity": "CH4", "unit": "Gg CH4 / yr"},
+    "201": {"template": "187", "entity": "CH4", "unit": "Gg CH4 / yr"},
+    "202": {"template": "202", "entity": "CH4", "unit": "Gg CH4 / yr"},
+    "203": {"template": "203", "entity": "CH4", "unit": "Gg CH4 / yr"},
+    "204": {"template": "204", "entity": "CH4", "unit": "Gg CH4 / yr"},
+    "205": {"template": "205", "entity": "CH4", "unit": "Gg CH4 / yr"},
+    "206": {"template": "206", "entity": "CH4", "unit": "Gg CH4 / yr"},
+    "207": {"template": "207", "entity": "CH4", "unit": "Gg CH4 / yr"},
+    "208": {"template": "208", "entity": "CH4", "unit": "Gg CH4 / yr"},
+    "209": {"template": "209", "entity": "CH4", "unit": "Gg CH4 / yr"},
+    "210": {"template": "210", "entity": "CH4", "unit": "Gg CH4 / yr"},
+    "211": {"template": "211", "entity": "CH4", "unit": "Gg CH4 / yr"},
+    "212": {"template": "212", "entity": "N2O", "unit": "Gg N2O / yr"},  # N2O
+    "213": {"template": "213", "entity": "N2O", "unit": "Gg N2O / yr"},
+    "214": {"template": "214", "entity": "N2O", "unit": "Gg N2O / yr"},
+    "215": {"template": "215", "entity": "N2O", "unit": "Gg N2O / yr"},
+    "216": {"template": "216", "entity": "N2O", "unit": "Gg N2O / yr"},
+    "217": {"template": "217", "entity": "N2O", "unit": "Gg N2O / yr"},
+    "218": {"template": "218", "entity": "N2O", "unit": "Gg N2O / yr"},
+    "219": {"template": "219", "entity": "N2O", "unit": "Gg N2O / yr"},
+    "220": {"template": "206", "entity": "N2O", "unit": "Gg N2O / yr"},
+    "221": {"template": "207", "entity": "N2O", "unit": "Gg N2O / yr"},
+    "222": {"template": "208", "entity": "N2O", "unit": "Gg N2O / yr"},
+    "223": {"template": "209", "entity": "N2O", "unit": "Gg N2O / yr"},
+    "224": {"template": "210", "entity": "N2O", "unit": "Gg N2O / yr"},
+    "225": {"template": "211", "entity": "N2O", "unit": "Gg N2O / yr"},
+    "226": {
+        "template": "226",
+        "entity": "HFCS (AR4GWP100)",
+        "unit": "Gg CO2 / yr",
+    },  # HFCs
+    "227": {"template": "227", "entity": "HFCS (AR4GWP100)", "unit": "Gg CO2 / yr"},
+    "228": {"template": "228", "entity": "HFCS (AR4GWP100)", "unit": "Gg CO2 / yr"},
+    "229": {"template": "229", "entity": "HFCS (AR4GWP100)", "unit": "Gg CO2 / yr"},
+    "230": {
+        "template": "230",
+        "entity": "PFCS (AR4GWP100)",
+        "unit": "Gg CO2 / yr",
+    },  # PFCs
+    "231": {"template": "227", "entity": "PFCS (AR4GWP100)", "unit": "Gg CO2 / yr"},
+    "232": {"template": "232", "entity": "PFCS (AR4GWP100)", "unit": "Gg CO2 / yr"},
+    "233": {"template": "233", "entity": "PFCS (AR4GWP100)", "unit": "Gg CO2 / yr"},
+    "234": {
+        "template": "226",
+        "entity": "SF6 (AR4GWP100)",
+        "unit": "Gg CO2 / yr",
+    },  # SF6
+    "235": {"template": "227", "entity": "SF6 (AR4GWP100)", "unit": "Gg CO2 / yr"},
+    "236": {"template": "232", "entity": "SF6 (AR4GWP100)", "unit": "Gg CO2 / yr"},
+    "237": {"template": "237", "entity": "SF6 (AR4GWP100)", "unit": "Gg CO2 / yr"},
+    "238": {
+        "template": "226",
+        "entity": "NF3 (AR4GWP100)",
+        "unit": "Gg CO2 / yr",
+    },  # NF3
+    "239": {"template": "227", "entity": "NF3 (AR4GWP100)", "unit": "Gg CO2 / yr"},
+    "240": {"template": "240", "entity": "NF3 (AR4GWP100)", "unit": "Gg CO2 / yr"},
+    "241": {"template": "241", "entity": "NF3 (AR4GWP100)", "unit": "Gg CO2 / yr"},
}
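
The table_defs above deliberately reuses layout templates across gases: page "200" is read with the page-186 layout but as CH4, and pages "220" to "225" reuse the CH4 layouts for N2O. A hedged sketch of how the two dicts could be combined (resolve_table_def is illustrative, not the reader's actual helper):

def resolve_table_def(page: str) -> dict:
    """Merge a page's layout template with its page-specific metadata."""
    meta = table_defs[page]
    resolved = dict(table_def_templates[meta["template"]])  # area, cols, rows_to_fix
    resolved.update(entity=meta["entity"], unit=meta["unit"])
    return resolved

# resolve_table_def("220") -> the page-206 layout, labelled N2O in Gg N2O / yr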

country_processing_step1 = {
-    'aggregate_cats': {
-        'M.3.C.AG': {'sources': ['3.C.1', '3.C.2', '3.C.3', '3.C.4', '3.C.5',
-                                 '3.C.6', '3.C.7', '3.C.8'],
-                     'name': 'Aggregate sources and non-CO2 emissions sources on land '
-                             '(Agriculture)'},
-        'M.3.D.AG': {'sources': ['3.D.2'],
-                     'name': 'Other (Agriculture)'},
-        'M.AG.ELV': {'sources': ['M.3.C.AG', 'M.3.D.AG'],
-                     'name': 'Agriculture excluding livestock'},
-        'M.AG': {'sources': ['3.A', 'M.AG.ELV'],
-                     'name': 'Agriculture'},
-        'M.3.D.LU': {'sources': ['3.D.1'],
-                     'name': 'Other (LULUCF)'},
-        'M.LULUCF': {'sources': ['3.B', 'M.3.D.LU'],
-                     'name': 'LULUCF'},
-        'M.0.EL': {'sources': ['1', '2', 'M.AG', '4', '5'],
-                     'name': 'National total emissions excluding LULUCF'},
-    },
-    'basket_copy': {
-        'GWPs_to_add': ["SARGWP100", "AR5GWP100", "AR6GWP100"],
-        'entities': ["HFCS", "PFCS"],
-        'source_GWP': gwp_to_use,
+    "aggregate_cats": {
+        "M.3.C.AG": {
+            "sources": [
+                "3.C.1",
+                "3.C.2",
+                "3.C.3",
+                "3.C.4",
+                "3.C.5",
+                "3.C.6",
+                "3.C.7",
+                "3.C.8",
+            ],
+            "name": "Aggregate sources and non-CO2 emissions sources on land "
+            "(Agriculture)",
+        },
+        "M.3.D.AG": {"sources": ["3.D.2"], "name": "Other (Agriculture)"},
+        "M.AG.ELV": {
+            "sources": ["M.3.C.AG", "M.3.D.AG"],
+            "name": "Agriculture excluding livestock",
+        },
+        "M.AG": {"sources": ["3.A", "M.AG.ELV"], "name": "Agriculture"},
+        "M.3.D.LU": {"sources": ["3.D.1"], "name": "Other (LULUCF)"},
+        "M.LULUCF": {"sources": ["3.B", "M.3.D.LU"], "name": "LULUCF"},
+        "M.0.EL": {
+            "sources": ["1", "2", "M.AG", "4", "5"],
+            "name": "National total emissions excluding LULUCF",
+        },
+    },
+    "basket_copy": {
+        "GWPs_to_add": ["SARGWP100", "AR5GWP100", "AR6GWP100"],
+        "entities": ["HFCS", "PFCS"],
+        "source_GWP": gwp_to_use,
    },
}
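
The aggregate_cats mapping above is order-sensitive: M.AG.ELV sums the two M.3.*.AG aggregates defined before it, and M.AG and M.0.EL build on M.AG.ELV in turn. A minimal sketch of the implied pass, assuming plain per-category totals (the real processing operates on primap2 datasets):

def aggregate(values: dict[str, float], aggregate_cats: dict) -> dict[str, float]:
    """Sum child categories into new aggregate codes, in insertion order."""
    out = dict(values)
    for code, spec in aggregate_cats.items():
        # later aggregates (M.AG, M.0.EL) may reference earlier results
        out[code] = sum(out.get(src, 0.0) for src in spec["sources"])
    return out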

gas_baskets = {
-    'FGASES (SARGWP100)': ['HFCS (SARGWP100)', 'PFCS (SARGWP100)', 'SF6', 'NF3'],
-    'FGASES (AR4GWP100)': ['HFCS (AR4GWP100)', 'PFCS (AR4GWP100)', 'SF6', 'NF3'],
-    'FGASES (AR5GWP100)':['HFCS (AR5GWP100)', 'PFCS (AR5GWP100)', 'SF6', 'NF3'],
-    'FGASES (AR6GWP100)':['HFCS (AR6GWP100)', 'PFCS (AR6GWP100)', 'SF6', 'NF3'],
-    'KYOTOGHG (SARGWP100)': ['CO2', 'CH4', 'N2O', 'FGASES (SARGWP100)'],
-    'KYOTOGHG (AR4GWP100)': ['CO2', 'CH4', 'N2O', 'FGASES (AR4GWP100)'],
-    'KYOTOGHG (AR5GWP100)': ['CO2', 'CH4', 'N2O', 'FGASES (AR5GWP100)'],
-    'KYOTOGHG (AR6GWP100)': ['CO2', 'CH4', 'N2O', 'FGASES (AR6GWP100)'],
+    "FGASES (SARGWP100)": ["HFCS (SARGWP100)", "PFCS (SARGWP100)", "SF6", "NF3"],
+    "FGASES (AR4GWP100)": ["HFCS (AR4GWP100)", "PFCS (AR4GWP100)", "SF6", "NF3"],
+    "FGASES (AR5GWP100)": ["HFCS (AR5GWP100)", "PFCS (AR5GWP100)", "SF6", "NF3"],
+    "FGASES (AR6GWP100)": ["HFCS (AR6GWP100)", "PFCS (AR6GWP100)", "SF6", "NF3"],
+    "KYOTOGHG (SARGWP100)": ["CO2", "CH4", "N2O", "FGASES (SARGWP100)"],
+    "KYOTOGHG (AR4GWP100)": ["CO2", "CH4", "N2O", "FGASES (AR4GWP100)"],
+    "KYOTOGHG (AR5GWP100)": ["CO2", "CH4", "N2O", "FGASES (AR5GWP100)"],
+    "KYOTOGHG (AR6GWP100)": ["CO2", "CH4", "N2O", "FGASES (AR6GWP100)"],
}
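
Baskets can reference other baskets (each KYOTOGHG entry pulls in the matching FGASES basket), so membership resolves recursively; entities carrying a GWP suffix such as "(AR4GWP100)" are already CO2-equivalents under that GWP set. Illustrative only:

def basket_members(name: str) -> list[str]:
    """Recursively expand a basket into its primary entities."""
    members: list[str] = []
    for entity in gas_baskets.get(name, [name]):
        if entity in gas_baskets:
            members.extend(basket_members(entity))
        else:
            members.append(entity)
    return members

# basket_members("KYOTOGHG (AR4GWP100)")
# -> ["CO2", "CH4", "N2O", "HFCS (AR4GWP100)", "PFCS (AR4GWP100)", "SF6", "NF3"]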

+ 258 - 253
src/unfccc_ghg_data/unfccc_reader/Malaysia/config_mys_bur4.py

@@ -1,3 +1,9 @@
+"""Config for Malaysia's BUR4
+
+Full configuration including PRIMAP2 conversion config and metadata
+
+"""
+
gwp_to_use = "AR4GWP100"


@@ -9,8 +15,8 @@ cat_names_fix = {
}

values_replacement = {
-    '': '-',
-    ' ': '-',
+    "": "-",
+    " ": "-",
}

cols_for_space_stripping = ["Categories"]
@@ -18,25 +24,25 @@ cols_for_space_stripping = ["Categories"]
index_cols = ["Categories", "entity", "unit"]

# parameters part 2: conversion to interchange format
-cats_remove = ['Memo items', 'Information items',  'Information items (1)']
+cats_remove = ["Memo items", "Information items", "Information items (1)"]

cat_codes_manual = {
-    'Annual change in long-term storage of carbon in HWP waste': 'M.LTS.AC.HWP',
-    'Annual change in total long-term storage of carbon stored': 'M.LTS.AC.TOT',
-    'CO2 captured': 'M.CCS',
-    'CO2 from Biomass Burning for Energy Production': 'M.BIO',
-    'For domestic storage': 'M.CCS.DOM',
-    'For storage in other countries': 'M.CCS.OCT',
-    'International Aviation (International Bunkers)': 'M.BK.A',
-    'International Bunkers': 'M.BK',
-    'International Water-borne Transport (International Bunkers)': 'M.BK.M',
-    'Long-term storage of carbon in waste disposal sites': 'M.LTS.WASTE',
-    'Multilateral Operations': 'M.MULTIOP',
-    'Other (please specify)': 'M.OTHER',
-    'Total National Emissions and Removals': '0',
+    "Annual change in long-term storage of carbon in HWP waste": "M.LTS.AC.HWP",
+    "Annual change in total long-term storage of carbon stored": "M.LTS.AC.TOT",
+    "CO2 captured": "M.CCS",
+    "CO2 from Biomass Burning for Energy Production": "M.BIO",
+    "For domestic storage": "M.CCS.DOM",
+    "For storage in other countries": "M.CCS.OCT",
+    "International Aviation (International Bunkers)": "M.BK.A",
+    "International Bunkers": "M.BK",
+    "International Water-borne Transport (International Bunkers)": "M.BK.M",
+    "Long-term storage of carbon in waste disposal sites": "M.LTS.WASTE",
+    "Multilateral Operations": "M.MULTIOP",
+    "Other (please specify)": "M.OTHER",
+    "Total National Emissions and Removals": "0",
}

-cat_code_regexp = r'(?P<code>^[A-Z0-9]{1,4})\s.*'
+cat_code_regexp = r"(?P<code>^[A-Z0-9]{1,4})\s.*"
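
For rows not caught by cat_codes_manual, this pattern pulls the category code off the front of the name, e.g.:

import re

m = re.match(cat_code_regexp, "2B4 Caprolactam, Glyoxal and Glyoxylic Acid")
assert m is not None and m.group("code") == "2B4"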


coords_terminologies = {
@@ -49,27 +55,22 @@ coords_defaults = {
     "source": "MYS-GHG-inventory",
     "source": "MYS-GHG-inventory",
     "provenance": "measured",
     "provenance": "measured",
     "area": "MYS",
     "area": "MYS",
-    "scenario": "BUR4"
+    "scenario": "BUR4",
}

-coords_value_mapping = {
-}
+coords_value_mapping = {}
-coords_cols = {
-    "category": "Categories",
-    "entity": "entity",
-    "unit": "unit"
-}
+coords_cols = {"category": "Categories", "entity": "entity", "unit": "unit"}

add_coords_cols = {
    "orig_cat_name": ["orig_cat_name", "category"],
}

-#filter_remove = {
+# filter_remove = {
 #    "f1": {
 #    "f1": {
 #        "entity": ["CO2(grossemissions)", "CO2(removals)"],
 #        "entity": ["CO2(grossemissions)", "CO2(removals)"],
 #    },
 #    },
-#}
+# }
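
The coords_* and filter dicts in this config are presumably handed to primap2's interchange-format conversion; a hedged sketch of the call (kwargs as in primap2.pm2io.convert_wide_dataframe_if; the reader script may differ, e.g. passing filter_remove once it is re-enabled):

import primap2 as pm2

# df_all: the wide DataFrame assembled from the PDF tables (assumed name)
data_if = pm2.pm2io.convert_wide_dataframe_if(
    df_all,
    coords_cols=coords_cols,
    add_coords_cols=add_coords_cols,
    coords_defaults=coords_defaults,
    coords_terminologies=coords_terminologies,
    coords_value_mapping=coords_value_mapping,
    meta_data=meta_data,
)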
meta_data = {
    "references": "https://unfccc.int/documents/624776",
@@ -84,318 +85,322 @@ terminology_proc = coords_terminologies["category"]

table_def_templates = {
    # CO2
-    '203': {  # 203, 249
-        "area": ['70,480,768,169'],
+    "203": {  # 203, 249
+        "area": ["70,480,768,169"],
    },
-    '204': {  # 204
-        "area": ['70,500,763,141'],
+    "204": {  # 204
+        "area": ["70,500,763,141"],
    },
-    '205': {  # 205, 209, 214, 218
-        "area": ['70,495,763,95'],
+    "205": {  # 205, 209, 2014, 2018
+        "area": ["70,495,763,95"],
         "rows_to_fix": {
         "rows_to_fix": {
-            2: ['5A Indirect N2O emissions from the atmospheric deposition of'],
+            2: ["5A Indirect N2O emissions from the atmospheric deposition of"],
        },
    },
-    '206': {  # 206
-        "area": ['70,495,763,353'],
+    "206": {  # 206
+        "area": ["70,495,763,353"],
    },
-    '207': {  # 207, 208, 211, 212, 213, 215, 217, 223, 227, 231,
+    "207": {  # 207, 208, 211, 212, 213, 215, 217, 223, 227, 231,
        # 251, 257, 259, 263, 265
-        "area": ['70,495,763,95'],
+        "area": ["70,495,763,95"],
    },
-    '216': {  #  216
-        "area": ['70,500,763,95'],
+    "216": {  #  216
+        "area": ["70,500,763,95"],
    },
    # CH4
-    '219': {  # 219, 255
-        "area": ['70,480,768,100'],
+    "219": {  # 219, 255
+        "area": ["70,480,768,100"],
    },
-    '220': {  # 220, 224, 228
-        "area": ['70,495,763,95'],
+    "220": {  # 220, 224, 228
+        "area": ["70,495,763,95"],
         "rows_to_fix": {
         "rows_to_fix": {
-            3: ['2F Product Uses as Substitutes for Ozone Depleting'],
+            3: ["2F Product Uses as Substitutes for Ozone Depleting"],
        },
    },
-    '221': {  # 221
-        "area": ['92,508,748,92'],
-        "cols": ['298,340,380,422,462,502,542,582,622,662,702'],
+    "221": {  # 221
+        "area": ["92,508,748,92"],
+        "cols": ["298,340,380,422,462,502,542,582,622,662,702"],
         "rows_to_fix": {
         "rows_to_fix": {
-            3: ['3C Aggregate sources and Non-CO2 emissions'],
-            2: ['5A Indirect N2O emissions from the atmospheric'],
+            3: ["3C Aggregate sources and Non-CO2 emissions"],
+            2: ["5A Indirect N2O emissions from the atmospheric"],
        },
    },
-    '222': {  # 222
-        "area": ['70,495,763,323'],
+    "222": {  # 222
+        "area": ["70,495,763,323"],
         "rows_to_fix": {
         "rows_to_fix": {
-            2: ['Annual change in long-term storage of carbon in HWP'],
+            2: ["Annual change in long-term storage of carbon in HWP"],
        },
    },
-    '225': {  # 225
-        "area": ['92,508,748,92'],
-        "cols": ['311,357,400,443,486,529,572,615,658,701'],
+    "225": {  # 225
+        "area": ["92,508,748,92"],
+        "cols": ["311,357,400,443,486,529,572,615,658,701"],
         "rows_to_fix": {
         "rows_to_fix": {
-            3: ['3C Aggregate sources and Non-CO2 emissions'],
+            3: ["3C Aggregate sources and Non-CO2 emissions"],
        },
    },
-    '226': {  # 226, 230
-        "area": ['70,495,763,95'],
+    "226": {  # 226, 230
+        "area": ["70,495,763,95"],
         "rows_to_fix": {
         "rows_to_fix": {
-            2: ['5A Indirect N2O emissions from the atmospheric',
-                'Annual change in long-term storage of carbon in HWP'],
+            2: [
+                "5A Indirect N2O emissions from the atmospheric",
+                "Annual change in long-term storage of carbon in HWP",
+            ],
        },
    },
-    '229': {  # 229
-        "area": ['114,508,725,92'],
-        "cols": ['333,379,421,464,506,548,590,632,674'],
+    "229": {  # 229
+        "area": ["114,508,725,92"],
+        "cols": ["333,379,421,464,506,548,590,632,674"],
         "rows_to_fix": {
         "rows_to_fix": {
-            3: ['3C Aggregate sources and Non-CO2 emissions'],
+            3: ["3C Aggregate sources and Non-CO2 emissions"],
        },
    },
    # N2O
-    '232': {  # 232
-        "area": ['70,495,763,95'],
-        "cols": ['315,366,416,466,516,566,616,666,716'],
+    "232": {  # 232
+        "area": ["70,495,763,95"],
+        "cols": ["315,366,416,466,516,566,616,666,716"],
         "rows_to_fix": {
         "rows_to_fix": {
-            3: ['2F Product Uses as Substitutes for Ozone Depleting'],
+            3: ["2F Product Uses as Substitutes for Ozone Depleting"],
        },
    },
-    '233': {  # 233
-        "area": ['70,495,763,95'],
+    "233": {  # 233
+        "area": ["70,495,763,95"],
         "rows_to_fix": {
         "rows_to_fix": {
-            3: ['3C Aggregate sources and Non-CO2 emissions'],
+            3: ["3C Aggregate sources and Non-CO2 emissions"],
        },
    },
-    '234': {  # 234
-        "area": ['70,495,763,95'],
+    "234": {  # 234
+        "area": ["70,495,763,95"],
         "rows_to_fix": {
         "rows_to_fix": {
-            3: ['International Water-borne Transport (International'],
+            3: ["International Water-borne Transport (International"],
        },
    },
-    '236': {  # 236
-        "area": ['70,495,763,95'],
-        "cols": ['298,344,392,439,487,534,580,629,675,721'],
+    "236": {  # 236
+        "area": ["70,495,763,95"],
+        "cols": ["298,344,392,439,487,534,580,629,675,721"],
         "rows_to_fix": {
         "rows_to_fix": {
-            3: ['2F Product Uses as Substitutes for Ozone Depleting'],
+            3: ["2F Product Uses as Substitutes for Ozone Depleting"],
        },
    },
-    '240': {  # 240
-        "area": ['70,495,763,95'],
-        "cols": ['283,329,372,416,459,504,550,594,639,682,726'],
+    "240": {  # 240
+        "area": ["70,495,763,95"],
+        "cols": ["283,329,372,416,459,504,550,594,639,682,726"],
         "rows_to_fix": {
         "rows_to_fix": {
-            3: ['2F Product Uses as Substitutes for Ozone Depleting'],
+            3: ["2F Product Uses as Substitutes for Ozone Depleting"],
        },
    },
    # HFCs
-    '243': {  # 243
-        "area": ['70,480,763,95'],
-        "cols": ['408,449,489,527,567,604,644,681,721'],
+    "243": {  # 243
+        "area": ["70,480,763,95"],
+        "cols": ["408,449,489,527,567,604,644,681,721"],
    },
-    '244': {  # 244
-        "area": ['70,495,763,95'],
-        "cols": ['408,449,489,527,567,604,644,681,721'],
+    "244": {  # 244
+        "area": ["70,495,763,95"],
+        "cols": ["408,449,489,527,567,604,644,681,721"],
    },
-    '245': {  # 245, 246
-        "area": ['70,495,763,95'],
-        "cols": ['405,442,478,515,550,587,621,657,693,729'],
+    "245": {  # 245, 246
+        "area": ["70,495,763,95"],
+        "cols": ["405,442,478,515,550,587,621,657,693,729"],
    },
-    '247': {  # 247, 248
-        "area": ['70,495,763,95'],
-        "cols": ['384,426,459,493,531,564,597,633,666,700,735'],
+    "247": {  # 247, 248
+        "area": ["70,495,763,95"],
+        "cols": ["384,426,459,493,531,564,597,633,666,700,735"],
    },
    # PFCs
-    '250': {  # 250
-        "area": ['70,495,763,95'],
-        "cols": ['341,389,436,485,531,579,626,674,723'],
+    "250": {  # 250
+        "area": ["70,495,763,95"],
+        "cols": ["341,389,436,485,531,579,626,674,723"],
    },
-    '252': {  # 252
-        "area": ['70,495,763,95'],
-        "cols": ['323,370,415,459,504,547,590,636,680,726'],
+    "252": {  # 252
+        "area": ["70,495,763,95"],
+        "cols": ["323,370,415,459,504,547,590,636,680,726"],
    },
-    '253': {  # 253
-        "area": ['70,495,763,95'],
-        "cols": ['334,378,419,464,511,554,597,636,668,702,735'],
+    "253": {  # 253
+        "area": ["70,495,763,95"],
+        "cols": ["334,378,419,464,511,554,597,636,668,702,735"],
    },
-    '254': {  # 254
-        "area": ['70,495,763,95'],
-        "cols": ['330,378,419,464,511,554,597,636,668,702,735'],
+    "254": {  # 254
+        "area": ["70,495,763,95"],
+        "cols": ["330,378,419,464,511,554,597,636,668,702,735"],
         "rows_to_fix": {
         "rows_to_fix": {
-            -3: ['2F Product Uses as Substitutes for Ozone Depleting Substances'],
+            -3: ["2F Product Uses as Substitutes for Ozone Depleting Substances"],
        },
    },
    # SF6
-    '256': {  # 256
-        "area": ['70,495,763,95'],
-        "cols": ['382,420,462,504,546,588,630,672,714'],
+    "256": {  # 256
+        "area": ["70,495,763,95"],
+        "cols": ["382,420,462,504,546,588,630,672,714"],
         "rows_to_fix": {
         "rows_to_fix": {
-            3: ['2F Product Uses as Substitutes for Ozone Depleting'],
+            3: ["2F Product Uses as Substitutes for Ozone Depleting"],
        },
    },
-    '258': {  # 258
-        "area": ['70,495,763,95'],
-        "cols": ['363,399,441,481,522,564,606,646,688,728'],
+    "258": {  # 258
+        "area": ["70,495,763,95"],
+        "cols": ["363,399,441,481,522,564,606,646,688,728"],
         "rows_to_fix": {
         "rows_to_fix": {
-            3: ['2F Product Uses as Substitutes for Ozone Depleting'],
+            3: ["2F Product Uses as Substitutes for Ozone Depleting"],
        },
    },
-    '260': {  # 260
-        "area": ['70,495,763,95'],
-        "cols": ['346,381,419,458,498,536,576,614,652,692,732'],
+    "260": {  # 260
+        "area": ["70,495,763,95"],
+        "cols": ["346,381,419,458,498,536,576,614,652,692,732"],
         "rows_to_fix": {
         "rows_to_fix": {
-            3: ['2F Product Uses as Substitutes for Ozone Depleting'],
+            3: ["2F Product Uses as Substitutes for Ozone Depleting"],
        },
    },
    # NF3
-    '261': {  # 261
-        "area": ['70,490,768,100'],
-        "cols": ['364,412,454,496,538,581,623,667,710'],
+    "261": {  # 261
+        "area": ["70,490,768,100"],
+        "cols": ["364,412,454,496,538,581,623,667,710"],
    },
-    '262': {  # 262
-        "area": ['70,495,763,95'],
-        "cols": ['376,420,462,504,545,591,633,676,718'],
+    "262": {  # 262
+        "area": ["70,495,763,95"],
+        "cols": ["376,420,462,504,545,591,633,676,718"],
         "rows_to_fix": {
         "rows_to_fix": {
-            3: ['2F Product Uses as Substitutes for Ozone Depleting'],
+            3: ["2F Product Uses as Substitutes for Ozone Depleting"],
        },
    },
-    '264': {  # 264
-        "area": ['70,495,763,95'],
-        "cols": ['370,415,451,491,530,569,609,651,689,729'],
+    "264": {  # 264
+        "area": ["70,495,763,95"],
+        "cols": ["370,415,451,491,530,569,609,651,689,729"],
         "rows_to_fix": {
         "rows_to_fix": {
-            3: ['2F Product Uses as Substitutes for Ozone Depleting'],
+            3: ["2F Product Uses as Substitutes for Ozone Depleting"],
        },
    },
-    '266': {  # 266
-        "area": ['70,495,763,95'],
-        "cols": ['355,392,430,467,505,544,580,619,656,695,732'],
+    "266": {  # 266
+        "area": ["70,495,763,95"],
+        "cols": ["355,392,430,467,505,544,580,619,656,695,732"],
         "rows_to_fix": {
         "rows_to_fix": {
-            3: ['2F Product Uses as Substitutes for Ozone Depleting'],
+            3: ["2F Product Uses as Substitutes for Ozone Depleting"],
        },
    },
}

table_defs = {
-    '203': {"template": '203', "entity": "CO2", "unit": "Gg CO2 / yr"},  # CO2
-    '204': {"template": '204', "entity": "CO2", "unit": "Gg CO2 / yr"},
-    '205': {"template": '205', "entity": "CO2", "unit": "Gg CO2 / yr"},
-    '206': {"template": '206', "entity": "CO2", "unit": "Gg CO2 / yr"},
-    '207': {"template": '207', "entity": "CO2", "unit": "Gg CO2 / yr"},
-    '208': {"template": '207', "entity": "CO2", "unit": "Gg CO2 / yr"},
-    '209': {"template": '205', "entity": "CO2", "unit": "Gg CO2 / yr"},
-    '210': {"template": '206', "entity": "CO2", "unit": "Gg CO2 / yr"},
-    '211': {"template": '207', "entity": "CO2", "unit": "Gg CO2 / yr"},
-    '212': {"template": '207', "entity": "CO2", "unit": "Gg CO2 / yr"},
-    '213': {"template": '207', "entity": "CO2", "unit": "Gg CO2 / yr"},
-    '214': {"template": '205', "entity": "CO2", "unit": "Gg CO2 / yr"},
-    '215': {"template": '207', "entity": "CO2", "unit": "Gg CO2 / yr"},
-    '216': {"template": '216', "entity": "CO2", "unit": "Gg CO2 / yr"},
-    '217': {"template": '207', "entity": "CO2", "unit": "Gg CO2 / yr"},
-    '218': {"template": '205', "entity": "CO2", "unit": "Gg CO2 / yr"},
-    '219': {"template": '219', "entity": "CH4", "unit": "Gg CH4 / yr"},  # CH4
-    '220': {"template": '220', "entity": "CH4", "unit": "Gg CH4 / yr"},
-    '221': {"template": '221', "entity": "CH4", "unit": "Gg CH4 / yr"},
-    '222': {"template": '222', "entity": "CH4", "unit": "Gg CH4 / yr"},
-    '223': {"template": '207', "entity": "CH4", "unit": "Gg CH4 / yr"},
-    '224': {"template": '220', "entity": "CH4", "unit": "Gg CH4 / yr"},
-    '225': {"template": '225', "entity": "CH4", "unit": "Gg CH4 / yr"},
-    '226': {"template": '226', "entity": "CH4", "unit": "Gg CH4 / yr"},
-    '227': {"template": '207', "entity": "CH4", "unit": "Gg CH4 / yr"},
-    '228': {"template": '220', "entity": "CH4", "unit": "Gg CH4 / yr"},
-    '229': {"template": '229', "entity": "CH4", "unit": "Gg CH4 / yr"},
-    '230': {"template": '226', "entity": "CH4", "unit": "Gg CH4 / yr"},
-    '231': {"template": '207', "entity": "N2O", "unit": "Gg N2O / yr"},  # N2O
-    '232': {"template": '232', "entity": "N2O", "unit": "Gg N2O / yr"},
-    '233': {"template": '233', "entity": "N2O", "unit": "Gg N2O / yr"},
-    '234': {"template": '234', "entity": "N2O", "unit": "Gg N2O / yr"},
-    '235': {"template": '207', "entity": "N2O", "unit": "Gg N2O / yr"},
-    '236': {"template": '236', "entity": "N2O", "unit": "Gg N2O / yr"},
-    '237': {"template": '233', "entity": "N2O", "unit": "Gg N2O / yr"},
-    '238': {"template": '234', "entity": "N2O", "unit": "Gg N2O / yr"},
-    '239': {"template": '207', "entity": "N2O", "unit": "Gg N2O / yr"},
-    '240': {"template": '240', "entity": "N2O", "unit": "Gg N2O / yr"},
-    '241': {"template": '233', "entity": "N2O", "unit": "Gg N2O / yr"},
-    '242': {"template": '234', "entity": "N2O", "unit": "Gg N2O / yr"},
-    '243': {"template": '243', "entity": f"HFCS ({gwp_to_use})",
-            "unit": "Gg CO2 / yr"},  # HFCs
-    '244': {"template": '244', "entity": f"HFCS ({gwp_to_use})",
-            "unit": "Gg CO2 / yr"},
-    '245': {"template": '245', "entity": f"HFCS ({gwp_to_use})",
-            "unit": "Gg CO2 / yr"},
-    '246': {"template": '245', "entity": f"HFCS ({gwp_to_use})",
-            "unit": "Gg CO2 / yr"},
-    '247': {"template": '247', "entity": f"HFCS ({gwp_to_use})",
-            "unit": "Gg CO2 / yr"},
-    '248': {"template": '247', "entity": f"HFCS ({gwp_to_use})",
-            "unit": "Gg CO2 / yr"},
-    '249': {"template": '203', "entity": f"PFCS ({gwp_to_use})",
-            "unit": "Gg CO2 / yr"},  # PFCs
-    '250': {"template": '250', "entity": f"PFCS ({gwp_to_use})",
-            "unit": "Gg CO2 / yr"},
-    '251': {"template": '207', "entity": f"PFCS ({gwp_to_use})",
-            "unit": "Gg CO2 / yr"},
-    '252': {"template": '252', "entity": f"PFCS ({gwp_to_use})",
-            "unit": "Gg CO2 / yr"},
-    '253': {"template": '253', "entity": f"PFCS ({gwp_to_use})",
-            "unit": "Gg CO2 / yr"},
-    '254': {"template": '254', "entity": f"PFCS ({gwp_to_use})",
-            "unit": "Gg CO2 / yr"},
-    '255': {"template": '219', "entity": f"SF6 ({gwp_to_use})",
-            "unit": "Gg CO2 / yr"},  # SF6
-    '256': {"template": '256', "entity": f"SF6 ({gwp_to_use})",
-            "unit": "Gg CO2 / yr"},
-    '257': {"template": '207', "entity": f"SF6 ({gwp_to_use})",
-            "unit": "Gg CO2 / yr"},
-    '258': {"template": '258', "entity": f"SF6 ({gwp_to_use})",
-            "unit": "Gg CO2 / yr"},
-    '259': {"template": '207', "entity": f"SF6 ({gwp_to_use})",
-            "unit": "Gg CO2 / yr"},
-    '260': {"template": '260', "entity": f"SF6 ({gwp_to_use})",
-            "unit": "Gg CO2 / yr"},
-    '261': {"template": '261', "entity": f"NF3 ({gwp_to_use})",
-            "unit": "Gg CO2 / yr"},  # NF3
-    '262': {"template": '262', "entity": f"NF3 ({gwp_to_use})",
-            "unit": "Gg CO2 / yr"},
-    '263': {"template": '207', "entity": f"NF3 ({gwp_to_use})",
-            "unit": "Gg CO2 / yr"},
-    '264': {"template": '264', "entity": f"NF3 ({gwp_to_use})",
-            "unit": "Gg CO2 / yr"},
-    '265': {"template": '207', "entity": f"NF3 ({gwp_to_use})",
-            "unit": "Gg CO2 / yr"},
-    '266': {"template": '266', "entity": f"NF3 ({gwp_to_use})",
-            "unit": "Gg CO2 / yr"},
+    "203": {"template": "203", "entity": "CO2", "unit": "Gg CO2 / yr"},  # CO2
+    "204": {"template": "204", "entity": "CO2", "unit": "Gg CO2 / yr"},
+    "205": {"template": "205", "entity": "CO2", "unit": "Gg CO2 / yr"},
+    "206": {"template": "206", "entity": "CO2", "unit": "Gg CO2 / yr"},
+    "207": {"template": "207", "entity": "CO2", "unit": "Gg CO2 / yr"},
+    "208": {"template": "207", "entity": "CO2", "unit": "Gg CO2 / yr"},
+    "209": {"template": "205", "entity": "CO2", "unit": "Gg CO2 / yr"},
+    "210": {"template": "206", "entity": "CO2", "unit": "Gg CO2 / yr"},
+    "211": {"template": "207", "entity": "CO2", "unit": "Gg CO2 / yr"},
+    "212": {"template": "207", "entity": "CO2", "unit": "Gg CO2 / yr"},
+    "213": {"template": "207", "entity": "CO2", "unit": "Gg CO2 / yr"},
+    "214": {"template": "205", "entity": "CO2", "unit": "Gg CO2 / yr"},
+    "215": {"template": "207", "entity": "CO2", "unit": "Gg CO2 / yr"},
+    "216": {"template": "216", "entity": "CO2", "unit": "Gg CO2 / yr"},
+    "217": {"template": "207", "entity": "CO2", "unit": "Gg CO2 / yr"},
+    "218": {"template": "205", "entity": "CO2", "unit": "Gg CO2 / yr"},
+    "219": {"template": "219", "entity": "CH4", "unit": "Gg CH4 / yr"},  # CH4
+    "220": {"template": "220", "entity": "CH4", "unit": "Gg CH4 / yr"},
+    "221": {"template": "221", "entity": "CH4", "unit": "Gg CH4 / yr"},
+    "222": {"template": "222", "entity": "CH4", "unit": "Gg CH4 / yr"},
+    "223": {"template": "207", "entity": "CH4", "unit": "Gg CH4 / yr"},
+    "224": {"template": "220", "entity": "CH4", "unit": "Gg CH4 / yr"},
+    "225": {"template": "225", "entity": "CH4", "unit": "Gg CH4 / yr"},
+    "226": {"template": "226", "entity": "CH4", "unit": "Gg CH4 / yr"},
+    "227": {"template": "207", "entity": "CH4", "unit": "Gg CH4 / yr"},
+    "228": {"template": "220", "entity": "CH4", "unit": "Gg CH4 / yr"},
+    "229": {"template": "229", "entity": "CH4", "unit": "Gg CH4 / yr"},
+    "230": {"template": "226", "entity": "CH4", "unit": "Gg CH4 / yr"},
+    "231": {"template": "207", "entity": "N2O", "unit": "Gg N2O / yr"},  # N2O
+    "232": {"template": "232", "entity": "N2O", "unit": "Gg N2O / yr"},
+    "233": {"template": "233", "entity": "N2O", "unit": "Gg N2O / yr"},
+    "234": {"template": "234", "entity": "N2O", "unit": "Gg N2O / yr"},
+    "235": {"template": "207", "entity": "N2O", "unit": "Gg N2O / yr"},
+    "236": {"template": "236", "entity": "N2O", "unit": "Gg N2O / yr"},
+    "237": {"template": "233", "entity": "N2O", "unit": "Gg N2O / yr"},
+    "238": {"template": "234", "entity": "N2O", "unit": "Gg N2O / yr"},
+    "239": {"template": "207", "entity": "N2O", "unit": "Gg N2O / yr"},
+    "240": {"template": "240", "entity": "N2O", "unit": "Gg N2O / yr"},
+    "241": {"template": "233", "entity": "N2O", "unit": "Gg N2O / yr"},
+    "242": {"template": "234", "entity": "N2O", "unit": "Gg N2O / yr"},
+    "243": {
+        "template": "243",
+        "entity": f"HFCS ({gwp_to_use})",
+        "unit": "Gg CO2 / yr",
+    },  # HFCs
+    "244": {"template": "244", "entity": f"HFCS ({gwp_to_use})", "unit": "Gg CO2 / yr"},
+    "245": {"template": "245", "entity": f"HFCS ({gwp_to_use})", "unit": "Gg CO2 / yr"},
+    "246": {"template": "245", "entity": f"HFCS ({gwp_to_use})", "unit": "Gg CO2 / yr"},
+    "247": {"template": "247", "entity": f"HFCS ({gwp_to_use})", "unit": "Gg CO2 / yr"},
+    "248": {"template": "247", "entity": f"HFCS ({gwp_to_use})", "unit": "Gg CO2 / yr"},
+    "249": {
+        "template": "203",
+        "entity": f"PFCS ({gwp_to_use})",
+        "unit": "Gg CO2 / yr",
+    },  # PFCs
+    "250": {"template": "250", "entity": f"PFCS ({gwp_to_use})", "unit": "Gg CO2 / yr"},
+    "251": {"template": "207", "entity": f"PFCS ({gwp_to_use})", "unit": "Gg CO2 / yr"},
+    "252": {"template": "252", "entity": f"PFCS ({gwp_to_use})", "unit": "Gg CO2 / yr"},
+    "253": {"template": "253", "entity": f"PFCS ({gwp_to_use})", "unit": "Gg CO2 / yr"},
+    "254": {"template": "254", "entity": f"PFCS ({gwp_to_use})", "unit": "Gg CO2 / yr"},
+    "255": {
+        "template": "219",
+        "entity": f"SF6 ({gwp_to_use})",
+        "unit": "Gg CO2 / yr",
+    },  # SF6
+    "256": {"template": "256", "entity": f"SF6 ({gwp_to_use})", "unit": "Gg CO2 / yr"},
+    "257": {"template": "207", "entity": f"SF6 ({gwp_to_use})", "unit": "Gg CO2 / yr"},
+    "258": {"template": "258", "entity": f"SF6 ({gwp_to_use})", "unit": "Gg CO2 / yr"},
+    "259": {"template": "207", "entity": f"SF6 ({gwp_to_use})", "unit": "Gg CO2 / yr"},
+    "260": {"template": "260", "entity": f"SF6 ({gwp_to_use})", "unit": "Gg CO2 / yr"},
+    "261": {
+        "template": "261",
+        "entity": f"NF3 ({gwp_to_use})",
+        "unit": "Gg CO2 / yr",
+    },  # NF3
+    "262": {"template": "262", "entity": f"NF3 ({gwp_to_use})", "unit": "Gg CO2 / yr"},
+    "263": {"template": "207", "entity": f"NF3 ({gwp_to_use})", "unit": "Gg CO2 / yr"},
+    "264": {"template": "264", "entity": f"NF3 ({gwp_to_use})", "unit": "Gg CO2 / yr"},
+    "265": {"template": "207", "entity": f"NF3 ({gwp_to_use})", "unit": "Gg CO2 / yr"},
+    "266": {"template": "266", "entity": f"NF3 ({gwp_to_use})", "unit": "Gg CO2 / yr"},
 }

 country_processing_step1 = {
-    'aggregate_cats': {
-        'M.3.C.AG': {'sources': ['3.C.1', '3.C.2', '3.C.3', '3.C.4', '3.C.5',
-                                 '3.C.6', '3.C.7', '3.C.8'],
-                     'name': 'Aggregate sources and non-CO2 emissions sources on land '
-                             '(Agriculture)'},
-        'M.3.D.AG': {'sources': ['3.D.2'],
-                     'name': 'Other (Agriculture)'},
-        'M.AG.ELV': {'sources': ['M.3.C.AG', 'M.3.D.AG'],
-                     'name': 'Agriculture excluding livestock'},
-        'M.AG': {'sources': ['3.A', 'M.AG.ELV'],
-                     'name': 'Agriculture'},
-        'M.3.D.LU': {'sources': ['3.D.1'],
-                     'name': 'Other (LULUCF)'},
-        'M.LULUCF': {'sources': ['3.B', 'M.3.D.LU'],
-                     'name': 'LULUCF'},
-        'M.0.EL': {'sources': ['1', '2', 'M.AG', '4', '5'],
-                     'name': 'National total emissions excluding LULUCF'},
-    },
-    'basket_copy': {
-        'GWPs_to_add': ["SARGWP100", "AR5GWP100", "AR6GWP100"],
-        'entities': ["HFCS", "PFCS"],
-        'source_GWP': gwp_to_use,
+    "aggregate_cats": {
+        "M.3.C.AG": {
+            "sources": [
+                "3.C.1",
+                "3.C.2",
+                "3.C.3",
+                "3.C.4",
+                "3.C.5",
+                "3.C.6",
+                "3.C.7",
+                "3.C.8",
+            ],
+            "name": "Aggregate sources and non-CO2 emissions sources on land "
+            "(Agriculture)",
+        },
+        "M.3.D.AG": {"sources": ["3.D.2"], "name": "Other (Agriculture)"},
+        "M.AG.ELV": {
+            "sources": ["M.3.C.AG", "M.3.D.AG"],
+            "name": "Agriculture excluding livestock",
+        },
+        "M.AG": {"sources": ["3.A", "M.AG.ELV"], "name": "Agriculture"},
+        "M.3.D.LU": {"sources": ["3.D.1"], "name": "Other (LULUCF)"},
+        "M.LULUCF": {"sources": ["3.B", "M.3.D.LU"], "name": "LULUCF"},
+        "M.0.EL": {
+            "sources": ["1", "2", "M.AG", "4", "5"],
+            "name": "National total emissions excluding LULUCF",
+        },
+    },
+    "basket_copy": {
+        "GWPs_to_add": ["SARGWP100", "AR5GWP100", "AR6GWP100"],
+        "entities": ["HFCS", "PFCS"],
+        "source_GWP": gwp_to_use,
     },
 }

 gas_baskets = {
-    'FGASES (SARGWP100)': ['HFCS (SARGWP100)', 'PFCS (SARGWP100)', 'SF6', 'NF3'],
-    'FGASES (AR4GWP100)': ['HFCS (AR4GWP100)', 'PFCS (AR4GWP100)', 'SF6', 'NF3'],
-    'FGASES (AR5GWP100)':['HFCS (AR5GWP100)', 'PFCS (AR5GWP100)', 'SF6', 'NF3'],
-    'FGASES (AR6GWP100)':['HFCS (AR6GWP100)', 'PFCS (AR6GWP100)', 'SF6', 'NF3'],
-    'KYOTOGHG (SARGWP100)': ['CO2', 'CH4', 'N2O', 'FGASES (SARGWP100)'],
-    'KYOTOGHG (AR4GWP100)': ['CO2', 'CH4', 'N2O', 'FGASES (AR4GWP100)'],
-    'KYOTOGHG (AR5GWP100)': ['CO2', 'CH4', 'N2O', 'FGASES (AR5GWP100)'],
-    'KYOTOGHG (AR6GWP100)': ['CO2', 'CH4', 'N2O', 'FGASES (AR6GWP100)'],
+    "FGASES (SARGWP100)": ["HFCS (SARGWP100)", "PFCS (SARGWP100)", "SF6", "NF3"],
+    "FGASES (AR4GWP100)": ["HFCS (AR4GWP100)", "PFCS (AR4GWP100)", "SF6", "NF3"],
+    "FGASES (AR5GWP100)": ["HFCS (AR5GWP100)", "PFCS (AR5GWP100)", "SF6", "NF3"],
+    "FGASES (AR6GWP100)": ["HFCS (AR6GWP100)", "PFCS (AR6GWP100)", "SF6", "NF3"],
+    "KYOTOGHG (SARGWP100)": ["CO2", "CH4", "N2O", "FGASES (SARGWP100)"],
+    "KYOTOGHG (AR4GWP100)": ["CO2", "CH4", "N2O", "FGASES (AR4GWP100)"],
+    "KYOTOGHG (AR5GWP100)": ["CO2", "CH4", "N2O", "FGASES (AR5GWP100)"],
+    "KYOTOGHG (AR6GWP100)": ["CO2", "CH4", "N2O", "FGASES (AR6GWP100)"],
 }

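The `aggregate_cats` rules above are consumed during processing: each new code (e.g. M.AG.ELV) is the sum of its listed source categories, and later rules may use categories created by earlier ones. A minimal pandas sketch of that summation, illustrative only (the actual processing in unfccc_ghg_data/primap2 additionally handles units, tolerances and the GWP basket copies):

import pandas as pd

# two of the rules from country_processing_step1["aggregate_cats"] above
aggregate_cats = {
    "M.3.D.AG": {"sources": ["3.D.2"], "name": "Other (Agriculture)"},
    "M.AG.ELV": {
        "sources": ["M.3.C.AG", "M.3.D.AG"],
        "name": "Agriculture excluding livestock",
    },
}

# wide table: one row per category, one column per year
data = pd.DataFrame(
    {"2019": [1.0, 2.0]},
    index=pd.Index(["3.D.2", "M.3.C.AG"], name="category"),
)

# rule order matters: M.AG.ELV uses M.3.D.AG, which is created first
for new_cat, spec in aggregate_cats.items():
    sources = [cat for cat in spec["sources"] if cat in data.index]
    data.loc[new_cat] = data.loc[sources].sum()

print(data)  # M.3.D.AG = 1.0, M.AG.ELV = 3.0
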
+ 82 - 52
src/unfccc_ghg_data/unfccc_reader/Malaysia/read_MYS_BUR3_from_pdf.py

@@ -1,8 +1,15 @@
-# this script reads data from Malaysia's BUR3
+"""
+Read Malaysia's BUR3 from pdf
+
+This script reads data from Malaysia's BUR3
+Data are read from pdf using camelot
+
+"""
+

 import camelot
 import primap2 as pm2
-from .config_mys_bur3 import (
+from config_mys_bur3 import (
     cat_code_regexp,
     cat_codes_manual,
     cat_names_fix,
@@ -33,8 +40,8 @@ if __name__ == "__main__":
     # ###
     # configuration
     # ###
-    input_folder = downloaded_data_path / 'UNFCCC' / 'Malaysia' / 'BUR3'
-    output_folder = extracted_data_path / 'UNFCCC' / 'Malaysia'
+    input_folder = downloaded_data_path / "UNFCCC" / "Malaysia" / "BUR3"
+    output_folder = extracted_data_path / "UNFCCC" / "Malaysia"
     if not output_folder.exists():
         output_folder.mkdir()

@@ -47,7 +54,7 @@ if __name__ == "__main__":
     # SF6: 234 - 237
     # NF3: 238 - 241

-    output_filename = 'MYS_BUR3_2020_'
+    output_filename = "MYS_BUR3_2020_"
     compression = dict(zlib=True, complevel=9)

     # ###
@@ -62,36 +69,44 @@ if __name__ == "__main__":
         area = table_def_templates[page_template_nr]["area"]
         if "cols" in table_def_templates[page_template_nr].keys():
             cols = table_def_templates[page_template_nr]["cols"]
-            tables = camelot.read_pdf(str(input_folder / pdf_file), pages=str(page), \
-                                      flavor='stream', table_areas=area, columns=cols,
-                                      split_text=True)
+            tables = camelot.read_pdf(
+                str(input_folder / pdf_file),
+                pages=str(page),
+                flavor="stream",
+                table_areas=area,
+                columns=cols,
+                split_text=True,
+            )
         else:
-            tables = camelot.read_pdf(str(input_folder / pdf_file), pages=str(page), \
-                                      flavor='stream', table_areas=area)
+            tables = camelot.read_pdf(
+                str(input_folder / pdf_file),
+                pages=str(page),
+                flavor="stream",
+                table_areas=area,
+            )

         df_current = tables[0].df.copy()
-        df_current.iloc[0,0] = 'Categories'
+        df_current.iloc[0, 0] = "Categories"
         df_current.columns = df_current.iloc[0]
         df_current = df_current.drop(0)
         # replace double \n
-        df_current[index_cols[0]] = \
-            df_current[index_cols[0]].str.replace("\n", " ")
+        df_current[index_cols[0]] = df_current[index_cols[0]].str.replace("\n", " ")
         # replace double and triple spaces
-        df_current[index_cols[0]] = \
-            df_current[index_cols[0]].str.replace("   ", " ")
-        df_current[index_cols[0]] = \
-            df_current[index_cols[0]].str.replace("  ", " ")
+        df_current[index_cols[0]] = df_current[index_cols[0]].str.replace("   ", " ")
+        df_current[index_cols[0]] = df_current[index_cols[0]].str.replace("  ", " ")

         # fix the split rows
         if "rows_to_fix" in table_def_templates[page_template_nr].keys():
             for n_rows in table_def_templates[page_template_nr]["rows_to_fix"].keys():
-                df_current = fix_rows(df_current,
-                                      table_def_templates[page_template_nr]["rows_to_fix"][
-                                          n_rows], index_cols[0], n_rows)
+                df_current = fix_rows(
+                    df_current,
+                    table_def_templates[page_template_nr]["rows_to_fix"][n_rows],
+                    index_cols[0],
+                    n_rows,
+                )

         # replace category names with typos
-        df_current[index_cols[0]] = \
-            df_current[index_cols[0]].replace(cat_names_fix)
+        df_current[index_cols[0]] = df_current[index_cols[0]].replace(cat_names_fix)

         # replace empty strings
         df_current = df_current.replace(values_replacement)
@@ -106,7 +121,7 @@ if __name__ == "__main__":
         for col in cols_for_space_stripping:
             df_current[col] = df_current[col].str.strip()

-        # print(df_current.columns.values)
+        # print(df_current.columns.to_numpy())

         # aggregate dfs
         if df_all is None:
@@ -118,10 +133,11 @@ if __name__ == "__main__":
             cols_both = list(set(cols_all).intersection(set(cols_current)))
             # print(cols_both)
             if len(cols_both) > 0:
-                df_all = df_all.merge(df_current, how='outer', on=cols_both,
-                                      suffixes=(None, None))
+                df_all = df_all.merge(
+                    df_current, how="outer", on=cols_both, suffixes=(None, None)
+                )
             else:
-                df_all = df_all.merge(df_current, how='outer', suffixes=(None, None))
+                df_all = df_all.merge(df_current, how="outer", suffixes=(None, None))
             df_all = df_all.groupby(index_cols).first().reset_index()
             # df_all = df_all.join(df_current, how='outer')

@@ -137,28 +153,38 @@ if __name__ == "__main__":
     # replace cat names by codes in col "Categories"
     # first the manual replacements
     df_all["Categories"] = df_all["Categories"].replace(cat_codes_manual)
+
     # then the regex replacements
-    def repl(m):
-        return convert_ipcc_code_primap_to_primap2('IPC' + m.group('code'))
-    df_all["Categories"] = df_all["Categories"].str.replace(cat_code_regexp, repl, regex=True)
+    def repl(m):  # noqa: D103
+        return convert_ipcc_code_primap_to_primap2("IPC" + m.group("code"))
+
+    df_all["Categories"] = df_all["Categories"].str.replace(
+        cat_code_regexp, repl, regex=True
+    )

     # make sure all col headers are str
     df_all.columns = df_all.columns.map(str)

     # remove thousands separators as pd.to_numeric can't deal with that
     # also replace None with NaN
-    year_cols = list(set(df_all.columns) - set(['Categories', 'entity', 'unit', 'orig_cat_name']))
+    year_cols = list(
+        set(df_all.columns) - set(["Categories", "entity", "unit", "orig_cat_name"])
+    )
     for col in year_cols:
         df_all.loc[:, col] = df_all.loc[:, col].str.strip()
-        def repl(m):
-            return m.group('part1') + m.group('part2')
-        df_all.loc[:, col] = df_all.loc[:, col].str.replace('(?P<part1>[0-9]+),(?P<part2>[0-9\\.]+)$', repl, regex=True)
-        df_all[col][df_all[col].isnull()] = 'NaN'
+
+        def repl(m):  # noqa: D103
+            return m.group("part1") + m.group("part2")
+
+        df_all.loc[:, col] = df_all.loc[:, col].str.replace(
+            "(?P<part1>[0-9]+),(?P<part2>[0-9\\.]+)$", repl, regex=True
+        )
+        df_all[col][df_all[col].isna()] = "NaN"
         # manually map code NENO to nan
-        df_all.loc[:, col] = df_all.loc[:, col].str.replace('NENO','NaN')
-        df_all.loc[:, col] = df_all.loc[:, col].str.replace('O NANaN','NaN')
-        df_all.loc[:, col] = df_all.loc[:, col].str.replace('IE NO','0')
-        df_all.loc[:, col] = df_all.loc[:, col].str.replace('IE NA NO I','0')
+        df_all.loc[:, col] = df_all.loc[:, col].str.replace("NENO", "NaN")
+        df_all.loc[:, col] = df_all.loc[:, col].str.replace("O NANaN", "NaN")
+        df_all.loc[:, col] = df_all.loc[:, col].str.replace("IE NO", "0")
+        df_all.loc[:, col] = df_all.loc[:, col].str.replace("IE NA NO I", "0")
         # TODO: add code to PRIMAP2

     # drop orig_cat_name as it's non-unique per category
@@ -167,17 +193,17 @@ if __name__ == "__main__":
     data_if = pm2.pm2io.convert_wide_dataframe_if(
         df_all,
         coords_cols=coords_cols,
-        #add_coords_cols=add_coords_cols,
+        # add_coords_cols=add_coords_cols,
         coords_defaults=coords_defaults,
         coords_terminologies=coords_terminologies,
-        #coords_value_mapping=coords_value_mapping,
-        #coords_value_filling=coords_value_filling,
-        #filter_remove=filter_remove,
-        #filter_keep=filter_keep,
+        # coords_value_mapping=coords_value_mapping,
+        # coords_value_filling=coords_value_filling,
+        # filter_remove=filter_remove,
+        # filter_keep=filter_keep,
         meta_data=meta_data,
         convert_str=True,
         time_format="%Y",
-        )
+    )

     data_pm2 = pm2.pm2io.from_interchange_format(data_if)

@@ -190,12 +216,15 @@ if __name__ == "__main__":
         output_folder.mkdir()
     pm2.pm2io.write_interchange_format(
         output_folder / (output_filename + coords_terminologies["category"] + "_raw"),
-        data_if)
+        data_if,
+    )

     encoding = {var: compression for var in data_pm2.data_vars}
     data_pm2.pr.to_netcdf(
-        output_folder / (output_filename + coords_terminologies["category"] + "_raw.nc"),
-        encoding=encoding)
+        output_folder
+        / (output_filename + coords_terminologies["category"] + "_raw.nc"),
+        encoding=encoding,
+    )

     # ###
     # ## process the data
@@ -211,9 +240,9 @@ if __name__ == "__main__":
     )

     # adapt source and metadata
-    current_source = data_proc_pm2.coords["source"].values[0]
+    current_source = data_proc_pm2.coords["source"].to_numpy()[0]
     data_temp = data_proc_pm2.pr.loc[{"source": current_source}]
-    data_proc_pm2 = data_proc_pm2.pr.set("source", 'BUR_NIR', data_temp)
+    data_proc_pm2 = data_proc_pm2.pr.set("source", "BUR_NIR", data_temp)

     # ###
     # save data to IF and native format
@@ -222,9 +251,10 @@ if __name__ == "__main__":
     if not output_folder.exists():
         output_folder.mkdir()
     pm2.pm2io.write_interchange_format(
-        output_folder / (output_filename + terminology_proc), data_proc_if)
+        output_folder / (output_filename + terminology_proc), data_proc_if
+    )

     encoding = {var: compression for var in data_proc_pm2.data_vars}
     data_proc_pm2.pr.to_netcdf(
-        output_folder / (output_filename + terminology_proc + ".nc"),
-        encoding=encoding)
+        output_folder / (output_filename + terminology_proc + ".nc"), encoding=encoding
+    )

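The per-column cleanup above first strips thousands separators and then maps composite UNFCCC notation keys (NE, NO, IE, ...) to values pd.to_numeric can handle. A condensed stand-alone sketch of the same two steps (sample values invented for illustration):

import pandas as pd

values = pd.Series(["1,234.5", "NENO", "IE NO", "123.4"])

# remove the thousands separator so pd.to_numeric can parse the number
def repl(m):
    return m.group("part1") + m.group("part2")

values = values.str.replace(
    "(?P<part1>[0-9]+),(?P<part2>[0-9\\.]+)$", repl, regex=True
)

# map merged notation keys, as in the script above
values = values.str.replace("NENO", "NaN").str.replace("IE NO", "0")

print(pd.to_numeric(values))  # 1234.5, NaN, 0.0, 123.4
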
+ 84 - 55
src/unfccc_ghg_data/unfccc_reader/Malaysia/read_MYS_BUR4_from_pdf.py

@@ -1,10 +1,16 @@
-# this script reads data from Malaysia's BUR4
-# code ist mostly identical to BUR3
+"""
+Read Malaysia's BUR4 from pdf
+
+This script reads data from Malaysia's BUR4
+Data are read from pdf using camelot
+
+Code is mostly identical to BUR3
+"""


 import camelot
 import primap2 as pm2
-from .config_mys_bur4 import (
+from config_mys_bur4 import (
     cat_code_regexp,
     cat_codes_manual,
     cat_names_fix,
@@ -35,8 +41,8 @@ if __name__ == "__main__":
     # ###
     # configuration
     # ###
-    input_folder = downloaded_data_path / 'UNFCCC' / 'Malaysia' / 'BUR4'
-    output_folder = extracted_data_path / 'UNFCCC' / 'Malaysia'
+    input_folder = downloaded_data_path / "UNFCCC" / "Malaysia" / "BUR4"
+    output_folder = extracted_data_path / "UNFCCC" / "Malaysia"
     if not output_folder.exists():
         output_folder.mkdir()

@@ -50,7 +56,7 @@ if __name__ == "__main__":
     # SF6: 255 - 260
     # NF3: 261 - 266

-    output_filename = 'MYS_BUR4_2022_'
+    output_filename = "MYS_BUR4_2022_"
     compression = dict(zlib=True, complevel=9)

     # ###
@@ -65,36 +71,44 @@ if __name__ == "__main__":
         area = table_def_templates[page_template_nr]["area"]
         if "cols" in table_def_templates[page_template_nr].keys():
             cols = table_def_templates[page_template_nr]["cols"]
-            tables = camelot.read_pdf(str(input_folder / pdf_file), pages=str(page), \
-                                      flavor='stream', table_areas=area, columns=cols,
-                                      split_text=True)
+            tables = camelot.read_pdf(
+                str(input_folder / pdf_file),
+                pages=str(page),
+                flavor="stream",
+                table_areas=area,
+                columns=cols,
+                split_text=True,
+            )
         else:
-            tables = camelot.read_pdf(str(input_folder / pdf_file), pages=str(page), \
-                                      flavor='stream', table_areas=area)
+            tables = camelot.read_pdf(
+                str(input_folder / pdf_file),
+                pages=str(page),
+                flavor="stream",
+                table_areas=area,
+            )

         df_current = tables[0].df.copy()
-        df_current.iloc[0,0] = 'Categories'
+        df_current.iloc[0, 0] = "Categories"
         df_current.columns = df_current.iloc[0]
         df_current = df_current.drop(0)
         # replace double \n
-        df_current[index_cols[0]] = \
-            df_current[index_cols[0]].str.replace("\n", " ")
+        df_current[index_cols[0]] = df_current[index_cols[0]].str.replace("\n", " ")
         # replace double and triple spaces
-        df_current[index_cols[0]] = \
-            df_current[index_cols[0]].str.replace("   ", " ")
-        df_current[index_cols[0]] = \
-            df_current[index_cols[0]].str.replace("  ", " ")
+        df_current[index_cols[0]] = df_current[index_cols[0]].str.replace("   ", " ")
+        df_current[index_cols[0]] = df_current[index_cols[0]].str.replace("  ", " ")

         # fix the split rows
         if "rows_to_fix" in table_def_templates[page_template_nr].keys():
             for n_rows in table_def_templates[page_template_nr]["rows_to_fix"].keys():
-                df_current = fix_rows(df_current,
-                                      table_def_templates[page_template_nr]["rows_to_fix"][
-                                          n_rows], index_cols[0], n_rows)
+                df_current = fix_rows(
+                    df_current,
+                    table_def_templates[page_template_nr]["rows_to_fix"][n_rows],
+                    index_cols[0],
+                    n_rows,
+                )

         # replace category names with typos
-        df_current[index_cols[0]] = \
-            df_current[index_cols[0]].replace(cat_names_fix)
+        df_current[index_cols[0]] = df_current[index_cols[0]].replace(cat_names_fix)

         # replace empty strings
         df_current = df_current.replace(values_replacement)
@@ -109,22 +123,23 @@ if __name__ == "__main__":
         for col in cols_for_space_stripping:
             df_current[col] = df_current[col].str.strip()

-        # print(df_current.columns.values)
+        # print(df_current.columns.to_numpy())

         # aggregate dfs
         if df_all is None:
             df_all = df_current
         else:
             # find intersecting cols
-            cols_all = df_all.columns.values
-            cols_current = df_current.columns.values
+            cols_all = df_all.columns.to_numpy()
+            cols_current = df_current.columns.to_numpy()
             cols_both = list(set(cols_all).intersection(set(cols_current)))
             # print(cols_both)
             if len(cols_both) > 0:
-                df_all = df_all.merge(df_current, how='outer', on=cols_both,
-                                      suffixes=(None, None))
+                df_all = df_all.merge(
+                    df_current, how="outer", on=cols_both, suffixes=(None, None)
+                )
             else:
-                df_all = df_all.merge(df_current, how='outer', suffixes=(None, None))
+                df_all = df_all.merge(df_current, how="outer", suffixes=(None, None))
             df_all = df_all.groupby(index_cols).first().reset_index()
             # df_all = df_all.join(df_current, how='outer')

@@ -140,28 +155,38 @@ if __name__ == "__main__":
     # replace cat names by codes in col "Categories"
     # first the manual replacements
     df_all["Categories"] = df_all["Categories"].replace(cat_codes_manual)
+
     # then the regex replacements
-    def repl(m):
-        return convert_ipcc_code_primap_to_primap2('IPC' + m.group('code'))
-    df_all["Categories"] = df_all["Categories"].str.replace(cat_code_regexp, repl, regex=True)
+    def repl(m):  # noqa: D103
+        return convert_ipcc_code_primap_to_primap2("IPC" + m.group("code"))
+
+    df_all["Categories"] = df_all["Categories"].str.replace(
+        cat_code_regexp, repl, regex=True
+    )

     # make sure all col headers are str
     df_all.columns = df_all.columns.map(str)

     # remove thousands separators as pd.to_numeric can't deal with that
     # also replace None with NaN
-    year_cols = list(set(df_all.columns) - set(['Categories', 'entity', 'unit', 'orig_cat_name']))
+    year_cols = list(
+        set(df_all.columns) - set(["Categories", "entity", "unit", "orig_cat_name"])
+    )
     for col in year_cols:
         df_all.loc[:, col] = df_all.loc[:, col].str.strip()
-        def repl(m):
-            return m.group('part1') + m.group('part2')
-        df_all.loc[:, col] = df_all.loc[:, col].str.replace('(?P<part1>[0-9]+),(?P<part2>[0-9\\.]+)$', repl, regex=True)
-        df_all[col][df_all[col].isnull()] = 'NaN'
+
+        def repl(m):  # noqa: D103
+            return m.group("part1") + m.group("part2")
+
+        df_all.loc[:, col] = df_all.loc[:, col].str.replace(
+            "(?P<part1>[0-9]+),(?P<part2>[0-9\\.]+)$", repl, regex=True
+        )
+        df_all[col][df_all[col].isna()] = "NaN"
         # manually map code NENO to nan
-        df_all.loc[:, col] = df_all.loc[:, col].str.replace('NENO','NaN')
-        df_all.loc[:, col] = df_all.loc[:, col].str.replace('O NANaN','NaN')
-        df_all.loc[:, col] = df_all.loc[:, col].str.replace('IE NO','0')
-        df_all.loc[:, col] = df_all.loc[:, col].str.replace('IE NA NO I','0')
+        df_all.loc[:, col] = df_all.loc[:, col].str.replace("NENO", "NaN")
+        df_all.loc[:, col] = df_all.loc[:, col].str.replace("O NANaN", "NaN")
+        df_all.loc[:, col] = df_all.loc[:, col].str.replace("IE NO", "0")
+        df_all.loc[:, col] = df_all.loc[:, col].str.replace("IE NA NO I", "0")
         # TODO: add code to PRIMAP2

     # drop orig_cat_name as it's non-unique per category
@@ -170,17 +195,17 @@ if __name__ == "__main__":
     data_if = pm2.pm2io.convert_wide_dataframe_if(
         df_all,
         coords_cols=coords_cols,
-        #add_coords_cols=add_coords_cols,
+        # add_coords_cols=add_coords_cols,
         coords_defaults=coords_defaults,
         coords_terminologies=coords_terminologies,
-        #coords_value_mapping=coords_value_mapping,
-        #coords_value_filling=coords_value_filling,
-        #filter_remove=filter_remove,
-        #filter_keep=filter_keep,
+        # coords_value_mapping=coords_value_mapping,
+        # coords_value_filling=coords_value_filling,
+        # filter_remove=filter_remove,
+        # filter_keep=filter_keep,
         meta_data=meta_data,
         convert_str=True,
         time_format="%Y",
-        )
+    )

     data_pm2 = pm2.pm2io.from_interchange_format(data_if)

@@ -193,12 +218,15 @@ if __name__ == "__main__":
         output_folder.mkdir()
     pm2.pm2io.write_interchange_format(
         output_folder / (output_filename + coords_terminologies["category"] + "_raw"),
-        data_if)
+        data_if,
+    )

     encoding = {var: compression for var in data_pm2.data_vars}
     data_pm2.pr.to_netcdf(
-        output_folder / (output_filename + coords_terminologies["category"] + "_raw.nc"),
-        encoding=encoding)
+        output_folder
+        / (output_filename + coords_terminologies["category"] + "_raw.nc"),
+        encoding=encoding,
+    )

     # ###
     # ## process the data
@@ -214,9 +242,9 @@ if __name__ == "__main__":
     )

     # adapt source and metadata
-    current_source = data_proc_pm2.coords["source"].values[0]
+    current_source = data_proc_pm2.coords["source"].to_numpy()[0]
     data_temp = data_proc_pm2.pr.loc[{"source": current_source}]
-    data_proc_pm2 = data_proc_pm2.pr.set("source", 'BUR_NIR', data_temp)
+    data_proc_pm2 = data_proc_pm2.pr.set("source", "BUR_NIR", data_temp)

     # ###
     # save data to IF and native format
@@ -225,9 +253,10 @@ if __name__ == "__main__":
     if not output_folder.exists():
         output_folder.mkdir()
     pm2.pm2io.write_interchange_format(
-        output_folder / (output_filename + terminology_proc), data_proc_if)
+        output_folder / (output_filename + terminology_proc), data_proc_if
+    )

     encoding = {var: compression for var in data_proc_pm2.data_vars}
     data_proc_pm2.pr.to_netcdf(
-        output_folder / (output_filename + terminology_proc + ".nc"),
-        encoding=encoding)
+        output_folder / (output_filename + terminology_proc + ".nc"), encoding=encoding
+    )

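Both Malaysia readers assemble the full inventory by outer-merging the per-page tables on whatever columns they share, so pages contributing different gases extend the table sideways instead of appending rows. A toy sketch of that merge step (sample frames invented for illustration):

import pandas as pd

# tables as read from two different pdf pages
page1 = pd.DataFrame({"Categories": ["1", "2"], "CO2": ["1.0", "2.0"]})
page2 = pd.DataFrame({"Categories": ["1", "2"], "CH4": ["0.1", "0.2"]})

# merge on the shared columns ("Categories" here), as in the scripts above
cols_both = list(set(page1.columns).intersection(set(page2.columns)))
df_all = page1.merge(page2, how="outer", on=cols_both)

print(df_all)  # one wide table with Categories, CO2 and CH4 columns
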
+ 30 - 0
src/unfccc_ghg_data/unfccc_reader/Mexico/__init__.py

@@ -0,0 +1,30 @@
+"""Read Mexico's BURs, NIRs, NCs
+
+Scripts and configurations to read Mexico's submissions to the UNFCCC.
+Currently, the following submissions and datasets are available (all datasets
+including DI (read using the DI-reader) and legacy BUR/NIR (no code)):
+
+.. exec_code::
+    :hide_code:
+
+    from unfccc_ghg_data.helper.functions import (get_country_datasets,
+                                                  get_country_submissions)
+    country = 'MEX'
+    # print available submissions
+    print("="*15 + " Available submissions " + "="*15)
+    get_country_submissions(country, True)
+    print("")
+
+    # print available datasets
+    print("="*15 + " Available datasets " + "="*15)
+    get_country_datasets(country, True)
+
+You can also obtain this information running
+
+.. code-block:: bash
+
+    poetry run doit country_info country=MEX
+
+See below for a listing of scripts for BUR/NIR reading including links.
+
+"""

+ 81 - 35
src/unfccc_ghg_data/unfccc_reader/Mexico/config_mex_bur3.py

@@ -1,8 +1,42 @@
+"""Config for Mexico's BUR3
+
+Full configuration including PRIMAP2 conversion config and metadata
+
+"""
+
 import pandas as pd


-def fix_rows(data: pd.DataFrame, rows_to_fix: list, col_to_use: str,
-             n_rows: int) -> pd.DataFrame:
+def fix_rows(
+    data: pd.DataFrame, rows_to_fix: list, col_to_use: str, n_rows: int
+) -> pd.DataFrame:
+    """
+    Combine split rows
+
+    This function combines rows which have been split into several rows during data
+    reading from pdf because they contained line breaks.
+
+    Parameters
+    ----------
+    data: pd.DataFrame
+        The data to work with
+    rows_to_fix: list
+        List of values for which to fix rows
+    col_to_use: str
+        column to use to find the rows to merge
+    n_rows: int
+        How many rows to combine for each row found. e.g. 3 means combine the found
+        row with the following two rows. Negative values are used for more
+        complicated situations where the rows to merge are also before the position
+        of the value that indicates the merge. See code for details
+
+    Returns
+    -------
+        pandas DataFrame with combined rows. The individual rows are removed
+
+    TODO: move function to helper module (make sure to have one function that works
+     for all cases)
+    """
     for row in rows_to_fix:
         # print(row)
         # find the row number and collect the row and the next two rows
@@ -16,29 +50,29 @@ def fix_rows(data: pd.DataFrame, rows_to_fix: list, col_to_use: str,
         for item in index:
             loc = data.index.get_loc(item)
             ####print(data[col_to_use].loc[loc + 1])
-            if n_rows == -2:
+            if n_rows == -2:  # noqa: PLR2004
                 locs_to_merge = list(range(loc - 1, loc + 1))
                 loc_to_check = loc - 1
-            if n_rows == -6:
+            elif n_rows == -6:  # noqa: PLR2004
                 locs_to_merge = list(range(loc - 3, loc + 3))
                 loc_to_check = loc - 3
-            elif n_rows == -3:
+            elif n_rows == -3:  # noqa: PLR2004
                 locs_to_merge = list(range(loc - 1, loc + 2))
                 loc_to_check = loc - 1
             else:
                 locs_to_merge = list(range(loc, loc + n_rows))
                 loc_to_check = loc + 1

-            if (data[col_to_use].loc[loc_to_check] == '') or n_rows == 2:
+            if (not data[col_to_use].loc[loc_to_check]) or n_rows == 2:  # noqa: PLR2004
                 rows_to_merge = data.iloc[locs_to_merge]
                 indices_to_merge = rows_to_merge.index
                 # replace numerical NaN values
                 ####print(rows_to_merge)
-                rows_to_merge = rows_to_merge.fillna('')
+                rows_to_merge = rows_to_merge.fillna("")
                 ####print("fillna")
                 ####print("fillna")
                 ####print(rows_to_merge)
                 ####print(rows_to_merge)
                 # join the three rows
                 # join the three rows
-                new_row = rows_to_merge.agg(' '.join)
+                new_row = rows_to_merge.agg(" ".join)
                 # replace the double spaces that are created
                 # must be done here and not at the end as splits are not always
                 # the same and join would produce different col values
@@ -54,67 +88,77 @@ def fix_rows(data: pd.DataFrame, rows_to_fix: list, col_to_use: str,
         data = data.reset_index(drop=True)
     return data

+
 page_defs = {
-    '118': {
+    "118": {
         "camelot": {
         "camelot": {
-            "table_areas": ['49,602,551,73'],
-            "columns": ['223,277,314,348,392,422,446,483'],
+            "table_areas": ["49,602,551,73"],
+            "columns": ["223,277,314,348,392,422,446,483"],
             "split_text": False,
             "split_text": False,
             "flavor": "stream",
             "flavor": "stream",
         },
         },
         "rows_to_fix": {
         "rows_to_fix": {
             -6: ["Categorías de fuentes y"],
             -6: ["Categorías de fuentes y"],
-            3: ["Todas las emisiones y las absorciones",
+            3: [
+                "Todas las emisiones y las absorciones",
                 "Todas las emisiones (sin [3B] Tierra ni",
                 "Todas las emisiones (sin [3B] Tierra ni",
                 "[1A] Actividades de quema del",
                 "[1A] Actividades de quema del",
                 "[1A2] Industrias manufactura y de la",
                 "[1A2] Industrias manufactura y de la",
                 "[1B] Emisiones fugitivas provenientes de",
                 "[1B] Emisiones fugitivas provenientes de",
-                "[2] Procesos industriales y uso de"],
+                "[2] Procesos industriales y uso de",
+            ],
         },
     },
-    '119': {
+    "119": {
         "camelot": {
         "camelot": {
-            "table_areas": ['49,650,551,77'],
-            "columns": ['228,275,317,352,394,421,446,483'],
+            "table_areas": ["49,650,551,77"],
+            "columns": ["228,275,317,352,394,421,446,483"],
             "split_text": True,
             "split_text": True,
             "flavor": "stream",
             "flavor": "stream",
         },
         },
         "rows_to_fix": {
         "rows_to_fix": {
             -6: ["Categorías de fuentes y"],
             -6: ["Categorías de fuentes y"],
-            3: ["[2B4] Producción de caprolactama,",
+            3: [
+                "[2B4] Producción de caprolactama,",
                 "[2B8] Producción petroquímica y negro",
                 "[2B8] Producción petroquímica y negro",
                 "[2D] Uso de productos no energéticos de",
                 "[2D] Uso de productos no energéticos de",
-                "[2E1] Circuitos integrados o"],
+                "[2E1] Circuitos integrados o",
+            ],
         },
     },
-    '120': {
+    "120": {
         "camelot": {
         "camelot": {
-            "table_areas": ['49,650,551,77'],
-            "columns": ['223,277,314,348,392,422,446,483'],
+            "table_areas": ["49,650,551,77"],
+            "columns": ["223,277,314,348,392,422,446,483"],
             "split_text": False,
             "split_text": False,
             "flavor": "stream",
             "flavor": "stream",
         },
         },
         "rows_to_fix": {
         "rows_to_fix": {
             -6: ["Categorías de fuentes y"],
             -6: ["Categorías de fuentes y"],
             -3: ["[3B] Tierra"],
             -3: ["[3B] Tierra"],
-            3: ["[2F] Uso de productos sustitutos de las",
+            3: [
+                "[2F] Uso de productos sustitutos de las",
                 "[2G] Manufactura y utilización de otros",
                 "[2G] Manufactura y utilización de otros",
-                "[3] Agricultura, silvicultura y otros usos"],
-            2: ["[2H2] Industria de la alimentación y las",
-                "[2G2] SF₆ y PFC de otros usos de"],
+                "[3] Agricultura, silvicultura y otros usos",
+            ],
+            2: [
+                "[2H2] Industria de la alimentación y las",
+                "[2G2] SF₆ y PFC de otros usos de",
+            ],
         },
     },
-    '121': {
+    "121": {
         "camelot": {
         "camelot": {
-            "table_areas": ['49,650,551,70'],
-            "columns": ['223,277,314,348,392,422,446,483'],
+            "table_areas": ["49,650,551,70"],
+            "columns": ["223,277,314,348,392,422,446,483"],
             "split_text": False,
             "split_text": False,
             "flavor": "stream",
             "flavor": "stream",
         },
         },
         "rows_to_fix": {
         "rows_to_fix": {
             -6: ["Categorías de fuentes y"],
             -6: ["Categorías de fuentes y"],
             -3: ["[3B1] Tierra forestales"],
             -3: ["[3B1] Tierra forestales"],
-            3: ["[3C] Fuentes agregadas y fuentes de",
+            3: [
+                "[3C] Fuentes agregadas y fuentes de",
                 "[3C1] Emisiones de GEI por quemado de",
                 "[3C1] Emisiones de GEI por quemado de",
                 "[3C4] Emisiones directas de los N₂O de",
                 "[3C4] Emisiones directas de los N₂O de",
                 "[3C5] Emisiones indirectas de los N₂O de",
                 "[3C5] Emisiones indirectas de los N₂O de",
@@ -123,24 +167,26 @@ page_defs = {
                 "[4A2] Sitios no controlados de",
                 "[4A2] Sitios no controlados de",
                 "[4A3] Tiraderos a cielo abierto para",
                 "[4A3] Tiraderos a cielo abierto para",
                 "[4B] Tratamiento biológico de los",
                 "[4B] Tratamiento biológico de los",
-                ],
+            ],
         },
     },
-    '122': {
+    "122": {
         "camelot": {
         "camelot": {
-            "table_areas": ['49,650,551,404'],
-            "columns": ['223,277,314,348,392,422,446,483'],
+            "table_areas": ["49,650,551,404"],
+            "columns": ["223,277,314,348,392,422,446,483"],
             "split_text": False,
             "split_text": False,
             "flavor": "stream",
             "flavor": "stream",
         },
         },
         "rows_to_fix": {
         "rows_to_fix": {
             -6: ["Categorías de fuentes y"],
             -6: ["Categorías de fuentes y"],
-            3: ["[4C] Incineración y quema a cielo abierto",
+            3: [
+                "[4C] Incineración y quema a cielo abierto",
                 "[4C1] Incineración de residuos peligrosos",
                 "[4C1] Incineración de residuos peligrosos",
                 "[4C2] Quema a cielo abierto de residuos",
                 "[4C2] Quema a cielo abierto de residuos",
                 "[4D] Tratamiento y eliminación de aguas",
                 "[4D] Tratamiento y eliminación de aguas",
                 "[4D1] Tratamiento y eliminación de",
                 "[4D1] Tratamiento y eliminación de",
-                "[4D2] Tratamiento y eliminación de"],
+                "[4D2] Tratamiento y eliminación de",
+            ],
         },
     },
 }

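fix_rows above re-joins category labels that camelot split over several table rows. For the simple n_rows = 2 case the merge boils down to joining consecutive rows column-wise; a toy pandas sketch of just that step (not the full helper, which also handles the negative n_rows cases):

import pandas as pd

df = pd.DataFrame(
    {
        "cat": ["[1A] Actividades de quema del", "combustible", "[1B] Emisiones"],
        "2019": ["", "123", "456"],
    }
)

# join row 0 with the row below it and clean up the spacing
merged = df.iloc[0:2].fillna("").agg(" ".join).str.replace("  ", " ").str.strip()
df = pd.concat([merged.to_frame().T, df.iloc[2:]], ignore_index=True)

print(df)  # row 0: "[1A] Actividades de quema del combustible", "123"
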
+ 63 - 66
src/unfccc_ghg_data/unfccc_reader/Mexico/read_MEX_BUR3_from_pdf.py

@@ -1,10 +1,15 @@
-# this script reads data from Mexico's BUR3
-# Data is read from the pdf file
+"""
+Read Mexico's BUR3 from pdf
+
+This script reads data from Mexico's BUR3
+Data are read from pdf using camelot
+
+"""
 
 
 import camelot
 import camelot
 import pandas as pd
 import pandas as pd
 import primap2 as pm2
 import primap2 as pm2
-from .config_mex_bur3 import fix_rows, page_defs
+from config_mex_bur3 import fix_rows, page_defs

 from unfccc_ghg_data.helper import downloaded_data_path, extracted_data_path

@@ -12,16 +17,16 @@ if __name__ == "__main__":
     # ###
     # configuration
     # ###
-    input_folder = downloaded_data_path / 'UNFCCC' / 'Mexico' / 'BUR3'
-    output_folder = extracted_data_path / 'UNFCCC' / 'Mexico'
+    input_folder = downloaded_data_path / "UNFCCC" / "Mexico" / "BUR3"
+    output_folder = extracted_data_path / "UNFCCC" / "Mexico"
     if not output_folder.exists():
-       output_folder.mkdir()
+        output_folder.mkdir()

-    output_filename = 'MEX_BUR3_2022_'
+    output_filename = "MEX_BUR3_2022_"
     compression = dict(zlib=True, complevel=9)
-    inventory_file = 'Mexico_3er_BUR.pdf'
+    inventory_file = "Mexico_3er_BUR.pdf"

-    gwp_to_use = 'AR5GWP100'
+    gwp_to_use = "AR5GWP100"
     year = 2019
     entity_row = 0
     unit_row = 1
@@ -43,12 +48,12 @@ if __name__ == "__main__":

     # manual category codes
     cat_codes_manual = {
-        'Todas las emisiones y las absorciones nacionales': '0',
-        'Todas las emisiones (sin [3B] Tierra ni [3D1] Productos de madera recolectada': 'M0EL',
-        '2F6 Otras aplicaciones': '2F6',
+        "Todas las emisiones y las absorciones nacionales": "0",
+        "Todas las emisiones (sin [3B] Tierra ni [3D1] Productos de madera recolectada": "M0EL",
+        "2F6 Otras aplicaciones": "2F6",
     }

-    cat_code_regexp = r'^\[(?P<code>[a-zA-Z0-9]{1,3})\].*'
+    cat_code_regexp = r"^\[(?P<code>[a-zA-Z0-9]{1,3})\].*"

     coords_cols = {
         "category": "category",
@@ -77,18 +82,17 @@ if __name__ == "__main__":
         "unit": "PRIMAP1",
         "unit": "PRIMAP1",
         "category": "PRIMAP1",
         "category": "PRIMAP1",
         "entity": {
         "entity": {
-            'CH₄': 'CH4',
-            'CO₂': 'CO2',
-            'EMISIONES NETAS PCG AR5': 'KYOTOGHG (AR5GWP100)',
-            'HFC': f"HFCS ({gwp_to_use})",
-            'NF₃': f"NF3 ({gwp_to_use})",
-            'N₂O': 'N2O',
-            'PFC': f"PFCS ({gwp_to_use})",
-            'SF₆': f"SF6 ({gwp_to_use})",
+            "CH₄": "CH4",
+            "CO₂": "CO2",
+            "EMISIONES NETAS PCG AR5": "KYOTOGHG (AR5GWP100)",
+            "HFC": f"HFCS ({gwp_to_use})",
+            "NF₃": f"NF3 ({gwp_to_use})",
+            "N₂O": "N2O",
+            "PFC": f"PFCS ({gwp_to_use})",
+            "SF₆": f"SF6 ({gwp_to_use})",
         },
     }

-
     filter_remove = {}

     filter_keep = {}
@@ -102,11 +106,6 @@ if __name__ == "__main__":
         "institution": "UNFCCC",
         "institution": "UNFCCC",
     }
     }
 
 
-    # convert to mass units where possible
-    entities_to_convert_to_mass = [
-        'NF3', 'SF6'
-    ]
-
     # ###
     # read the data from pdf into one long format dataframe
     # ###
@@ -114,8 +113,9 @@ if __name__ == "__main__":
     for page in page_defs.keys():
         print(f"Working on page {page}")
         page_def = page_defs[page]
-        tables = camelot.read_pdf(str(input_folder / inventory_file), pages=page,
-                                  **page_def["camelot"])
+        tables = camelot.read_pdf(
+            str(input_folder / inventory_file), pages=page, **page_def["camelot"]
+        )
         df_this_table = tables[0].df

         # fix rows
@@ -127,31 +127,36 @@ if __name__ == "__main__":
             df_this_table.iloc[:, 0] = df_this_table.iloc[:, 0].str.replace("-", "-")
             # replace double space in entity
             df_this_table.iloc[0, :] = df_this_table.iloc[0, :].str.replace("  ", " ")
-            df_this_table = fix_rows(df_this_table, page_def["rows_to_fix"][n_rows], 0,
-                                     n_rows)
+            df_this_table = fix_rows(
+                df_this_table, page_def["rows_to_fix"][n_rows], 0, n_rows
+            )

         # add units
-        for col in df_this_table.columns.values:
+        for col in df_this_table.columns.to_numpy():
             if df_this_table[col].iloc[0] in units.keys():
                 df_this_table[col].iloc[1] = units[df_this_table[col].iloc[0]]

         # bring in right format for conversion to long format
-        df_this_table = pm2.pm2io.nir_add_unit_information(df_this_table, unit_row=unit_row,
-                                                           entity_row=entity_row,
-                                                           regexp_unit=".*",
-                                                           regexp_entity=".*",
-                                                           default_unit="GgCO2eq")
+        df_this_table = pm2.pm2io.nir_add_unit_information(
+            df_this_table,
+            unit_row=unit_row,
+            entity_row=entity_row,
+            regexp_unit=".*",
+            regexp_entity=".*",
+            default_unit="GgCO2eq",
+        )
 
 
         # set index and convert to long format
         # set index and convert to long format
         df_this_table = df_this_table.set_index(index_cols)
         df_this_table = df_this_table.set_index(index_cols)
-        df_this_table_long = pm2.pm2io.nir_convert_df_to_long(df_this_table, year,
-                                                              header_long)
+        df_this_table_long = pm2.pm2io.nir_convert_df_to_long(
+            df_this_table, year, header_long
+        )

         # combine with tables for other sectors (merge not append)
         if df_all is None:
             df_all = df_this_table_long
         else:
-            df_all = pd.concat([df_all, df_this_table_long], axis=0, join='outer')
+            df_all = pd.concat([df_all, df_this_table_long], axis=0, join="outer")

     # ###
     # conversion to PM2 IF
@@ -162,15 +167,19 @@ if __name__ == "__main__":
     # replace cat names by codes in col "category"
     # replace cat names by codes in col "category"
     # first the manual replacements
     # first the manual replacements
     df_all["category"] = df_all["category"].replace(cat_codes_manual)
     df_all["category"] = df_all["category"].replace(cat_codes_manual)
+
     # then the regex replacements
-    def repl(m):
-       return m.group('code')
-    df_all["category"] = df_all["category"].str.replace(cat_code_regexp, repl, regex=True)
+    def repl(m):  # noqa: D103
+        return m.group("code")
+
+    df_all["category"] = df_all["category"].str.replace(
+        cat_code_regexp, repl, regex=True
+    )
     df_all = df_all.reset_index(drop=True)

     # replace "," and " " with "" in data
-    df_all.loc[:, "data"] = df_all.loc[:, "data"].str.replace(',','', regex=False)
-    df_all.loc[:, "data"] = df_all.loc[:, "data"].str.replace(' ','', regex=False)
+    df_all.loc[:, "data"] = df_all.loc[:, "data"].str.replace(",", "", regex=False)
+    df_all.loc[:, "data"] = df_all.loc[:, "data"].str.replace(" ", "", regex=False)

     # make sure all col headers are str
     df_all.columns = df_all.columns.map(str)
@@ -185,12 +194,13 @@ if __name__ == "__main__":
         coords_defaults=coords_defaults,
         coords_terminologies=coords_terminologies,
         coords_value_mapping=coords_value_mapping,
-        #coords_value_filling=coords_value_filling,
+        # coords_value_filling=coords_value_filling,
         filter_remove=filter_remove,
-        #filter_keep=filter_keep,
+        # filter_keep=filter_keep,
         meta_data=meta_data,
-        convert_str=True
-        )
+        convert_str=True,
+        time_format="%Y",
+    )

     cat_label = "category (IPCC2006)"
     # fix error cats
@@ -198,21 +208,6 @@ if __name__ == "__main__":

     data_pm2 = pm2.pm2io.from_interchange_format(data_if)

-    # convert to mass units from CO2eq
-
-    entities_to_convert = [f"{entity} ({gwp_to_use})" for entity in
-                           entities_to_convert_to_mass]
-
-    for entity in entities_to_convert:
-        converted = data_pm2[entity].pr.convert_to_mass()
-        basic_entity = entity.split(" ")[0]
-        converted = converted.to_dataset(name=basic_entity)
-        data_pm2 = data_pm2.pr.merge(converted)
-        data_pm2[basic_entity].attrs["entity"] = basic_entity
-
-    # drop the GWP data
-    data_pm2 = data_pm2.drop_vars(entities_to_convert)
-
     # convert back to IF to have units in the fixed format
     # convert back to IF to have units in the fixed format
     data_if = data_pm2.pr.to_interchange_format()

     if not output_folder.exists():
     if not output_folder.exists():
         output_folder.mkdir()
     pm2.pm2io.write_interchange_format(
+        output_folder / (output_filename + coords_terminologies["category"]), data_if
+    )
 
 
     encoding = {var: compression for var in data_pm2.data_vars}
     encoding = {var: compression for var in data_pm2.data_vars}
     data_pm2.pr.to_netcdf(
     data_pm2.pr.to_netcdf(
         output_folder / (output_filename + coords_terminologies["category"] + ".nc"),
         output_folder / (output_filename + coords_terminologies["category"] + ".nc"),
-        encoding=encoding)
+        encoding=encoding,
+    )

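The regex replacement above reduces each Spanish category label to the IPCC 2006 code in square brackets (cat_code_regexp). A quick stand-alone check of the pattern, with sample labels taken from the tables above:

import pandas as pd

cat_code_regexp = r"^\[(?P<code>[a-zA-Z0-9]{1,3})\].*"

cats = pd.Series(
    [
        "[1A2] Industrias manufactura y de la construcción",
        "[3C4] Emisiones directas de los N₂O de suelos gestionados",
    ]
)

# keep only the bracketed code, dropping the descriptive text
cats = cats.str.replace(cat_code_regexp, lambda m: m.group("code"), regex=True)

print(cats.tolist())  # ['1A2', '3C4']
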
+ 30 - 0
src/unfccc_ghg_data/unfccc_reader/Montenegro/__init__.py

@@ -0,0 +1,30 @@
+"""Read Montenegro's BURs, NIRs, NCs
+
+Scripts and configurations to read Montenegro's submissions to the UNFCCC.
+Currently, the following submissions and datasets are available (all datasets
+including DI (read using the DI-reader) and legacy BUR/NIR (no code)):
+
+.. exec_code::
+    :hide_code:
+
+    from unfccc_ghg_data.helper.functions import (get_country_datasets,
+                                                  get_country_submissions)
+    country = 'MNE'
+    # print available submissions
+    print("="*15 + " Available submissions " + "="*15)
+    get_country_submissions(country, True)
+    print("")
+
+    # print available datasets
+    print("="*15 + " Available datasets " + "="*15)
+    get_country_datasets(country, True)
+
+You can also obtain this information running
+
+.. code-block:: bash
+
+    poetry run doit country_info country=MNE
+
+See below for a listing of scripts for BUR/NIR reading including links.
+
+"""

+ 103 - 47
src/unfccc_ghg_data/unfccc_reader/Montenegro/config_mne_bur3.py

@@ -1,67 +1,123 @@
+"""Config for Montenegro's BUR3
+
+Partial configuration for camelot and data aggregation. PRIMAP2 conversion
+config and metadata are defined in the reading script.
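+
+The reading script drops the inconsistent duplicate series listed in
+``drop_data`` before merging the camelot tables. A minimal, self-contained
+sketch of that filtering step (abbreviated config and made-up numbers; the
+real logic lives in ``read_MNE_BUR3_from_pdf.py``):
+
+.. code-block:: python
+
+    import pandas as pd
+
+    # abbreviated drop_data entry for one camelot table index
+    drop_data = {2: {"cats": ["1", "1.A"], "years": ["2005"]}}
+
+    df = pd.DataFrame(
+        {
+            "category": ["0.EL", "1", "1.A"],
+            "2005": [1.0, 2.0, 3.0],
+            "2006": [4.0, 5.0, 6.0],
+        }
+    )
+    to_drop = drop_data[2]
+    if "cats" in to_drop:  # drop whole categories
+        mask = df["category"].isin(to_drop["cats"])
+        df = df.drop(df[mask].index, axis=0)
+    if "years" in to_drop:  # drop duplicated year columns
+        df = df.drop(columns=to_drop["years"])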
+
+"""
+
 # most time series are contained twice and 2005 data is also contained twice. Some
 # data is inconsistent and we remove the time series with errors
 drop_data = {
-    2: { # individual sector time series are (mostly) wrong, leave only 0.EL timeseries
-        "cats": ["1", "1.A", "1.A.1", "1.A.1", "1.A.2", "1.A.3", "1.A.4", "1.A.5", "1.B", "1.B.1", "1.B.2",
-                 "2", "2.A", "2.B", "2.C", "2.D", "2.E", "2.F", "2.G", "2.H",
-                 "3", "3.A", "3.B"],
-        #"years": ["2005"], # 2005 data copy of 2019
+    2: {  # individual sector time series are (mostly) wrong, leave only 0.EL timeseries
+        "cats": [
+            "1",
+            "1.A",
+            "1.A.1",
+            "1.A.1",
+            "1.A.2",
+            "1.A.3",
+            "1.A.4",
+            "1.A.5",
+            "1.B",
+            "1.B.1",
+            "1.B.2",
+            "2",
+            "2.A",
+            "2.B",
+            "2.C",
+            "2.D",
+            "2.E",
+            "2.F",
+            "2.G",
+            "2.H",
+            "3",
+            "3.A",
+            "3.B",
+        ],
+        # "years": ["2005"], # 2005 data copy of 2019
     },
-    3: { # individual sector time series are (mostly) wrong, leave only 0.EL timeseries
-        "cats": ["3.C", "3.D", "3.E", "3.F", "3.G", "5", "5.A", "5.B", "5.C", "5.D", "6"]
-        #"years": ["2005"],
+    3: {  # individual sector time series are (mostly) wrong, leave only 0.EL timeseries
+        "cats": [
+            "3.C",
+            "3.D",
+            "3.E",
+            "3.F",
+            "3.G",
+            "5",
+            "5.A",
+            "5.B",
+            "5.C",
+            "5.D",
+            "6",
+        ]
+        # "years": ["2005"],
     },
-    6: { #2005 data copy of 2019
+    6: {  # 2005 data copy of 2019
         "years": ["2005"],
     },
-    7: { # 2005 data copy of 2019 for 3.G
+    7: {  # 2005 data copy of 2019 for 3.G
         "years": ["2005"],
     },
-    25: { # 2005 data copy of 2019 (CO2, 2005-2019, first table)
+    25: {  # 2005 data copy of 2019 (CO2, 2005-2019, first table)
         "years": ["2005"],
     },
-    26: { # 2005 data copy of 2019 (CO2, 2005-2019, second table)
+    26: {  # 2005 data copy of 2019 (CO2, 2005-2019, second table)
         "years": ["2005"],
     },
 }

 cat_mapping = {
-    '3': 'M.AG',
-    '3.A': '3.A.1',
-    '3.B': '3.A.2',
-    '3.C': '3.C.7', # rice
-    '3.D': 'M.3.C.45AG', # Agricultural soils
-    '3.E': '3.C.1.c', # prescribed burning of savanna
-    '3.F': '3.C.1.b', # field burning of agricultural residues
-    '3.G': '3.C.3', # urea application
-    '4': 'M.LULUCF',
-    '4.A': '3.B.1', # forest
-    '4.B': '3.B.2', # cropland
-    '4.C': '3.B.3', # grassland
-    '4.D': '3.B.4', # wetland
-    '4.E': '3.B.5', # Settlements
-    '4.F': '3.B.6', # other land
-    '4.G': '3.D.1', # HWP
-    '5': '4',
-    '5.A': '4.A',
-    '5.B': '4.B',
-    '5.C': '4.C',
-    '5.D': '4.D',
-    '6': '5',
+    "3": "M.AG",
+    "3.A": "3.A.1",
+    "3.B": "3.A.2",
+    "3.C": "3.C.7",  # rice
+    "3.D": "M.3.C.45AG",  # Agricultural soils
+    "3.E": "3.C.1.c",  # prescribed burning of savanna
+    "3.F": "3.C.1.b",  # field burning of agricultural residues
+    "3.G": "3.C.3",  # urea application
+    "4": "M.LULUCF",
+    "4.A": "3.B.1",  # forest
+    "4.B": "3.B.2",  # cropland
+    "4.C": "3.B.3",  # grassland
+    "4.D": "3.B.4",  # wetland
+    "4.E": "3.B.5",  # Settlements
+    "4.F": "3.B.6",  # other land
+    "4.G": "3.D.1",  # HWP
+    "5": "4",
+    "5.A": "4.A",
+    "5.B": "4.B",
+    "5.C": "4.C",
+    "5.D": "4.D",
+    "6": "5",
 }

 aggregate_cats = {
-    '3.A': {'sources': ['3.A.1', '3.A.2'], 'name': 'Livestock'},
-    '3.B': {'sources': ['3.B.1', '3.B.2', '3.B.3', '3.B.4', '3.B.5', '3.B.6'], 'name': 'Land'},
-    'M.3.C.1.AG': {'sources': ['3.C.1.c', '3.C.1.b'], 'name': 'Emissions from Biomass '
-                                                          'Burning (Agriculture)'},
-    '3.C.1': {'sources': ['3.C.1.c', '3.C.1.b'], 'name': 'Emissions from Biomass Burning'},
-    '3.C': {'sources': ['3.C.1', '3.C.3', 'M.3.C.45AG', '3.C.7'],
-            'name': 'Aggregate sources and non-CO2 emissions sources on land'},
-    'M.3.C.AG': {'sources': ['3.C.1.AG', '3.C.3', 'M.3.C.45AG', '3.C.7'],
-            'name': 'Aggregate sources and non-CO2 emissions sources on land (Agriculture)'},
-    '3.D': {'sources': ['3.D.1'], 'name': 'Other'},
-    '3': {'sources': ['M.AG', 'M.LULUCF'], 'name': 'AFOLU'},
-    'M.AG.ELV': {'sources': ['M.3.C.AG'], 'name': 'Agriculture excluding livestock emissions'},
-    '0': {'sources': ['1', '2', '3', '4', '5']},
+    "3.A": {"sources": ["3.A.1", "3.A.2"], "name": "Livestock"},
+    "3.B": {
+        "sources": ["3.B.1", "3.B.2", "3.B.3", "3.B.4", "3.B.5", "3.B.6"],
+        "name": "Land",
+    },
+    "M.3.C.1.AG": {
+        "sources": ["3.C.1.c", "3.C.1.b"],
+        "name": "Emissions from Biomass " "Burning (Agriculture)",
+    },
+    "3.C.1": {
+        "sources": ["3.C.1.c", "3.C.1.b"],
+        "name": "Emissions from Biomass Burning",
+    },
+    "3.C": {
+        "sources": ["3.C.1", "3.C.3", "M.3.C.45AG", "3.C.7"],
+        "name": "Aggregate sources and non-CO2 emissions sources on land",
+    },
+    "M.3.C.AG": {
+        "sources": ["3.C.1.AG", "3.C.3", "M.3.C.45AG", "3.C.7"],
+        "name": "Aggregate sources and non-CO2 emissions sources on land (Agriculture)",
+    },
+    "3.D": {"sources": ["3.D.1"], "name": "Other"},
+    "3": {"sources": ["M.AG", "M.LULUCF"], "name": "AFOLU"},
+    "M.AG.ELV": {
+        "sources": ["M.3.C.AG"],
+        "name": "Agriculture excluding livestock emissions",
+    },
+    "0": {"sources": ["1", "2", "3", "4", "5"]},
 }

+ 88 - 56
src/unfccc_ghg_data/unfccc_reader/Montenegro/read_MNE_BUR3_from_pdf.py

@@ -1,41 +1,41 @@
-# Montenegro BUR 3
-# Code to read the emissions inventory contained in Montenegro's third BUR from pdf
-# and convert into PRIMAP2 format
+"""
+Read Montenegro's BUR3 from pdf
+
+This script reads data from Montenegro's BUR3
+Data are read from pdf using camelot
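+
+One possible way to run it directly (a sketch, assuming the repository's
+poetry environment):
+
+.. code-block:: bash
+
+    poetry run python src/unfccc_ghg_data/unfccc_reader/Montenegro/read_MNE_BUR3_from_pdf.py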
+
+"""
+
 
 
 # ###
 # imports
 # ###
 import copy
 import re
-from pathlib import Path
 
 
 import camelot
 import pandas as pd
 import primap2 as pm2
-from .config_mne_bur3 import aggregate_cats, cat_mapping, drop_data
+from config_mne_bur3 import aggregate_cats, cat_mapping, drop_data
 from primap2.pm2io._data_reading import matches_time_format
 
 
+from unfccc_ghg_data.helper import downloaded_data_path, extracted_data_path
+
 if __name__ == "__main__":
     # ###
     # configuration
     # ###
 
 
-    # folders and files
-    root_path = Path(__file__).parents[3].absolute()
-    root_path = root_path.resolve()
-    downloaded_data_path = root_path / "downloaded_data"
-    extracted_data_path = root_path / "extracted_data"
-
-    input_folder = downloaded_data_path / 'UNFCCC' / 'Montenegro' / 'BUR3'
-    output_folder = extracted_data_path / 'UNFCCC' / 'Montenegro'
-    output_filename = 'MNE_BUR3_2022_'
+    input_folder = downloaded_data_path / "UNFCCC" / "Montenegro" / "BUR3"
+    output_folder = extracted_data_path / "UNFCCC" / "Montenegro"
+    output_filename = "MNE_BUR3_2022_"
     compression = dict(zlib=True, complevel=9)
 
 
-    inventory_file_pdf = 'NIR-2021_MNE_Finalversion.pdf'
+    inventory_file_pdf = "NIR-2021_MNE_Finalversion.pdf"
 
 
     # reading and processing
     years_to_read = range(1990, 2018 + 1)
-    pages_to_read = range(535,583)
+    pages_to_read = range(535, 583)
 
 
     pos_entity = [0, 0]
     cat_code_col = 0
@@ -43,7 +43,7 @@ if __name__ == "__main__":
     regex_unit = r"\((.*)\)"
     regex_entity = r"^(.*)\s\("
 
 
-    gwp_to_use = 'AR4GWP100'
+    gwp_to_use = "AR4GWP100"
 
 
     # conversion to PRIMAP2 format
 
 
@@ -61,28 +61,28 @@ if __name__ == "__main__":
     }

     coords_value_mapping = {
-        'unit': 'PRIMAP1',
-        'entity': {
+        "unit": "PRIMAP1",
+        "entity": {
             f"GHG ({gwp_to_use})": f"KYOTOGHG ({gwp_to_use})",
             f"HFC ({gwp_to_use})": f"HFCS ({gwp_to_use})",
             f"PFC ({gwp_to_use})": f"PFCS ({gwp_to_use})",
         },
-        'category': {
-            'Total national GHG emissions (with LULUCF)': '0',
-            'Total national GHG emissions (without LULUCF)': 'M.0.EL',
-            'International Bunkers': 'M.BK',
-            '1.A.3.a.i': 'M.BK.A',
-            '1.A.3.d.i': 'M.BK.M',
-            'CO2 from Biomass Combustion for Energy Production': 'M.BIO',
-            '6 Other': '6',
-            '2 H': '2.H',
+        "category": {
+            "Total national GHG emissions (with LULUCF)": "0",
+            "Total national GHG emissions (without LULUCF)": "M.0.EL",
+            "International Bunkers": "M.BK",
+            "1.A.3.a.i": "M.BK.A",
+            "1.A.3.d.i": "M.BK.M",
+            "CO2 from Biomass Combustion for Energy Production": "M.BIO",
+            "6 Other": "6",
+            "2 H": "2.H",
         },
     }

     coords_value_filling = {
         "category": {
             "orig_cat_name": {
-                'International Bunkers': 'M.BK',
+                "International Bunkers": "M.BK",
             },
         },
     }
@@ -103,7 +103,8 @@ if __name__ == "__main__":
         "references": "https://unfccc.int/documents/461972",
         "rights": "",
         "contact": "mail@johannes-guetschow.de",
-        "title": "Montenegro. Biennial update report (BUR). BUR 3. National inventory report.",
+        "title": "Montenegro. Biennial update report (BUR). "
+        "BUR 3. National inventory report.",
         "comment": "Read fom pdf file by Johannes Gütschow",
         "institution": "United Nations Framework Convention on Climate Change (UNFCCC)",
     }
@@ -111,7 +112,11 @@ if __name__ == "__main__":
     # ###
     # Read all time series table from pdf
     # ###
-    tables = camelot.read_pdf(str(input_folder / inventory_file_pdf), pages=','.join([str(page) for page in pages_to_read]), flavor='lattice')
+    tables = camelot.read_pdf(
+        str(input_folder / inventory_file_pdf),
+        pages=",".join([str(page) for page in pages_to_read]),
+        flavor="lattice",
+    )
 
 
     # ###
     # process tables and combine them using the pm2 pr.merge function
@@ -142,11 +147,14 @@ if __name__ == "__main__":
 
 
         # remove ',' in numbers
         years = df_current_table.columns[2:]
-        def repl(m):
+
+        def repl(m):  # noqa: D103
             return m.group("part1") + m.group("part2")
+
         for year in years:
             df_current_table.loc[:, year] = df_current_table.loc[:, year].str.replace(
-                '(?P<part1>[0-9]+),(?P<part2>[0-9\\.]+)$', repl, regex=True)
+                "(?P<part1>[0-9]+),(?P<part2>[0-9\\.]+)$", repl, regex=True
+            )
 
 
         # add entity and unit cols
         df_current_table["entity"] = entity
@@ -156,13 +164,15 @@ if __name__ == "__main__":
             to_drop = drop_data[i]
             if "cats" in to_drop.keys():
                 mask = df_current_table["category"].isin(to_drop["cats"])
-                df_current_table = df_current_table.drop(df_current_table[mask].index,
-                                                         axis=0)
+                df_current_table = df_current_table.drop(
+                    df_current_table[mask].index, axis=0
+                )
             if "years" in to_drop.keys():
                 df_current_table = df_current_table.drop(columns=to_drop["years"])
 
 
         df_current_table["category"] = df_current_table["category"].fillna(
-            value=df_current_table["orig_cat_name"])
+            value=df_current_table["orig_cat_name"]
+        )
 
 
         df_current_table = df_current_table.drop(columns="orig_cat_name")
 
 
@@ -191,7 +201,7 @@ if __name__ == "__main__":
     # ###

     # convert to mass units from CO2eq
-    entities_to_convert = ['N2O', 'SF6', 'CH4']
+    entities_to_convert = ["N2O", "SF6", "CH4"]
     entities_to_convert = [f"{entity} ({gwp_to_use})" for entity in entities_to_convert]
 
 
     # for entity in entities_to_convert:
@@ -215,21 +225,28 @@ if __name__ == "__main__":
 
 
     # map categories
     data_if_2006 = data_if_2006.replace(
-        {f"category ({coords_terminologies['category']})": cat_mapping})
+        {f"category ({coords_terminologies['category']})": cat_mapping}
+    )
     data_if_2006[f"category ({coords_terminologies['category']})"].unique()

     # rename the category col
-    data_if_2006.rename(columns={
-        f"category ({coords_terminologies['category']})": 'category (IPCC2006_PRIMAP)'},
-                        inplace=True)
-    data_if_2006.attrs['attrs']['cat'] = 'category (IPCC2006_PRIMAP)'
-    data_if_2006.attrs['dimensions']['*'] = [
-        'category (IPCC2006_PRIMAP)' if item == f"category ({coords_terminologies['category']})"
-        else item for item in data_if_2006.attrs['dimensions']['*']]
+    data_if_2006 = data_if_2006.rename(
+        columns={
+            f"category ({coords_terminologies['category']})": "category (IPCC2006_PRIMAP)"
+        }
+    )
+    data_if_2006.attrs["attrs"]["cat"] = "category (IPCC2006_PRIMAP)"
+    data_if_2006.attrs["dimensions"]["*"] = [
+        "category (IPCC2006_PRIMAP)"
+        if item == f"category ({coords_terminologies['category']})"
+        else item
+        for item in data_if_2006.attrs["dimensions"]["*"]
+    ]
     # aggregate categories
     for cat_to_agg in aggregate_cats:
         mask = data_if_2006["category (IPCC2006_PRIMAP)"].isin(
-            aggregate_cats[cat_to_agg]["sources"])
+            aggregate_cats[cat_to_agg]["sources"]
+        )
         df_test = data_if_2006[mask]
         # print(df_test)
 
 
@@ -237,10 +254,10 @@ if __name__ == "__main__":
             print(f"Aggregating category {cat_to_agg}")
             df_combine = df_test.copy(deep=True)
 
 
-            time_format = '%Y'
+            time_format = "%Y"
             time_columns = [
                 col
-                for col in df_combine.columns.values
+                for col in df_combine.columns.to_numpy()
                 if matches_time_format(col, time_format)
             ]
 
 
@@ -248,8 +265,15 @@ if __name__ == "__main__":
                 df_combine[col] = pd.to_numeric(df_combine[col], errors="coerce")

             df_combine = df_combine.groupby(
-                by=['source', 'scenario (PRIMAP)', 'provenance', 'area (ISO3)', 'entity',
-                    'unit']).sum(min_count=1)
+                by=[
+                    "source",
+                    "scenario (PRIMAP)",
+                    "provenance",
+                    "area (ISO3)",
+                    "entity",
+                    "unit",
+                ]
+            ).sum(min_count=1)
 
 
             df_combine.insert(0, "category (IPCC2006_PRIMAP)", cat_to_agg)
             # df_combine.insert(1, "cat_name_translation", aggregate_cats[cat_to_agg]["name"])
@@ -257,7 +281,7 @@ if __name__ == "__main__":
 
 
             df_combine = df_combine.reset_index()
 
 
-            data_if_2006 = pd.concat([data_if_2006, df_combine], axis=0, join='outer')
+            data_if_2006 = pd.concat([data_if_2006, df_combine], axis=0, join="outer")
             data_if_2006 = data_if_2006.reset_index(drop=True)
         else:
             print(f"no data to aggregate category {cat_to_agg}")
@@ -268,7 +292,6 @@ if __name__ == "__main__":
     # convert back to IF to have units in the fixed format
     data_if_2006 = data_pm2_2006.pr.to_interchange_format()
 
 
-
     # ###
     # save data to IF and native format
     # ###
@@ -276,13 +299,22 @@ if __name__ == "__main__":
         output_folder.mkdir()

     # data in original categories
-    pm2.pm2io.write_interchange_format(output_folder / (output_filename + coords_terminologies["category"]), data_if)
+    pm2.pm2io.write_interchange_format(
+        output_folder / (output_filename + coords_terminologies["category"]), data_if
+    )
 
 
     encoding = {var: compression for var in data_all.data_vars}
-    data_all.pr.to_netcdf(output_folder / (output_filename + coords_terminologies["category"] + ".nc"), encoding=encoding)
+    data_all.pr.to_netcdf(
+        output_folder / (output_filename + coords_terminologies["category"] + ".nc"),
+        encoding=encoding,
+    )
 
 
     # data in 2006 categories
-    pm2.pm2io.write_interchange_format(output_folder / (output_filename + "IPCC2006_PRIMAP"), data_if_2006)
+    pm2.pm2io.write_interchange_format(
+        output_folder / (output_filename + "IPCC2006_PRIMAP"), data_if_2006
+    )
 
 
     encoding = {var: compression for var in data_pm2_2006.data_vars}
-    data_pm2_2006.pr.to_netcdf(output_folder / (output_filename + "IPCC2006_PRIMAP" + ".nc"), encoding=encoding)
+    data_pm2_2006.pr.to_netcdf(
+        output_folder / (output_filename + "IPCC2006_PRIMAP" + ".nc"), encoding=encoding
+    )

+ 30 - 0
src/unfccc_ghg_data/unfccc_reader/Morocco/__init__.py

@@ -0,0 +1,30 @@
+"""Read Morocco's BURs, NIRs, NCs
+
+Scripts and configurations to read Morocco's submissions to the UNFCCC.
+Currently, the following submissions and datasets are available (all datasets
+including DI (read using the DI-reader) and legacy BUR/NIR (no code)):
+
+.. exec_code::
+    :hide_code:
+
+    from unfccc_ghg_data.helper.functions import (get_country_datasets,
+                                                  get_country_submissions)
+    country = 'MAR'
+    # print available submissions
+    print("="*15 + " Available submissions " + "="*15)
+    get_country_submissions(country, True)
+    print("")
+
+    #print available datasets
+    print("="*15 + " Available datasets " + "="*15)
+    get_country_datasets(country, True)
+
+You can also obtain this information running
+
+.. code-block:: bash
+
+    poetry run doit country_info country=MAR
+
+See below for a listing of scripts for BUR/NIR reading including links.
+
+"""

+ 187 - 108
src/unfccc_ghg_data/unfccc_reader/Morocco/config_mar_bur3.py

@@ -1,57 +1,98 @@
+"""Config for Morocco's BUR3
+
+Partial configuration for camelot and data aggregation. PRIMAP2 conversion
+config and metadata are defined in the reading script.
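+
+``cat_mapping`` is applied to the interchange-format data as a plain column
+replacement. A minimal sketch with an abbreviated mapping and made-up data
+(see ``read_MAR_BUR3_from_pdf.py`` for the real call):
+
+.. code-block:: python
+
+    import pandas as pd
+
+    cat_mapping = {"3.C": "3.C.7", "3.H": "3.C.3"}  # abbreviated example
+    data_if = pd.DataFrame(
+        {"category (IPCC2006)": ["3.C", "3.H", "1.A"], "2018": [1.0, 2.0, 3.0]}
+    )
+    # replace original category codes by IPCC2006_PRIMAP codes
+    data_if = data_if.replace({"category (IPCC2006)": cat_mapping})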
+
+"""
+
 # define which raw tables to combine
 table_defs = {
     2010: {
-        'Energy': [0, 1],
-        'Agriculture': [10],
-        'IPPU': [15, 16, 17],
-        'LULUCF': [30],
-        'Waste': [35],
+        "Energy": [0, 1],
+        "Agriculture": [10],
+        "IPPU": [15, 16, 17],
+        "LULUCF": [30],
+        "Waste": [35],
     },
     2012: {
-        'Energy': [2, 3],
-        'Agriculture': [11],
-        'IPPU': [18, 19, 20],
-        'LULUCF': [31],
-        'Waste': [36],
+        "Energy": [2, 3],
+        "Agriculture": [11],
+        "IPPU": [18, 19, 20],
+        "LULUCF": [31],
+        "Waste": [36],
     },
     2014: {
-        'Energy': [4, 5],
-        'Agriculture': [10],
-        'IPPU': [21, 22, 23],
-        'LULUCF': [32],
-        'Waste': [37],
+        "Energy": [4, 5],
+        "Agriculture": [10],
+        "IPPU": [21, 22, 23],
+        "LULUCF": [32],
+        "Waste": [37],
     },
     2016: {
-        'Energy': [6, 7],
-        'Agriculture': [10],
-        'IPPU': [24, 25, 26],
-        'LULUCF': [33],
-        'Waste': [38],
+        "Energy": [6, 7],
+        "Agriculture": [10],
+        "IPPU": [24, 25, 26],
+        "LULUCF": [33],
+        "Waste": [38],
     },
     2018: {
-        'Energy': [8, 9],
-        'Agriculture': [14],
-        'IPPU': [27, 28, 29],
-        'LULUCF': [34],
-        'Waste': [39],
+        "Energy": [8, 9],
+        "Agriculture": [14],
+        "IPPU": [27, 28, 29],
+        "LULUCF": [34],
+        "Waste": [39],
     },
 }

 header_defs = {
-    'Energy': [['Catégories', 'CO2', 'CH4', 'N2O', 'NOx', 'CO', 'COVNM', 'SO2'],
-        ['', 'Gg', 'Gg', 'Gg', 'Gg', 'Gg', 'Gg', 'Gg']],
-    'Agriculture': [['Catégories', 'CO2', 'CH4', 'N2O', 'NOx', 'CO', 'COVNM', 'SO2'],
-        ['', 'Gg', 'GgCO2eq', 'GgCO2eq', 'Gg', 'Gg', 'Gg', 'Gg']], # units are wrong
+    "Energy": [
+        ["Catégories", "CO2", "CH4", "N2O", "NOx", "CO", "COVNM", "SO2"],
+        ["", "Gg", "Gg", "Gg", "Gg", "Gg", "Gg", "Gg"],
+    ],
+    "Agriculture": [
+        ["Catégories", "CO2", "CH4", "N2O", "NOx", "CO", "COVNM", "SO2"],
+        ["", "Gg", "GgCO2eq", "GgCO2eq", "Gg", "Gg", "Gg", "Gg"],
+    ],  # units are wrong
     # in BUR pdf
-    'IPPU': [['Catégories', 'CO2', 'CH4', 'N2O', 'HFCs', 'PFCs', 'SF6', 'NOx', 'CO', 'COVNM', 'SO2'],
-        ['', 'GgCO2eq', 'GgCO2eq', 'GgCO2eq', 'GgCO2eq', 'GgCO2eq', 'GgCO2eq', 'Gg', 'Gg', 'Gg', 'Gg']],
-    'LULUCF': [['Catégories', 'CO2', 'CH4', 'N2O', 'NOx', 'CO', 'COVNM', 'SO2'],
-        ['', 'GgCO2eq', 'GgCO2eq', 'GgCO2eq', 'Gg', 'Gg', 'Gg', 'Gg']],
-    'Waste': [['Catégories', 'CO2', 'CH4', 'N2O', 'NOx', 'CO', 'COVNM', 'SO2'],
-        ['', 'GgCO2eq', 'GgCO2eq', 'GgCO2eq', 'Gg', 'Gg', 'Gg', 'Gg']],
+    "IPPU": [
+        [
+            "Catégories",
+            "CO2",
+            "CH4",
+            "N2O",
+            "HFCs",
+            "PFCs",
+            "SF6",
+            "NOx",
+            "CO",
+            "COVNM",
+            "SO2",
+        ],
+        [
+            "",
+            "GgCO2eq",
+            "GgCO2eq",
+            "GgCO2eq",
+            "GgCO2eq",
+            "GgCO2eq",
+            "GgCO2eq",
+            "Gg",
+            "Gg",
+            "Gg",
+            "Gg",
+        ],
+    ],
+    "LULUCF": [
+        ["Catégories", "CO2", "CH4", "N2O", "NOx", "CO", "COVNM", "SO2"],
+        ["", "GgCO2eq", "GgCO2eq", "GgCO2eq", "Gg", "Gg", "Gg", "Gg"],
+    ],
+    "Waste": [
+        ["Catégories", "CO2", "CH4", "N2O", "NOx", "CO", "COVNM", "SO2"],
+        ["", "GgCO2eq", "GgCO2eq", "GgCO2eq", "Gg", "Gg", "Gg", "Gg"],
+    ],
 }

-remove_cats = ['3.A.4', '3.B', '3.B.4', '1.B.2.a', '1.B.2.b', '1.B.2.c']
+remove_cats = ["3.A.4", "3.B", "3.B.4", "1.B.2.a", "1.B.2.b", "1.B.2.c"]

 cat_mapping = {
     "1.B.2.a.4": "1.B.2.a.iii.4",
@@ -61,81 +102,119 @@ cat_mapping = {
     "1.B.2.b.4": "1.B.2.b.iii.4",
     "1.B.2.b.5": "1.B.2.b.iii.5",
     "1.B.2.b.6": "1.B.2.b.iii.6",
-    "1.B.2.c.1": "1.B.2.b.i", # simplification, split to oil and gas ("1.B.2.X.i")
-    "1.B.2.c.2": "1.B.2.b.ii", # simplification, split to oil and gas ("1.B.2.X.ii")
-    '1.A.2.g': '1.A.2.m', # other industry
-    '3.A': '3.A.1', # enteric fermentation
-    '3.A.1': '3.A.1.a', # cattle
-    '3.A.1.a': '3.A.1.a.i',
-    '3.A.1.b': '3.A.1.a.ii',
-    '3.A.2': '3.A.1.c',
-    '3.A.3': '3.A.1.h', # Swine
-    '3.A.4.a': '3.A.1.d', # goats
-    '3.A.4.b': '3.A.1.e', # camels
-    '3.A.4.c': '3.A.1.f', # horses
-    '3.A.4.d': '3.A.1.g', # Mules and asses
-    '3.A.4.e': '3.A.1.i', # poultry
-#    '3.B': '3.A.2', # Manure Management
-    '3.B.1': '3.A.2.a', # cattle
-    '3.B.1.a': '3.A.2.a.i',
-    '3.B.1.b': '3.A.2.a.ii',
-    '3.B.2': '3.A.2.c', # Sheep
-    '3.B.3': '3.A.2.h', # Swine
-    '3.B.4.a': '3.A.2.d', # Goats
-    '3.B.4.b': '3.A.2.e', # Camels
-    '3.B.4.c': '3.A.2.f', # Horses
-    '3.B.4.d': '3.A.2.g', # Mules and Asses
-    '3.B.4.e': '3.A.2.i', # Poultry
-    '3.B.5': '3.C.6', # indirect N2O from manure management
-    '3.C': '3.C.7', # rice
-    '3.D': 'M.3.C.45AG', # Agricultural soils
-    '3.D.a': '3.C.4', #direct N2O from agri soils
-    '3.D.a.1': '3.C.4.a', # inorganic fertilizers
-    '3.D.a.2': '3.C.4.b', # organic fertilizers
-    '3.D.a.3': '3.C.4.c', # urine and dung by grazing animals
-    '3.D.a.4': '3.C.4.d', # N in crop residues
-    '3.D.b': '3.C.5', # indirect N2O from managed soils
-    '3.D.b.1': '3.C.5.a', # Atmospheric deposition
-    '3.D.b.2': '3.C.5.b', # nitrogen leeching and runoff
-    '3.H': '3.C.3', # urea application
-    'LU.3.B.1': '3.B.1', # forest
-    'LU.3.B.2': '3.B.2', # cropland
-    'LU.3.B.3': '3.B.3', # grassland
-    'LU.3.B.4': '3.B.4', # wetland
-    'LU.3.B.5': '3.B.5', # Settlements
-    'LU.3.B.6': '3.B.6', # other land
+    "1.B.2.c.1": "1.B.2.b.i",  # simplification, split to oil and gas ("1.B.2.X.i")
+    "1.B.2.c.2": "1.B.2.b.ii",  # simplification, split to oil and gas ("1.B.2.X.ii")
+    "1.A.2.g": "1.A.2.m",  # other industry
+    "3.A": "3.A.1",  # enteric fermentation
+    "3.A.1": "3.A.1.a",  # cattle
+    "3.A.1.a": "3.A.1.a.i",
+    "3.A.1.b": "3.A.1.a.ii",
+    "3.A.2": "3.A.1.c",
+    "3.A.3": "3.A.1.h",  # Swine
+    "3.A.4.a": "3.A.1.d",  # goats
+    "3.A.4.b": "3.A.1.e",  # camels
+    "3.A.4.c": "3.A.1.f",  # horses
+    "3.A.4.d": "3.A.1.g",  # Mules and asses
+    "3.A.4.e": "3.A.1.i",  # poultry
+    #    '3.B': '3.A.2', # Manure Management
+    "3.B.1": "3.A.2.a",  # cattle
+    "3.B.1.a": "3.A.2.a.i",
+    "3.B.1.b": "3.A.2.a.ii",
+    "3.B.2": "3.A.2.c",  # Sheep
+    "3.B.3": "3.A.2.h",  # Swine
+    "3.B.4.a": "3.A.2.d",  # Goats
+    "3.B.4.b": "3.A.2.e",  # Camels
+    "3.B.4.c": "3.A.2.f",  # Horses
+    "3.B.4.d": "3.A.2.g",  # Mules and Asses
+    "3.B.4.e": "3.A.2.i",  # Poultry
+    "3.B.5": "3.C.6",  # indirect N2O from manure management
+    "3.C": "3.C.7",  # rice
+    "3.D": "M.3.C.45AG",  # Agricultural soils
+    "3.D.a": "3.C.4",  # direct N2O from agri soils
+    "3.D.a.1": "3.C.4.a",  # inorganic fertilizers
+    "3.D.a.2": "3.C.4.b",  # organic fertilizers
+    "3.D.a.3": "3.C.4.c",  # urine and dung by grazing animals
+    "3.D.a.4": "3.C.4.d",  # N in crop residues
+    "3.D.b": "3.C.5",  # indirect N2O from managed soils
+    "3.D.b.1": "3.C.5.a",  # Atmospheric deposition
+    "3.D.b.2": "3.C.5.b",  # nitrogen leeching and runoff
+    "3.H": "3.C.3",  # urea application
+    "LU.3.B.1": "3.B.1",  # forest
+    "LU.3.B.2": "3.B.2",  # cropland
+    "LU.3.B.3": "3.B.3",  # grassland
+    "LU.3.B.4": "3.B.4",  # wetland
+    "LU.3.B.5": "3.B.5",  # Settlements
+    "LU.3.B.6": "3.B.6",  # other land
 }

 aggregate_cats = {
-    '1.B.2.a.iii': {'sources': ['1.B.2.a.iii.4', '1.B.2.a.iii.5', '1.B.2.a.iii.6'],
-                    'name': 'All Other'},
-    '1.B.2.b.iii': {'sources': ['1.B.2.b.iii.2', '1.B.2.b.iii.4', '1.B.2.b.iii.5',
-                                '1.B.2.b.iii.6',],
-                    'name': 'All Other'},
-    '1.B.2.a': {'sources': ['1.B.2.a.iii'], 'name': 'Oil'},
-    '1.B.2.b': {'sources': ['1.B.2.b.i', '1.B.2.b.ii', '1.B.2.b.iii'],
-                'name': 'Natural Gas'},
-    '2.D':  {'sources': ['2.D.4'], 'name': 'Non-Energy Products from Fuels and Solvent Use'},
-    '2.F.1':  {'sources': ['2.F.1.a', '2.F.1.b'], 'name': 'Refrigeration and Air Conditioning'},
-    '2.F':  {'sources': ["2.F.1", "2.F.2", "2.F.3", "2.F.4", "2.F.5", "2.F.6"],
-             'name': 'Product uses as Substitutes for Ozone Depleting Substances'},
-    '2.H':  {'sources': ["2.H.1", "2.H.2", "2.H.3"], 'name': 'Other'},
-    '3.A.2': {'sources': ['3.A.2.a', '3.A.2.c', '3.A.2.d', '3.A.2.e', '3.A.2.f',
-                          '3.A.2.g', '3.A.2.h', '3.A.2.i'],
-              'name': 'Manure Management'},
-    '3.A': {'sources': ['3.A.1', '3.A.2'], 'name': 'Livestock'},
-    '3.B': {'sources': ['3.B.1', '3.B.2', '3.B.3', '3.B.4', '3.B.5', '3.B.6'], 'name': 'Land'},
-    '3.C': {'sources': ['3.C.3', '3.C.4', '3.C.5', '3.C.6', '3.C.7'],
-            'name': 'Aggregate sources and non-CO2 emissions sources on land'},
-    'M.3.C.AG': {'sources': ['3.C.3', '3.C.4', '3.C.5', '3.C.6', '3.C.7'],
-            'name': 'Aggregate sources and non-CO2 emissions sources on land (Agriculture)'},
-    'M.AG': {'sources': ['3.A', 'M.3.C.AG'], 'name': 'Agriculture'},
-    '3': {'sources': ['M.AG', 'M.LULUCF'], 'name': 'AFOLU'},
-    'M.AG.ELV': {'sources': ['M.3.C.AG'], 'name': 'Agriculture excluding livestock emissions'},
-    '4': {'sources': ['4.A', '4.D'], 'name': 'Waste'},
-    '0': {'sources': ['1', '2', '3', '4']},
-    'M.0.EL': {'sources': ['1', '2', 'M.AG', '4']},
+    "1.B.2.a.iii": {
+        "sources": ["1.B.2.a.iii.4", "1.B.2.a.iii.5", "1.B.2.a.iii.6"],
+        "name": "All Other",
+    },
+    "1.B.2.b.iii": {
+        "sources": [
+            "1.B.2.b.iii.2",
+            "1.B.2.b.iii.4",
+            "1.B.2.b.iii.5",
+            "1.B.2.b.iii.6",
+        ],
+        "name": "All Other",
+    },
+    "1.B.2.a": {"sources": ["1.B.2.a.iii"], "name": "Oil"},
+    "1.B.2.b": {
+        "sources": ["1.B.2.b.i", "1.B.2.b.ii", "1.B.2.b.iii"],
+        "name": "Natural Gas",
+    },
+    "2.D": {
+        "sources": ["2.D.4"],
+        "name": "Non-Energy Products from Fuels and Solvent Use",
+    },
+    "2.F.1": {
+        "sources": ["2.F.1.a", "2.F.1.b"],
+        "name": "Refrigeration and Air Conditioning",
+    },
+    "2.F": {
+        "sources": ["2.F.1", "2.F.2", "2.F.3", "2.F.4", "2.F.5", "2.F.6"],
+        "name": "Product uses as Substitutes for Ozone Depleting Substances",
+    },
+    "2.H": {"sources": ["2.H.1", "2.H.2", "2.H.3"], "name": "Other"},
+    "3.A.2": {
+        "sources": [
+            "3.A.2.a",
+            "3.A.2.c",
+            "3.A.2.d",
+            "3.A.2.e",
+            "3.A.2.f",
+            "3.A.2.g",
+            "3.A.2.h",
+            "3.A.2.i",
+        ],
+        "name": "Manure Management",
+    },
+    "3.A": {"sources": ["3.A.1", "3.A.2"], "name": "Livestock"},
+    "3.B": {
+        "sources": ["3.B.1", "3.B.2", "3.B.3", "3.B.4", "3.B.5", "3.B.6"],
+        "name": "Land",
+    },
+    "3.C": {
+        "sources": ["3.C.3", "3.C.4", "3.C.5", "3.C.6", "3.C.7"],
+        "name": "Aggregate sources and non-CO2 emissions sources on land",
+    },
+    "M.3.C.AG": {
+        "sources": ["3.C.3", "3.C.4", "3.C.5", "3.C.6", "3.C.7"],
+        "name": "Aggregate sources and non-CO2 emissions sources on land (Agriculture)",
+    },
+    "M.AG": {"sources": ["3.A", "M.3.C.AG"], "name": "Agriculture"},
+    "3": {"sources": ["M.AG", "M.LULUCF"], "name": "AFOLU"},
+    "M.AG.ELV": {
+        "sources": ["M.3.C.AG"],
+        "name": "Agriculture excluding livestock emissions",
+    },
+    "4": {"sources": ["4.A", "4.D"], "name": "Waste"},
+    "0": {"sources": ["1", "2", "3", "4"]},
+    "M.0.EL": {"sources": ["1", "2", "M.AG", "4"]},
 }

-zero_cats = ['1.B.2.a.i', '1.B.2.a.ii'] # venting and flaring with 0 for oil as
+zero_cats = ["1.B.2.a.i", "1.B.2.a.ii"]  # venting and flaring with 0 for oil as
 # all mapped to natural gas

+ 122 - 88
src/unfccc_ghg_data/unfccc_reader/Morocco/read_MAR_BUR3_from_pdf.py

@@ -1,13 +1,23 @@
-# this script reads data from Morocco's BUR3
-# Data is read from pdf
+"""
+Read Morocco's BUR3 from pdf
 
 
+This script reads data from Morocco's BUR3
+Data are read from pdf using camelot
+
+"""
 import copy

 import camelot
 import pandas as pd
 import primap2 as pm2
-from .config_mar_bur3 import (aggregate_cats, cat_mapping, header_defs, remove_cats,
-                              table_defs, zero_cats)
+from config_mar_bur3 import (
+    aggregate_cats,
+    cat_mapping,
+    header_defs,
+    remove_cats,
+    table_defs,
+    zero_cats,
+)
 from primap2.pm2io._data_reading import filter_data, matches_time_format

 from unfccc_ghg_data.helper import downloaded_data_path, extracted_data_path
@@ -16,11 +26,11 @@ if __name__ == "__main__":
     # ###
     # configuration
     # ###
-    input_folder = downloaded_data_path / 'UNFCCC' / 'Morocco' / 'BUR3'
-    output_folder = extracted_data_path / 'UNFCCC' / 'Morocco'
-    output_filename = 'MAR_BUR3_2022_'
-    inventory_file = 'Morocco_BUR3_Fr.pdf'
-    gwp_to_use = 'AR4GWP100'
+    input_folder = downloaded_data_path / "UNFCCC" / "Morocco" / "BUR3"
+    output_folder = extracted_data_path / "UNFCCC" / "Morocco"
+    output_filename = "MAR_BUR3_2022_"
+    inventory_file = "Morocco_BUR3_Fr.pdf"
+    gwp_to_use = "AR4GWP100"
 
 
     # years to read
     years = [2010, 2012, 2014, 2016, 2018]
@@ -31,30 +41,28 @@ if __name__ == "__main__":
     # special header as category code and name in one column
     header_long = ["orig_cat_name", "entity", "unit", "time", "data"]
 
 
-    index_cols = ['Catégories']
+    index_cols = ["Catégories"]
 
 
     # rows to remove
-    cats_remove = [
-        'Agriculture' # always empty
-    ]
+    cats_remove = ["Agriculture"]  # always empty
 
 
     # manual category codes
     cat_codes_manual = {
-        '1.A.2.e -Industries agro-alimentaires et du tabac': '1.A.2.e',
-        '1.A.2.f -Industries des minéraux non- métalliques': '1.A.2.f',
+        "1.A.2.e -Industries agro-alimentaires et du tabac": "1.A.2.e",
+        "1.A.2.f -Industries des minéraux non- métalliques": "1.A.2.f",
         #'Agriculture': 'M.AG',
         #'Agriculture': 'M.AG',
-        '2. PIUP': '2',
-        'UTCATF': 'M.LULUCF',
-        '3.B.1 Terres forestières': 'LU.3.B.1',
-        '3.B.2 Terres cultivées': 'LU.3.B.2',
-        '3.B.3 Prairies': 'LU.3.B.3',
-        '3.B.4 Terres humides': 'LU.3.B.4',
-        '3.B.5 Etablissements': 'LU.3.B.5',
-        '3.B.6 Autres terres': 'LU.3.B.6',
-        '1.B.1.a.i.1 -Exploitation minière': '1.A.1.a.i.1',
+        "2. PIUP": "2",
+        "UTCATF": "M.LULUCF",
+        "3.B.1 Terres forestières": "LU.3.B.1",
+        "3.B.2 Terres cultivées": "LU.3.B.2",
+        "3.B.3 Prairies": "LU.3.B.3",
+        "3.B.4 Terres humides": "LU.3.B.4",
+        "3.B.5 Etablissements": "LU.3.B.5",
+        "3.B.6 Autres terres": "LU.3.B.6",
+        "1.B.1.a.i.1 -Exploitation minière": "1.A.1.a.i.1",
     }
 
 
-    cat_code_regexp = r'(?P<code>^[a-zA-Z0-9\.]{1,14})\s-\s.*'
+    cat_code_regexp = r"(?P<code>^[a-zA-Z0-9\.]{1,14})\s-\s.*"
 
 
     coords_terminologies = {
         "area": "ISO3",
@@ -66,32 +74,29 @@ if __name__ == "__main__":
         "source": "MAR-GHG-inventory ",
         "provenance": "measured",
         "area": "MAR",
-        "scenario": "BUR3"
+        "scenario": "BUR3",
     }

     coords_value_mapping = {
         "unit": "PRIMAP1",
         "entity": {
-            'HFCs (AR4GWP100)': 'HFCS (AR4GWP100)',
-            'PFCs (AR4GWP100)': 'PFCS (AR4GWP100)',
-            'COVNM': 'NMVOC',
-        }
+            "HFCs (AR4GWP100)": "HFCS (AR4GWP100)",
+            "PFCs (AR4GWP100)": "PFCS (AR4GWP100)",
+            "COVNM": "NMVOC",
+        },
     }
 
 
+    coords_cols = {"category": "category", "entity": "entity", "unit": "unit"}
 
 
-    coords_cols = {
-        "category": "category",
-        "entity": "entity",
-        "unit": "unit"
-    }
-
-    #add_coords_cols = {
+    # add_coords_cols = {
     #    "orig_cat_name": ["orig_cat_name", "category"],
     #    "orig_cat_name": ["orig_cat_name", "category"],
-    #}
+    # }
 
 
     filter_remove = {
         "f1": {
-            "entity": ['Other halogenated gases without CO2 equivalent conversion factors (2)'],
+            "entity": [
+                "Other halogenated gases without CO2 equivalent conversion factors (2)"
+            ],
         },
     }
 
 
@@ -107,8 +112,9 @@ if __name__ == "__main__":
     ##### read the raw data from pdf #####
     tables = camelot.read_pdf(
         str(input_folder / inventory_file),
-        pages=','.join([str(page) for page in pages_to_read]),
-        flavor='lattice')
+        pages=",".join([str(page) for page in pages_to_read]),
+        flavor="lattice",
+    )
 
 
     ##### combine tables and convert to long format #####
     df_all = None
@@ -120,8 +126,9 @@ if __name__ == "__main__":
             df_first = tables[sector_tables[0]].df
             if len(sector_tables) > 1:
                 for table in sector_tables[1:]:
-                    df_this_table = pd.concat([df_first, tables[table].df], axis=0,
-                                              join='outer')
+                    df_this_table = pd.concat(
+                        [df_first, tables[table].df], axis=0, join="outer"
+                    )
             else:
                 df_this_table = df_first
 
 
@@ -130,11 +137,11 @@ if __name__ == "__main__":
             df_this_table.columns = header_defs[sector]

             # fix 2018 agri table
-            if (year == 2018) & (sector == "Agriculture"):
+            if (year == 2018) & (sector == "Agriculture"):  # noqa: PLR2004
                 last_shift_row = 25
-                df_temp = df_this_table.iloc[0: last_shift_row, 1:].copy()
-                df_this_table.iloc[0, 1:] = ''
-                df_this_table.iloc[1: last_shift_row + 1, 1:] = df_temp
+                df_temp = df_this_table.iloc[0:last_shift_row, 1:].copy()
+                df_this_table.iloc[0, 1:] = ""
+                df_this_table.iloc[1 : last_shift_row + 1, 1:] = df_temp
 
 
             # replace line breaks, long hyphens, double, and triple spaces in category names
             df_this_table.iloc[:, 0] = df_this_table.iloc[:, 0].str.replace("\n", " ")
@@ -144,14 +151,15 @@ if __name__ == "__main__":
 
 
             # set index and convert to long format
             df_this_table = df_this_table.set_index(index_cols)
-            df_this_table_long = pm2.pm2io.nir_convert_df_to_long(df_this_table, year,
-                                                                  header_long)
+            df_this_table_long = pm2.pm2io.nir_convert_df_to_long(
+                df_this_table, year, header_long
+            )
 
 
             # print(df_this_table_long.head())
             if df_all is None:
                 df_all = df_this_table_long
             else:
-                df_all = pd.concat([df_all, df_this_table_long], axis=0, join='outer')
+                df_all = pd.concat([df_all, df_this_table_long], axis=0, join="outer")
 
 
     df_all = df_all.reset_index(drop=True)
 
 
@@ -166,24 +174,32 @@ if __name__ == "__main__":
     # replace cat names by codes in col "category"
     # first the manual replacements
     df_all["category"] = df_all["category"].replace(cat_codes_manual)
+
     # then the regex replacements
-    def repl(m):
-        return m.group('code')
-    df_all["category"] = df_all["category"].str.replace(cat_code_regexp, repl, regex=True)
+    def repl(m):  # noqa: D103
+        return m.group("code")
+
+    df_all["category"] = df_all["category"].str.replace(
+        cat_code_regexp, repl, regex=True
+    )
     df_all = df_all.reset_index(drop=True)

     # prepare numbers for pd.to_numeric
-    df_all.loc[:, "data"] = df_all.loc[:, "data"].str.replace(' ', '')
-    def repl(m):
-        return m.group('part1') + '.' + m.group('part2')
-    df_all.loc[:, 'data'] = df_all.loc[:, 'data'].str.replace(
-        '(?P<part1>[0-9]+),(?P<part2>[0-9\\.]+)$', repl, regex=True)
-    df_all['data'][df_all['data'].isnull()] = 'NaN'
+    df_all.loc[:, "data"] = df_all.loc[:, "data"].str.replace(" ", "")
+
+    def repl(m):  # noqa: D103
+        return m.group("part1") + "." + m.group("part2")
+
+    df_all.loc[:, "data"] = df_all.loc[:, "data"].str.replace(
+        "(?P<part1>[0-9]+),(?P<part2>[0-9\\.]+)$", repl, regex=True
+    )
+    df_all["data"][df_all["data"].isna()] = "NaN"
 
 
     # add GWP information to entity
     for entity in df_all["entity"].unique():
-        df_all["entity"][(df_all["entity"] == entity) & (
-                    df_all["unit"] == "GgCO2eq")] = f"{entity} ({gwp_to_use})"
+        df_all["entity"][
+            (df_all["entity"] == entity) & (df_all["unit"] == "GgCO2eq")
+        ] = f"{entity} ({gwp_to_use})"
 
 
     # drop "original_cat_name" as it has non-unique values per category
     df_all = df_all.drop(columns="orig_cat_name")
@@ -196,7 +212,8 @@ if __name__ == "__main__":
         coords_value_mapping=coords_value_mapping,
         filter_remove=filter_remove,
         meta_data=meta_data,
-        convert_str=True
+        convert_str=True,
+        time_format="%Y",
     )

     # make sure all col headers are str
@@ -205,7 +222,9 @@ if __name__ == "__main__":
     # conversion to PRIMAP2 native format
     data_pm2 = pm2.pm2io.from_interchange_format(data_if)
 
 
-    entities_to_convert = ['CO2'] #['N2O', 'SF6', 'CO2', 'CH4'] # CO2 is not converted on
+    entities_to_convert = [
+        "CO2"
+    ]  # ['N2O', 'SF6', 'CO2', 'CH4'] # CO2 is not converted on
     # conversion to IF as data with and without GWP exists. needs to be fixed in primap2
     entities_to_convert = [f"{entity} (AR4GWP100)" for entity in entities_to_convert]
 
 
@@ -230,38 +249,42 @@ if __name__ == "__main__":
     data_if_2006.attrs = copy.deepcopy(data_if.attrs)

     filter_remove_cats = {
-        "cat": {
-            f"category ({coords_terminologies['category']})":
-        remove_cats
-        },
+        "cat": {f"category ({coords_terminologies['category']})": remove_cats},
     }

     filter_data(data_if_2006, filter_remove=filter_remove_cats)

     # map categories
     data_if_2006 = data_if_2006.replace(
-        {f"category ({coords_terminologies['category']})": cat_mapping})
+        {f"category ({coords_terminologies['category']})": cat_mapping}
+    )
     data_if_2006[f"category ({coords_terminologies['category']})"].unique()

     # rename the category col
-    data_if_2006.rename(columns={
-        f"category ({coords_terminologies['category']})": 'category (IPCC2006_PRIMAP)'},
-                        inplace=True)
-    data_if_2006.attrs['attrs']['cat'] = 'category (IPCC2006_PRIMAP)'
-    data_if_2006.attrs['dimensions']['*'] = [
-        'category (IPCC2006_PRIMAP)' if item == f"category ({coords_terminologies['category']})"
-        else item for item in data_if_2006.attrs['dimensions']['*']]
+    data_if_2006 = data_if_2006.rename(
+        columns={
+            f"category ({coords_terminologies['category']})": "category (IPCC2006_PRIMAP)"
+        }
+    )
+    data_if_2006.attrs["attrs"]["cat"] = "category (IPCC2006_PRIMAP)"
+    data_if_2006.attrs["dimensions"]["*"] = [
+        "category (IPCC2006_PRIMAP)"
+        if item == f"category ({coords_terminologies['category']})"
+        else item
+        for item in data_if_2006.attrs["dimensions"]["*"]
+    ]
     # aggregate categories
-    time_format = '%Y'
+    time_format = "%Y"
     time_columns = [
         col
-        for col in data_if_2006.columns.values
+        for col in data_if_2006.columns.to_numpy()
         if matches_time_format(col, time_format)
     ]
 
 
     for cat_to_agg in aggregate_cats:
         mask = data_if_2006["category (IPCC2006_PRIMAP)"].isin(
-            aggregate_cats[cat_to_agg]["sources"])
+            aggregate_cats[cat_to_agg]["sources"]
+        )
         df_test = data_if_2006[mask]
         # print(df_test)
 
 
@@ -273,8 +296,15 @@ if __name__ == "__main__":
                 df_combine[col] = pd.to_numeric(df_combine[col], errors="coerce")
 
 
             df_combine = df_combine.groupby(
-                by=['source', 'scenario (PRIMAP)', 'provenance', 'area (ISO3)', 'entity',
-                    'unit']).sum(min_count=1)
+                by=[
+                    "source",
+                    "scenario (PRIMAP)",
+                    "provenance",
+                    "area (ISO3)",
+                    "entity",
+                    "unit",
+                ]
+            ).sum(min_count=1)
 
 
             df_combine.insert(0, "category (IPCC2006_PRIMAP)", cat_to_agg)
             df_combine.insert(0, "category (IPCC2006_PRIMAP)", cat_to_agg)
             # df_combine.insert(1, "cat_name_translation", aggregate_cats[cat_to_agg]["name"])
 
 
             df_combine = df_combine.reset_index()
             df_combine = df_combine.reset_index()
 
 
-            data_if_2006 = pd.concat([data_if_2006, df_combine], axis=0, join='outer')
+            data_if_2006 = pd.concat([data_if_2006, df_combine], axis=0, join="outer")
             data_if_2006 = data_if_2006.reset_index(drop=True)
             data_if_2006 = data_if_2006.reset_index(drop=True)
         else:
             print(f"no data to aggregate category {cat_to_agg}")
 
     for cat in zero_cats:
     for cat in zero_cats:
         entities = data_if_2006["entity"].unique()
         entities = data_if_2006["entity"].unique()
-        data_zero = data_if_2006[data_if_2006["category (IPCC2006_PRIMAP)"]=="1"].copy(
-            deep=True)
+        data_zero = data_if_2006[
+            data_if_2006["category (IPCC2006_PRIMAP)"] == "1"
+        ].copy(deep=True)
         data_zero["category (IPCC2006_PRIMAP)"] = cat
         for col in time_columns:
             data_zero[col] = 0
@@ -303,7 +334,6 @@ if __name__ == "__main__":
     # convert back to IF to have units in the fixed format
     data_if_2006 = data_pm2_2006.pr.to_interchange_format()
 
 
-
     # ###
     # save data to IF and native format
     # ###
@@ -312,17 +342,21 @@ if __name__ == "__main__":
 
 
     # data in original categories
     pm2.pm2io.write_interchange_format(
-        output_folder / (output_filename + coords_terminologies["category"]), data_if)
+        output_folder / (output_filename + coords_terminologies["category"]), data_if
+    )
 
 
     encoding = {var: compression for var in data_pm2.data_vars}
     data_pm2.pr.to_netcdf(
         output_folder / (output_filename + coords_terminologies["category"] + ".nc"),
-        encoding=encoding)
+        encoding=encoding,
+    )
 
 
     # data in 2006 categories
     pm2.pm2io.write_interchange_format(
-        output_folder / (output_filename + "IPCC2006_PRIMAP"), data_if_2006)
+        output_folder / (output_filename + "IPCC2006_PRIMAP"), data_if_2006
+    )
 
 
     encoding = {var: compression for var in data_pm2_2006.data_vars}
     data_pm2_2006.pr.to_netcdf(
-        output_folder / (output_filename + "IPCC2006_PRIMAP" + ".nc"), encoding=encoding)
+        output_folder / (output_filename + "IPCC2006_PRIMAP" + ".nc"), encoding=encoding
+    )

+ 30 - 0
src/unfccc_ghg_data/unfccc_reader/Nigeria/__init__.py

@@ -0,0 +1,30 @@
+"""Read Nigeria's BURs, NIRs, NCs
+
+Scripts and configurations to read Nigeria's submissions to the UNFCCC.
+Currently, the following submissions and datasets are available (all datasets
+including DI (read using the DI-reader) and legacy BUR/NIR (no code)):
+
+.. exec_code::
+    :hide_code:
+
+    from unfccc_ghg_data.helper.functions import (get_country_datasets,
+                                                  get_country_submissions)
+    country = 'NGA'
+    # print available submissions
+    print("="*15 + " Available submissions " + "="*15)
+    get_country_submissions(country, True)
+    print("")
+
+    #print available datasets
+    print("="*15 + " Available datasets " + "="*15)
+    get_country_datasets(country, True)
+
+You can also obtain this information running
+
+.. code-block:: bash
+
+    poetry run doit country_info country=NGA
+
+See below for a listing of scripts for BUR/NIR reading including links.
+
+"""

+ 294 - 272
src/unfccc_ghg_data/unfccc_reader/Nigeria/config_nga_bur2.py

@@ -1,274 +1,280 @@
-gwp_to_use = 'AR5GWP100'
+"""Config for Nigeria's BUR2
+
+Full configuration including PRIMAP2 conversion config and metadata
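+
+Each entry in ``tables_trends`` bundles the camelot extraction parameters for
+one pdf table. A sketch of how such an entry could be passed to camelot; the
+file name and the ``stream`` flavor are assumptions here, not taken from the
+reading script:
+
+.. code-block:: python
+
+    import camelot
+
+    table_def = {
+        "page": "70",
+        "area": ["177,430,450,142"],
+        "cols": ["208,260,311,355,406"],
+    }
+    tables = camelot.read_pdf(
+        "NGA_BUR2.pdf",  # hypothetical path to the downloaded BUR pdf
+        pages=table_def["page"],
+        flavor="stream",
+        table_areas=table_def["area"],
+        columns=table_def["cols"],
+    )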
+
+"""
+
+gwp_to_use = "AR5GWP100"
 
 
 tables_trends = {
-    '70': { # GHG by main sector
-        'page': '70',
-        'area': ['177,430,450,142'],
-        'cols': ['208,260,311,355,406'],
-        'coords_defaults': {
-            'unit': 'GgCO2eq',
-        },
-        'coords_cols': {
+    "70": {  # GHG by main sector
+        "page": "70",
+        "area": ["177,430,450,142"],
+        "cols": ["208,260,311,355,406"],
+        "coords_defaults": {
+            "unit": "GgCO2eq",
+        },
+        "coords_cols": {
             "category": "Year",
             "entity": "entity",
         },
-        'copy_cols': {
+        "copy_cols": {
             # to: from
-            'entity': 'Year',
+            "entity": "Year",
         },
-        'coords_value_mapping': {
+        "coords_value_mapping": {
             "unit": "PRIMAP1",
             "unit": "PRIMAP1",
-            'category': {
-                'Total emissions': '0',
-                'Energy': '1',
-                'IPPU': '2',
-                'AFOLU': '3',
-                'Waste': '4',
+            "category": {
+                "Total emissions": "0",
+                "Energy": "1",
+                "IPPU": "2",
+                "AFOLU": "3",
+                "Waste": "4",
             },
-            'entity': {
-                'Total emissions': f'KYOTOGHG emissions ({gwp_to_use})',
-                'Energy': f'KYOTOGHG ({gwp_to_use})',
-                'IPPU': f'KYOTOGHG ({gwp_to_use})',
-                'AFOLU': f'KYOTOGHG emissions ({gwp_to_use})',
-                'Waste': f'KYOTOGHG ({gwp_to_use})',
+            "entity": {
+                "Total emissions": f"KYOTOGHG emissions ({gwp_to_use})",
+                "Energy": f"KYOTOGHG ({gwp_to_use})",
+                "IPPU": f"KYOTOGHG ({gwp_to_use})",
+                "AFOLU": f"KYOTOGHG emissions ({gwp_to_use})",
+                "Waste": f"KYOTOGHG ({gwp_to_use})",
             },
         },
-        'label_rows': [0, 1, 2],
+        "label_rows": [0, 1, 2],
     },
-    '71': { # main gases by sector
-    'page': '71',
-        'area': ['82,760,509,454'],
-        'cols': ['124,186,249,326,388,454'],
-        'coords_defaults': {
-            'category': '0',
-            'unit': 'GgCO2eq',
-        },
-        'coords_cols': {
+    "71": {  # main gases by sector
+        "page": "71",
+        "area": ["82,760,509,454"],
+        "cols": ["124,186,249,326,388,454"],
+        "coords_defaults": {
+            "category": "0",
+            "unit": "GgCO2eq",
+        },
+        "coords_cols": {
             "entity": "Year",
             "entity": "Year",
         },
         },
-        'remove_cols': [],
-        'coords_value_mapping': {
+        "remove_cols": [],
+        "coords_value_mapping": {
             "unit": "PRIMAP1",
             "unit": "PRIMAP1",
-            'entity': {
-                'Total GHG emissions (CO₂-eq)': f'KYOTOGHG emissions ({gwp_to_use})',
-                'Removals (CO₂) (CO₂-eq)': 'CO2 removals',
-                'Net emissions (CO₂-eq)': f'KYOTOGHG ({gwp_to_use})',
-                'CO₂ (Gg)': 'CO2 emissions',
-                'CH₄ (CO₂-eq)': f'CH4 ({gwp_to_use})',
-                'N₂O (CO₂-eq)': f'N2O ({gwp_to_use})',
+            "entity": {
+                "Total GHG emissions (CO₂-eq)": f"KYOTOGHG emissions ({gwp_to_use})",
+                "Removals (CO₂) (CO₂-eq)": "CO2 removals",
+                "Net emissions (CO₂-eq)": f"KYOTOGHG ({gwp_to_use})",
+                "CO₂ (Gg)": "CO2 emissions",
+                "CH₄ (CO₂-eq)": f"CH4 ({gwp_to_use})",
+                "N₂O (CO₂-eq)": f"N2O ({gwp_to_use})",
             },
         },
-        'label_rows':  [0, 1, 2, 3, 4],
+        "label_rows": [0, 1, 2, 3, 4],
     },
-    '72_1': { # CO2 by main sector
-    'page': '72',
-        'area': ['122,760,496,472'],
-        'cols': ['159,212,265,311,355,406,456'],
-        'coords_defaults': {
+    "72_1": {  # CO2 by main sector
+        "page": "72",
+        "area": ["122,760,496,472"],
+        "cols": ["159,212,265,311,355,406,456"],
+        "coords_defaults": {
             #'entity': 'CO2',
-            'unit': 'Gg',
+            "unit": "Gg",
         },
-        'coords_cols': {
+        "coords_cols": {
             "category": "Year",
             "category": "Year",
-            'entity': 'entity',
+            "entity": "entity",
         },
-        'remove_cols': ['Total emissions'],
-        'copy_cols': {
+        "remove_cols": ["Total emissions"],
+        "copy_cols": {
             # to: from
-            'entity': 'Year',
+            "entity": "Year",
         },
-        'coords_value_mapping': {
+        "coords_value_mapping": {
             "unit": "PRIMAP1",
             "unit": "PRIMAP1",
-            'category': {
-                'Total net emissions': '0',
-                'Energy': '1',
-                'IPPU': '2',
-                'AFOLU - emissions': '3',
-                'AFOLU - removals': '3',
-                'Waste': '4',
+            "category": {
+                "Total net emissions": "0",
+                "Energy": "1",
+                "IPPU": "2",
+                "AFOLU - emissions": "3",
+                "AFOLU - removals": "3",
+                "Waste": "4",
             },
-            'entity': {
-                'Total net emissions': 'CO2',
-                'Energy': 'CO2',
-                'IPPU': 'CO2',
-                'AFOLU - emissions': 'CO2 emissions',
-                'AFOLU - removals': 'CO2 removals',
-                'Waste': 'CO2',
+            "entity": {
+                "Total net emissions": "CO2",
+                "Energy": "CO2",
+                "IPPU": "CO2",
+                "AFOLU - emissions": "CO2 emissions",
+                "AFOLU - removals": "CO2 removals",
+                "Waste": "CO2",
             },
         },
-        'label_rows':  [0, 1, 2],
+        "label_rows": [0, 1, 2],
     },
-    '72_2': { # CH4 by sector
-    'page': '72',
-        'area': ['133,333,483,41'],
-        'cols': ['172,230,280,333,384,439'],
-        'coords_defaults': {
-            'entity': 'CH4',
-            'unit': 'Gg',
-        },
-        'coords_cols': {
+    "72_2": {  # CH4 by sector
+        "page": "72",
+        "area": ["133,333,483,41"],
+        "cols": ["172,230,280,333,384,439"],
+        "coords_defaults": {
+            "entity": "CH4",
+            "unit": "Gg",
+        },
+        "coords_cols": {
             "category": "Year",
             "category": "Year",
         },
         },
-        'remove_cols': ['Total (Gg CO₂-eq)'],
-        'coords_value_mapping': {
+        "remove_cols": ["Total (Gg CO₂-eq)"],
+        "coords_value_mapping": {
             "unit": "PRIMAP1",
             "unit": "PRIMAP1",
-            'category': {
-                'Total': '0',
-                'Energy': '1',
-                'IPPU': '2',
-                'AFOLU - emissions': '3',
-                'Waste': '4',
+            "category": {
+                "Total": "0",
+                "Energy": "1",
+                "IPPU": "2",
+                "AFOLU - emissions": "3",
+                "Waste": "4",
             },
         },
-        'label_rows':  [0, 1, 2],
+        "label_rows": [0, 1, 2],
     },
-    '73': { # N2O by sector
-    'page': '73',
-        'area': ['155,666,643,364'],
-        'cols': ['194,265,309,366,419'],
-        'coords_defaults': {
-            'entity': 'N2O',
-            'unit': 'Gg',
-        },
-        'coords_cols': {
+    "73": {  # N2O by sector
+        "page": "73",
+        "area": ["155,666,643,364"],
+        "cols": ["194,265,309,366,419"],
+        "coords_defaults": {
+            "entity": "N2O",
+            "unit": "Gg",
+        },
+        "coords_cols": {
             "category": "Year",
             "category": "Year",
         },
         },
-        'remove_cols': ['Total emissions (Gg CO₂-eq)'],
-        'coords_value_mapping': {
+        "remove_cols": ["Total emissions (Gg CO₂-eq)"],
+        "coords_value_mapping": {
             "unit": "PRIMAP1",
             "unit": "PRIMAP1",
-            'category': {
-                'Total': '0',
-                'Energy': '1',
-                'AFOLU': '3',
-                'Waste': '4',
+            "category": {
+                "Total": "0",
+                "Energy": "1",
+                "AFOLU": "3",
+                "Waste": "4",
             },
         },
-        'label_rows':  [0, 1, 2],
+        "label_rows": [0, 1, 2],
     },
-    '74': { # NOx by sector
-    'page': '74',
-        'area': ['148,457,467,166'],
-        'cols': ['190,254,304,359,421'],
-        'coords_defaults': {
-            'entity': 'NOX',
-            'unit': 'Gg',
-        },
-        'coords_cols': {
+    "74": {  # NOx by sector
+        "page": "74",
+        "area": ["148,457,467,166"],
+        "cols": ["190,254,304,359,421"],
+        "coords_defaults": {
+            "entity": "NOX",
+            "unit": "Gg",
+        },
+        "coords_cols": {
             "category": "Year",
             "category": "Year",
         },
         },
         #'remove_cols': [],
         #'remove_cols': [],
-        'coords_value_mapping': {
+        "coords_value_mapping": {
             "unit": "PRIMAP1",
             "unit": "PRIMAP1",
-            'category': {
-                'Total emissions': '0',
-                'Energy': '1',
-                'IPPU': '2',
-                'AFOLU': '3',
-                'Waste': '4',
+            "category": {
+                "Total emissions": "0",
+                "Energy": "1",
+                "IPPU": "2",
+                "AFOLU": "3",
+                "Waste": "4",
             },
         },
-        'label_rows':  [0, 1, 2],
+        "label_rows": [0, 1, 2],
     },
-    '75': { # CO by sector
-    'page': '75',
-        'area': ['161,763,456,472'],
-        'cols': ['199,256,307,359,410'],
-        'coords_defaults': {
-            'entity': 'CO',
-            'unit': 'Gg',
-        },
-        'coords_cols': {
+    "75": {  # CO by sector
+        "page": "75",
+        "area": ["161,763,456,472"],
+        "cols": ["199,256,307,359,410"],
+        "coords_defaults": {
+            "entity": "CO",
+            "unit": "Gg",
+        },
+        "coords_cols": {
             "category": "Year",
             "category": "Year",
         },
         },
         #'remove_cols': ['Total emissions (Gg CO2-eq)'],
         #'remove_cols': ['Total emissions (Gg CO2-eq)'],
-        'coords_value_mapping': {
+        "coords_value_mapping": {
             "unit": "PRIMAP1",
             "unit": "PRIMAP1",
-            'category': {
-                'Total emissions': '0',
-                'Energy': '1',
-                'IPPU': '2',
-                'AFOLU': '3',
-                'Waste': '4',
+            "category": {
+                "Total emissions": "0",
+                "Energy": "1",
+                "IPPU": "2",
+                "AFOLU": "3",
+                "Waste": "4",
             },
         },
-        'label_rows':  [0, 1, 2],
+        "label_rows": [0, 1, 2],
     },
-    '75_2': { # NMVOC by sector
-    'page': '75',
-        'area': ['177,325,441,50'],
-        'cols': ['219,287,340,395'],
-        'coords_defaults': {
-            'entity': 'NMVOC',
-            'unit': 'Gg',
-        },
-        'coords_cols': {
+    "75_2": {  # NMVOC by sector
+        "page": "75",
+        "area": ["177,325,441,50"],
+        "cols": ["219,287,340,395"],
+        "coords_defaults": {
+            "entity": "NMVOC",
+            "unit": "Gg",
+        },
+        "coords_cols": {
             "category": "Year",
             "category": "Year",
         },
         },
         #'remove_cols': ['Total emissions (Gg CO2-eq)'],
         #'remove_cols': ['Total emissions (Gg CO2-eq)'],
-        'coords_value_mapping': {
+        "coords_value_mapping": {
             "unit": "PRIMAP1",
             "unit": "PRIMAP1",
-            'category': {
-                'Total emissions': '0',
-                'Energy': '1',
-                'IPPU': '2',
-                'Waste': '4',
+            "category": {
+                "Total emissions": "0",
+                "Energy": "1",
+                "IPPU": "2",
+                "Waste": "4",
             },
         },
-        'label_rows':  [0, 1, 2],
+        "label_rows": [0, 1, 2],
     },
-    '76_1': { # NMVOC by sector
-    'page': '76',
-        'area': ['175,782,448,675'],
-        'cols': ['216,282,340,390'],
-        'coords_defaults': {
-            'entity': 'NMVOC',
-            'unit': 'Gg',
-        },
-        'coords_cols': {
+    "76_1": {  # NMVOC by sector
+        "page": "76",
+        "area": ["175,782,448,675"],
+        "cols": ["216,282,340,390"],
+        "coords_defaults": {
+            "entity": "NMVOC",
+            "unit": "Gg",
+        },
+        "coords_cols": {
             "category": "Year",
             "category": "Year",
         },
         },
         #'remove_cols': ['Total emissions (Gg CO2-eq)'],
         #'remove_cols': ['Total emissions (Gg CO2-eq)'],
-        'coords_value_mapping': {
+        "coords_value_mapping": {
             "unit": "PRIMAP1",
             "unit": "PRIMAP1",
-            'category': {
-                'Total emissions': '0',
-                'Energy': '1',
-                'IPPU': '2',
-                'Waste': '4',
+            "category": {
+                "Total emissions": "0",
+                "Energy": "1",
+                "IPPU": "2",
+                "Waste": "4",
             },
         },
-        'label_rows':  [0, 1, 2],
+        "label_rows": [0, 1, 2],
     },
-    '76_2': { # SO2 by sector
-    'page': '76',
-        'area': ['197,562,421,226'],
-        'cols': ['243,331,381'],
-        'coords_defaults': {
-            'entity': 'SO2',
-            'unit': 'Gg',
-        },
-        'coords_cols': {
+    "76_2": {  # SO2 by sector
+        "page": "76",
+        "area": ["197,562,421,226"],
+        "cols": ["243,331,381"],
+        "coords_defaults": {
+            "entity": "SO2",
+            "unit": "Gg",
+        },
+        "coords_cols": {
             "category": "Year",
             "category": "Year",
         },
         },
         #'remove_cols': ['Total emissions (Gg CO2-eq)'],
         #'remove_cols': ['Total emissions (Gg CO2-eq)'],
-        'coords_value_mapping': {
+        "coords_value_mapping": {
             "unit": "PRIMAP1",
             "unit": "PRIMAP1",
-            'category': {
-                'Total emissions': '0',
-                'Energy': '1',
-                'Waste': '4',
+            "category": {
+                "Total emissions": "0",
+                "Energy": "1",
+                "Waste": "4",
             },
         },
-        'label_rows':  [0],
+        "label_rows": [0],
     },
 }
 
 pages_inventory = {
-    '78': 1,
-    '79': 0,
-    '80': 0,
-    '81': 0,
-    '82': 0,
+    "78": 1,
+    "79": 0,
+    "80": 0,
+    "81": 0,
+    "82": 0,
 }
 
 year_inventory = 2017
@@ -279,8 +285,8 @@ unit_row = 0
 ###
 index_cols = "Categories"
 units_inv = {
-    'Emissions (Gg)': 'Gg',
-    'Emissions CO2 Equivalents (Gg)': 'GgCO2eq',
+    "Emissions (Gg)": "Gg",
+    "Emissions CO2 Equivalents (Gg)": "GgCO2eq",
 }
 # special header as category code and name in one column
 header_long = ["category", "entity", "unit", "time", "data"]
@@ -288,11 +294,11 @@ header_long = ["category", "entity", "unit", "time", "data"]
 
 # manual category codes
 cat_codes_manual = {
-    'Total National Emissions and Removals': '0',
-    'International Bunkers': 'M.BK',
+    "Total National Emissions and Removals": "0",
+    "International Bunkers": "M.BK",
 }
 
-cat_code_regexp = r'(?P<code>^[a-zA-Z0-9\.]{1,9})\s.*'
+cat_code_regexp = r"(?P<code>^[a-zA-Z0-9\.]{1,9})\s.*"
 
 coords_cols = {
     "category": "category",
@@ -321,29 +327,24 @@ coords_value_mapping = {
     "unit": "PRIMAP1",
     "unit": "PRIMAP1",
     "category": "PRIMAP1",
     "category": "PRIMAP1",
     "entity": {
     "entity": {
-        'Net CO2 (1)(2)': 'CO2',
-        'CH4': "CH4",
-        'N2O': "N2O",
-        'HFCs': f"HFCS ({gwp_to_use})",
-        'PFCs': f"PFCS ({gwp_to_use})",
-        'SF6': f"SF6 ({gwp_to_use})",
+        "Net CO2 (1)(2)": "CO2",
+        "CH4": "CH4",
+        "N2O": "N2O",
+        "HFCs": f"HFCS ({gwp_to_use})",
+        "PFCs": f"PFCS ({gwp_to_use})",
+        "SF6": f"SF6 ({gwp_to_use})",
         #'NOx': 'NOX',
-        'CO': 'CO', # no mapping, just added for completeness here
-        'NMVOCs': 'NMVOC',
-        'SO2': 'SO2', # no mapping, just added for completeness here
-        'Other halogenated gases with CO2 eq conversion factors (3)':
-            f"UnspMixOfHFCs ({gwp_to_use})",
+        "CO": "CO",  # no mapping, just added for completeness here
+        "NMVOCs": "NMVOC",
+        "SO2": "SO2",  # no mapping, just added for completeness here
+        "Other halogenated gases with CO2 eq conversion factors (3)": f"UnspMixOfHFCs ({gwp_to_use})",
     },
 }
 
 
 filter_remove = {
-    'f1': {
-        'entity': ['Other halogenated gases without CO2 eq conversion factors (4)']
-    },
-    'f2': {
-        'category': 'Memo'
-    },
+    "f1": {"entity": ["Other halogenated gases without CO2 eq conversion factors (4)"]},
+    "f2": {"category": "Memo"},
 }
 
 filter_keep = {}
@@ -353,73 +354,90 @@ meta_data = {
     "rights": "",
     "rights": "",
     "contact": "mail@johannes-guestchow.de",
     "contact": "mail@johannes-guestchow.de",
     "title": "Nigeria. Second Biennial Update Report (BUR2) to the United Nations "
     "title": "Nigeria. Second Biennial Update Report (BUR2) to the United Nations "
-             "Framework Convention on Climate Change",
+    "Framework Convention on Climate Change",
     "comment": "Read fom pdf by Johannes Gütschow",
     "comment": "Read fom pdf by Johannes Gütschow",
     "institution": "UNFCCC",
     "institution": "UNFCCC",
 }
 }
 
 # convert to mass units where possible
-entities_to_convert_to_mass = [
-    'CH4', 'N2O', 'SF6'
-]
+entities_to_convert_to_mass = ["CH4", "N2O", "SF6"]
 
-# CO2 equivalents don't make sense for these substances, so unit has to be Gg instead of Gg CO2 equivalents as indicated in the table
-entities_to_fix_unit = [
-    'NOx', 'CO', 'NMVOCs', 'SO2'
-]
+# CO2 equivalents don't make sense for these substances, so unit has to be Gg instead
+# of Gg CO2 equivalents as indicated in the table
+entities_to_fix_unit = ["NOx", "CO", "NMVOCs", "SO2"]
 
 ### processing
 
 processing_info_step1 = {
-    'aggregate_cats': {
-        '2.F': {'sources': ['2.F.2', '2.F.6'], # all 0, but for completeness
-              'name': 'Product uses as Substitutes for Ozone Depleting Substances'},
-        '2': {'sources': ['2.A', '2.B', '2.C', '2.D', '2.E', '2.F', '2.G'],
-              'name': 'IPPU'}, # for HFCs, PFCs, SO2, SF6, N2O (all 0)
+    "aggregate_cats": {
+        "2.F": {
+            "sources": ["2.F.2", "2.F.6"],  # all 0, but for completeness
+            "name": "Product uses as Substitutes for Ozone Depleting Substances",
+        },
+        "2": {
+            "sources": ["2.A", "2.B", "2.C", "2.D", "2.E", "2.F", "2.G"],
+            "name": "IPPU",
+        },  # for HFCs, PFCs, SO2, SF6, N2O (all 0)
     },
 }
 
-processing_info_step2 =  {
-    'aggregate_cats': {
-        'M.AG.ELV': {'sources': ['3.C'], 'name': 'Agriculture excluding livestock emissions'},
-        'M.AG': {'sources': ['M.AG.ELV', '3.A'], 'name': 'Agriculture'},
-        'M.LULUCF': {'sources': ['3.B', '3.D'],
-                     'name': 'Land Use, Land Use Change, and Forestry'},
-        'M.0.EL': {'sources': ['1', '2', 'M.AG', '4', '5'], 'name': 'National Total Excluding LULUCF'},
-        '0': {'sources': ['1', '2', '3', '4', '5'], 'name': 'National Total'},
+processing_info_step2 = {
+    "aggregate_cats": {
+        "M.AG.ELV": {
+            "sources": ["3.C"],
+            "name": "Agriculture excluding livestock emissions",
+        },
+        "M.AG": {"sources": ["M.AG.ELV", "3.A"], "name": "Agriculture"},
+        "M.LULUCF": {
+            "sources": ["3.B", "3.D"],
+            "name": "Land Use, Land Use Change, and Forestry",
+        },
+        "M.0.EL": {
+            "sources": ["1", "2", "M.AG", "4", "5"],
+            "name": "National Total Excluding LULUCF",
+        },
+        "0": {"sources": ["1", "2", "3", "4", "5"], "name": "National Total"},
     },
-    'downscale': {
-        'sectors': {
-            '1': {
-                'basket': '1',
-                'basket_contents': ['1.A', '1.B', '1.C'],
-                'entities': ['CO2', 'N2O', 'CH4'],
-                'dim': 'category (IPCC2006_PRIMAP)',
+    "downscale": {
+        "sectors": {
+            "1": {
+                "basket": "1",
+                "basket_contents": ["1.A", "1.B", "1.C"],
+                "entities": ["CO2", "N2O", "CH4"],
+                "dim": "category (IPCC2006_PRIMAP)",
             },
-            '1.A': {
-                'basket': '1.A',
-                'basket_contents': ['1.A.1', '1.A.2', '1.A.3', '1.A.4'],
-                'entities': ['CO2', 'N2O', 'CH4'],
-                'dim': 'category (IPCC2006_PRIMAP)',
+            "1.A": {
+                "basket": "1.A",
+                "basket_contents": ["1.A.1", "1.A.2", "1.A.3", "1.A.4"],
+                "entities": ["CO2", "N2O", "CH4"],
+                "dim": "category (IPCC2006_PRIMAP)",
             },
-            '1.B': {
-                'basket': '1.B',
-                'basket_contents': ['1.B.1', '1.B.2', '1.B.3'],
-                'entities': ['CO2', 'N2O', 'CH4'],
-                'dim': 'category (IPCC2006_PRIMAP)',
+            "1.B": {
+                "basket": "1.B",
+                "basket_contents": ["1.B.1", "1.B.2", "1.B.3"],
+                "entities": ["CO2", "N2O", "CH4"],
+                "dim": "category (IPCC2006_PRIMAP)",
             },
-            'IPPU': {
-                'basket': '2',
-                'basket_contents': ['2.A', '2.B', '2.C', '2.D', '2.E',
-                                    '2.F', '2.G', '2.H'],
-                'entities': ['CO2', 'N2O', 'CH4'],
-                'dim': 'category (IPCC2006_PRIMAP)',
+            "IPPU": {
+                "basket": "2",
+                "basket_contents": [
+                    "2.A",
+                    "2.B",
+                    "2.C",
+                    "2.D",
+                    "2.E",
+                    "2.F",
+                    "2.G",
+                    "2.H",
+                ],
+                "entities": ["CO2", "N2O", "CH4"],
+                "dim": "category (IPCC2006_PRIMAP)",
             },
-            '3': {
-                'basket': '3',
-                'basket_contents': ['3.A', '3.B', '3.C', '3.D'],
-                'entities': ['CO2', 'CH4', 'N2O'],
-                'dim': 'category (IPCC2006_PRIMAP)',
+            "3": {
+                "basket": "3",
+                "basket_contents": ["3.A", "3.B", "3.C", "3.D"],
+                "entities": ["CO2", "CH4", "N2O"],
+                "dim": "category (IPCC2006_PRIMAP)",
             },
             # '3A': {
             #     'basket': '3.A',
@@ -442,17 +460,21 @@ processing_info_step2 =  {
             # },
         },
     },
-    'remove_ts': {
-        'fgases': { # unnecessary and complicates aggregation for
+    "remove_ts": {
+        "fgases": {  # unnecessary and complicates aggregation for
             # other gases
-            'category': ['5'],
-            'entities': [f'HFCS ({gwp_to_use})', f'PFCS ({gwp_to_use})', 'SF6',
-                         f'UnspMixOfHFCs ({gwp_to_use})'],
+            "category": ["5"],
+            "entities": [
+                f"HFCS ({gwp_to_use})",
+                f"PFCS ({gwp_to_use})",
+                "SF6",
+                f"UnspMixOfHFCs ({gwp_to_use})",
+            ],
         },
     },
-    'basket_copy': {
-        'GWPs_to_add': ["SARGWP100", "AR4GWP100", "AR6GWP100"],
-        'entities': ["HFCS", "PFCS", "UnspMixOfHFCs"],
-        'source_GWP': gwp_to_use,
+    "basket_copy": {
+        "GWPs_to_add": ["SARGWP100", "AR4GWP100", "AR6GWP100"],
+        "entities": ["HFCS", "PFCS", "UnspMixOfHFCs"],
+        "source_GWP": gwp_to_use,
     },
 }

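The area and cols strings in the table configurations above are camelot stream-mode geometry: table_areas takes "x1,y1,x2,y2" in PDF points (top-left to bottom-right corner), and columns lists the x coordinates of the column separators. A minimal sketch of how one trend-table entry is consumed (the reader script below does the same in a loop; the pdf filename here is hypothetical):

.. code-block:: python

    import camelot

    from config_nga_bur2 import tables_trends  # as the reader script imports it

    cfg = tables_trends["70"]
    tables = camelot.read_pdf(
        "NIGERIA_BUR2.pdf",  # hypothetical filename
        pages=cfg["page"],
        table_areas=cfg["area"],  # "x1,y1,x2,y2" in PDF points
        columns=cfg["cols"],  # x coordinates separating the columns
        flavor="stream",
        split_text=True,
    )
    print(tables[0].df)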
+ 137 - 103
src/unfccc_ghg_data/unfccc_reader/Nigeria/read_NGA_BUR2_from_pdf.py

@@ -1,5 +1,10 @@
-# this script reads data from Nigeria's BUR2
-# Data is read from the pdf file
+"""
+Read Nigeria's BUR2 from pdf
+
+This script reads data from Nigeria's BUR2.
+Data are read from the pdf using camelot.
+
+"""
 
 import locale
 from copy import deepcopy
@@ -9,32 +14,32 @@ import numpy as np
 import pandas as pd
 import primap2 as pm2
 import xarray as xr
-from .config_nga_bur2 import (
-   cat_code_regexp,
-   cat_codes_manual,
-   coords_cols,
-   coords_defaults,
-   coords_terminologies,
-   coords_value_mapping,  #, add_coords_cols
-   entity_row,
-   filter_remove,
-   header_long,
-   index_cols,
-   meta_data,
-   pages_inventory,
-   processing_info_step1,
-   processing_info_step2,
-   tables_trends,
-   unit_row,
-   units_inv,
-   year_inventory,
+from config_nga_bur2 import (
+    cat_code_regexp,
+    cat_codes_manual,
+    coords_cols,
+    coords_defaults,
+    coords_terminologies,
+    coords_value_mapping,  # , add_coords_cols
+    entity_row,
+    filter_remove,
+    header_long,
+    index_cols,
+    meta_data,
+    pages_inventory,
+    processing_info_step1,
+    processing_info_step2,
+    tables_trends,
+    unit_row,
+    units_inv,
+    year_inventory,
 )
 
 from unfccc_ghg_data.helper import (
-   downloaded_data_path,
-   extracted_data_path,
-   gas_baskets,
-   process_data_for_country,
+    downloaded_data_path,
+    extracted_data_path,
+    gas_baskets,
+    process_data_for_country,
 )
 
 if __name__ == "__main__":
@@ -42,61 +47,74 @@ if __name__ == "__main__":
     # configuration
     # ###
     # define locale to use for str to float conversion
-    locale_to_use = 'en_NG.UTF-8'
+    locale_to_use = "en_NG.UTF-8"
     locale.setlocale(locale.LC_NUMERIC, locale_to_use)
 
-    input_folder = downloaded_data_path / 'UNFCCC' / 'Nigeria' / 'BUR2'
-    output_folder = extracted_data_path / 'UNFCCC' / 'Nigeria'
+    input_folder = downloaded_data_path / "UNFCCC" / "Nigeria" / "BUR2"
+    output_folder = extracted_data_path / "UNFCCC" / "Nigeria"
     if not output_folder.exists():
-       output_folder.mkdir()
+        output_folder.mkdir()
 
-    output_filename = 'NGA_BUR2_2021_'
+    output_filename = "NGA_BUR2_2021_"
     compression = dict(zlib=True, complevel=9)
-    inventory_file = 'NIGERIA_BUR_2_-_Second_Biennial_Update_Report_%28BUR2%29.pdf'
+    inventory_file = "NIGERIA_BUR_2_-_Second_Biennial_Update_Report_%28BUR2%29.pdf"
 
     ## read 2017 inventory
     df_inventory = None
     for page in pages_inventory.keys():
-        tables = camelot.read_pdf(str(input_folder / inventory_file), pages=str(page),
-                                  flavor='lattice')
+        tables = camelot.read_pdf(
+            str(input_folder / inventory_file), pages=str(page), flavor="lattice"
+        )
         df_this_table = tables[pages_inventory[page]].df
         # replace line breaks, double, and triple spaces in category names
         df_this_table.iloc[:, 0] = df_this_table.iloc[:, 0].str.replace("\n", " ")
         df_this_table.iloc[:, 0] = df_this_table.iloc[:, 0].str.replace("   ", " ")
         df_this_table.iloc[:, 0] = df_this_table.iloc[:, 0].str.replace("  ", " ")
         # replace line breaks in units and entities
-        df_this_table.iloc[entity_row] = df_this_table.iloc[entity_row].str.replace('\n',
-                                                                                    '')
-        df_this_table.iloc[unit_row] = df_this_table.iloc[unit_row].str.replace('\n', '')
+        df_this_table.iloc[entity_row] = df_this_table.iloc[entity_row].str.replace(
+            "\n", ""
+        )
+        df_this_table.iloc[unit_row] = df_this_table.iloc[unit_row].str.replace(
+            "\n", ""
+        )
 
         # fillna in unit row
-        df_this_table.iloc[unit_row][df_this_table.iloc[unit_row]==""] = np.nan
-        df_this_table.iloc[unit_row] = df_this_table.iloc[unit_row].fillna(
-            method='ffill')
-        df_this_table = pm2.pm2io.nir_add_unit_information(df_this_table, unit_row=unit_row,
-                                                           entity_row=entity_row,
-                                                           regexp_entity=".*",
-                                                           manual_repl_unit=units_inv,
-                                                           default_unit="")
+        df_this_table.iloc[unit_row][df_this_table.iloc[unit_row] == ""] = np.nan
+        df_this_table.iloc[unit_row] = df_this_table.iloc[unit_row].ffill()
+        df_this_table = pm2.pm2io.nir_add_unit_information(
+            df_this_table,
+            unit_row=unit_row,
+            entity_row=entity_row,
+            regexp_entity=".*",
+            manual_repl_unit=units_inv,
+            default_unit="",
+        )
 
         # set index and convert to long format
         df_this_table = df_this_table.set_index(index_cols)
-        df_this_table_long = pm2.pm2io.nir_convert_df_to_long(df_this_table, year_inventory,
-                                                              header_long)
+        df_this_table_long = pm2.pm2io.nir_convert_df_to_long(
+            df_this_table, year_inventory, header_long
+        )
 
         # combine with tables for other sectors (merge not append)
         if df_inventory is None:
             df_inventory = df_this_table_long
         else:
-            df_inventory = pd.concat([df_inventory, df_this_table_long], axis=0, join='outer')
+            df_inventory = pd.concat(
+                [df_inventory, df_this_table_long], axis=0, join="outer"
+            )
 
     # replace cat names by codes in col "category"
     # first the manual replacements
     df_inventory["category"] = df_inventory["category"].replace(cat_codes_manual)
+
     # then the regex replacements
-    def repl(m):
-       return m.group('code')
-    df_inventory["category"] = df_inventory["category"].str.replace(cat_code_regexp, repl, regex=True)
+    def repl(m):  # noqa: D103
+        return m.group("code")
+
+    df_inventory["category"] = df_inventory["category"].str.replace(
+        cat_code_regexp, repl, regex=True
+    )
     df_inventory = df_inventory.reset_index(drop=True)
 
     # ###
@@ -105,15 +123,15 @@ if __name__ == "__main__":
     data_inv_if = pm2.pm2io.convert_long_dataframe_if(
         df_inventory,
         coords_cols=coords_cols,
-        #add_coords_cols=add_coords_cols,
+        # add_coords_cols=add_coords_cols,
         coords_defaults=coords_defaults,
         coords_terminologies=coords_terminologies,
         coords_value_mapping=coords_value_mapping,
         filter_remove=filter_remove,
         meta_data=meta_data,
         convert_str=True,
-        time_format='%Y',
-        )
+        time_format="%Y",
+    )
 
     data_inv_pm2 = pm2.pm2io.from_interchange_format(data_inv_if)
 
@@ -122,19 +140,21 @@ if __name__ == "__main__":
     for table in tables_trends.keys():
         print(table)
         current_table = deepcopy(tables_trends[table])
-        tables = camelot.read_pdf(str(input_folder / inventory_file),
-                                  pages=current_table["page"],
-                                  table_areas=current_table["area"],
-                                  columns=current_table["cols"],
-                                  flavor='stream',
-                                  split_text=True)
+        tables = camelot.read_pdf(
+            str(input_folder / inventory_file),
+            pages=current_table["page"],
+            table_areas=current_table["area"],
+            columns=current_table["cols"],
+            flavor="stream",
+            split_text=True,
+        )
         df_this_table = tables[0].df
 
         # merge rows for entity and unit
         rows_to_merge = df_this_table.iloc[current_table["label_rows"]]
         indices_to_merge = rows_to_merge.index
         # join the three rows
-        new_row = rows_to_merge.agg(' '.join)
+        new_row = rows_to_merge.agg(" ".join)
         df_this_table.loc[indices_to_merge[0]] = new_row
         df_this_table = df_this_table.drop(indices_to_merge)
         new_row = new_row.str.replace("  ", " ")
@@ -144,7 +164,7 @@ if __name__ == "__main__":
         df_this_table.columns = new_row
 
         # remove columns not needed
-        if 'remove_cols' in current_table.keys():
+        if "remove_cols" in current_table.keys():
             df_this_table = df_this_table.drop(columns=current_table["remove_cols"])
 
         df_this_table = df_this_table.set_index("Year")
@@ -155,12 +175,14 @@ if __name__ == "__main__":
         # remove "," (thousand sep) from data
         for col in df_this_table.columns:
             df_this_table.loc[:, col] = df_this_table.loc[:, col].str.strip()
-            def repl(m):
-               return m.group('part1') + m.group('part2')
-            df_this_table.loc[:, col] = df_this_table.loc[:, col].str.replace(
-                '(?P<part1>[0-9]+),(?P<part2>[0-9\\.]+)$', repl, regex=True)
-            df_this_table[col][df_this_table[col].isnull()] = 'NaN'
 
+            def repl(m):  # noqa: D103
+                return m.group("part1") + m.group("part2")
+
+            df_this_table.loc[:, col] = df_this_table.loc[:, col].str.replace(
+                "(?P<part1>[0-9]+),(?P<part2>[0-9\\.]+)$", repl, regex=True
+            )
+            df_this_table[col][df_this_table[col].isna()] = "NaN"
 
         # metadata in first col instead of index
         df_this_table = df_this_table.reset_index()
@@ -170,7 +192,7 @@ if __name__ == "__main__":
         df_this_table.columns = df_this_table.columns.map(str)
 
         # make copy of columns if a column is used twice for metadata
-        if 'copy_cols' in current_table.keys():
+        if "copy_cols" in current_table.keys():
             for col in current_table["copy_cols"]:
             for col in current_table["copy_cols"]:
                 df_this_table[col] = df_this_table[current_table["copy_cols"][col]]
                 df_this_table[col] = df_this_table[current_table["copy_cols"][col]]
 
 
@@ -184,7 +206,7 @@ if __name__ == "__main__":
             coords_value_mapping=current_table["coords_value_mapping"],
             meta_data=meta_data,
             convert_str=True,
-            time_format='%Y',
+            time_format="%Y",
         )
 
         data_current_pm2 = pm2.pm2io.from_interchange_format(data_current_if)
@@ -193,7 +215,7 @@ if __name__ == "__main__":
         else:
             data_trend_pm2 = data_trend_pm2.pr.merge(data_current_pm2)
 
-    data_pm2 = data_inv_pm2.pr.merge(data_trend_pm2, tolerance=0.02) # some rounding in
+    data_pm2 = data_inv_pm2.pr.merge(data_trend_pm2, tolerance=0.02)  # some rounding in
     # trends needs higher tolerance
 
     data_if = data_pm2.pr.to_interchange_format()
@@ -205,48 +227,59 @@ if __name__ == "__main__":
         output_folder.mkdir()
     pm2.pm2io.write_interchange_format(
         output_folder / (output_filename + coords_terminologies["category"] + "_raw"),
-        data_if)
+        data_if,
+    )
 
     encoding = {var: compression for var in data_pm2.data_vars}
     data_pm2.pr.to_netcdf(
-        output_folder / (output_filename + coords_terminologies["category"] +
-                         "_raw.nc"),
-        encoding=encoding)
-
+        output_folder
+        / (output_filename + coords_terminologies["category"] + "_raw.nc"),
+        encoding=encoding,
+    )
 
     #### processing
     data_proc_pm2 = data_pm2
     terminology_proc = coords_terminologies["category"]
 
     # combine CO2 emissions and removals
-    temp_CO2 = data_proc_pm2[["CO2 emissions", "CO2 removals"]].pr.sum\
-        (dim="entity", skipna=True, min_count=1)
+    temp_CO2 = data_proc_pm2[["CO2 emissions", "CO2 removals"]].pr.sum(
+        dim="entity", skipna=True, min_count=1
+    )
     data_proc_pm2["CO2"] = data_proc_pm2["CO2"].fillna(temp_CO2)
     data_proc_pm2["CO2"] = data_proc_pm2["CO2"].fillna(temp_CO2)
 
 
     # create net KYOTOGHG for 0 and 3
     # create net KYOTOGHG for 0 and 3
-    data_proc_pm2["KYOTOGHG removals (AR5GWP100)"] \
-        = xr.full_like(data_proc_pm2["CO2 removals"],
-                       np.nan).pr.quantify(units="Gg CO2 / year")
-
-    data_proc_pm2["KYOTOGHG removals (AR5GWP100)"].attrs = {"entity": "KYOTOGHG",
-                                                            "gwp_context": "AR5GWP100"}
-    data_proc_pm2["KYOTOGHG removals (AR5GWP100)"] \
-        = data_proc_pm2.pr.gas_basket_contents_sum(
-        basket="KYOTOGHG removals (AR5GWP100)", basket_contents=['CO2 removals'],
-        skipna=True, min_count=1)
-    temp_KYOTOGHG = data_proc_pm2[["KYOTOGHG emissions (AR5GWP100)",
-                                   "KYOTOGHG removals (AR5GWP100)"]].pr.sum\
-        (dim="entity", skipna=True, min_count=1)
-    data_proc_pm2["KYOTOGHG (AR5GWP100)"] \
-        = data_proc_pm2["KYOTOGHG (AR5GWP100)"].fillna(temp_KYOTOGHG)
-
+    data_proc_pm2["KYOTOGHG removals (AR5GWP100)"] = xr.full_like(
+        data_proc_pm2["CO2 removals"], np.nan
+    ).pr.quantify(units="Gg CO2 / year")
+
+    data_proc_pm2["KYOTOGHG removals (AR5GWP100)"].attrs = {
+        "entity": "KYOTOGHG",
+        "gwp_context": "AR5GWP100",
+    }
+    data_proc_pm2[
+        "KYOTOGHG removals (AR5GWP100)"
+    ] = data_proc_pm2.pr.gas_basket_contents_sum(
+        basket="KYOTOGHG removals (AR5GWP100)",
+        basket_contents=["CO2 removals"],
+        skipna=True,
+        min_count=1,
+    )
+    temp_KYOTOGHG = data_proc_pm2[
+        ["KYOTOGHG emissions (AR5GWP100)", "KYOTOGHG removals (AR5GWP100)"]
+    ].pr.sum(dim="entity", skipna=True, min_count=1)
+    data_proc_pm2["KYOTOGHG (AR5GWP100)"] = data_proc_pm2[
+        "KYOTOGHG (AR5GWP100)"
+    ].fillna(temp_KYOTOGHG)
 
     # actual processing
     data_proc_pm2 = process_data_for_country(
         data_proc_pm2,
-        entities_to_ignore=['CO2 emissions', 'CO2 removals',
-                            'KYOTOGHG emissions (AR5GWP100)',
-                            'KYOTOGHG removals (AR5GWP100)'],
+        entities_to_ignore=[
+            "CO2 emissions",
+            "CO2 removals",
+            "KYOTOGHG emissions (AR5GWP100)",
+            "KYOTOGHG removals (AR5GWP100)",
+        ],
         gas_baskets={},
         processing_info_country=processing_info_step1,
     )
@@ -256,16 +289,16 @@ if __name__ == "__main__":
         entities_to_ignore=[],
         gas_baskets=gas_baskets,
         processing_info_country=processing_info_step2,
-        cat_terminology_out = terminology_proc,
-        #category_conversion = None,
-        #sectors_out = None,
+        cat_terminology_out=terminology_proc,
+        # category_conversion = None,
+        # sectors_out = None,
     )
 
     # adapt source and metadata
     # TODO: processing info is present twice
-    current_source = data_proc_pm2.coords["source"].values[0]
+    current_source = data_proc_pm2.coords["source"].to_numpy()[0]
     data_temp = data_proc_pm2.pr.loc[{"source": current_source}]
-    data_proc_pm2 = data_proc_pm2.pr.set("source", 'BUR_NIR', data_temp)
+    data_proc_pm2 = data_proc_pm2.pr.set("source", "BUR_NIR", data_temp)
 
     # ###
     # save data to IF and native format
@@ -274,9 +307,10 @@ if __name__ == "__main__":
     if not output_folder.exists():
         output_folder.mkdir()
     pm2.pm2io.write_interchange_format(
-        output_folder / (output_filename + terminology_proc), data_proc_if)
+        output_folder / (output_filename + terminology_proc), data_proc_if
+    )
 
     encoding = {var: compression for var in data_proc_pm2.data_vars}
     data_proc_pm2.pr.to_netcdf(
-        output_folder / (output_filename + terminology_proc + ".nc"),
-        encoding=encoding)
+        output_folder / (output_filename + terminology_proc + ".nc"), encoding=encoding
+    )

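The thousands-separator clean-up in the trend-table loop above can be checked standalone; a minimal sketch of the same pandas str.replace with a callable replacement, using made-up sample values:

.. code-block:: python

    import pandas as pd

    s = pd.Series(["1,234.56", "12.3", "NaN"])
    cleaned = s.str.replace(
        "(?P<part1>[0-9]+),(?P<part2>[0-9\\.]+)$",
        lambda m: m.group("part1") + m.group("part2"),
        regex=True,
    )
    print(cleaned.tolist())  # ['1234.56', '12.3', 'NaN']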
+ 30 - 0
src/unfccc_ghg_data/unfccc_reader/Peru/__init__.py

@@ -0,0 +1,30 @@
+"""Read Peru's BURs, NIRs, NCs
+
+Scripts and configurations to read Peru's submissions to the UNFCCC.
+Currently, the following submissions and datasets are available (all datasets
+including DI (read using the DI-reader) and legacy BUR/NIR (no code)):
+
+.. exec_code::
+    :hide_code:
+
+    from unfccc_ghg_data.helper.functions import (get_country_datasets,
+                                                  get_country_submissions)
+    country = 'PER'
+    # print available submissions
+    print("="*15 + " Available submissions " + "="*15)
+    get_country_submissions(country, True)
+    print("")
+
+    # print available datasets
+    print("="*15 + " Available datasets " + "="*15)
+    get_country_datasets(country, True)
+
+You can also obtain this information by running
+
+.. code-block:: bash
+
+    poetry run doit country_info country=PER
+
+See below for a listing of scripts for BUR/NIR reading including links.
+
+"""

+ 77 - 66
src/unfccc_ghg_data/unfccc_reader/Peru/config_per_bur3.py

@@ -1,3 +1,9 @@
+"""Config for Peru's BUR3
+
+Full configuration including PRIMAP2 conversion config and metadata
+
+"""
+
 table_def_templates = {
     "300": {  # 300
         "area": ["69,457,727,78"],
@@ -486,75 +492,80 @@ meta_data = {
 
 ## processing
 cat_conversion = {
-    'mapping': {
-        '0': '0',
-        '1': '1',
-        '1.A': '1.A',
-        '1.A.1': '1.A.1',
-        '1.A.2': '1.A.2',
-        '1.A.3': '1.A.3',
-        '1.A.4': '1.A.4',
-        '1.A.5': '1.A.5',
-        '1.B': '1.B',
-        '1.B.1': '1.B.1',
-        '1.B.2': '1.B.2',
-        '2': '2',
-        '2.A': '2.A',
-        '2.B': '2.B',
-        '2.C': '2.C',
-        '2.D': '2.D',
-        '2.E': '2.E',
-        '2.F': '2.F',
-        '2.G': '2.G',
-        '2.H': '2.H',
-        '3': 'M.AG',
-        '3.A': '3.A',
-        '3.A.1': '3.A.1',
-        '3.A.2': '3.A.2',
-        '3.C': '3.C',
-        '3.C.1': '3.C.1',
-        '3.C.2': '3.C.2',
-        '3.C.3': '3.C.3',
-        '3.C.4': '3.C.4',
-        '3.C.5': '3.C.5',
-        '3.C.6': '3.C.6',
-        '3.C.7': '3.C.7',
-        '4': 'M.LULUCF',
-        'M.2006.3.B': '3.B',
-        '4.A': '3.B.1',
-        '4.B': '3.B.2',
-        '4.C': '3.B.3',
-        '4.D': '3.B.4',
-        '4.E': '3.B.5',
-        '4.F': '3.B.6',
-        '4.G': '3.D.1',
-        '5': '4',
-        '5.A': '4.A',
-        '5.B': '4.B',
-        '5.C': '4.C',
-        '5.D': '4.D',
-        'M.BK': 'M.BK',
-        'M.BK.A': 'M.BK.A',
-        'M.BK.M': 'M.BM.M',
-        'M.BIO': 'M.BIO',
-    },
-    'aggregate': {
-        '2': {'sources': ['2.A', '2.B', '2.C', '2.D', '2.E', '2.F', '2.G', '2.H'],
-              'name': 'IPPU'},
-        'M.3.C.AG': {
-            'sources': ['3.C'],
-            'name': 'Aggregate sources and non-CO2 emissions sources on land (Agriculture)'},
-        'M.AG.ELV': {'sources': ['M.3.C.AG'],
-                     'name': 'Agriculture excluding livestock emissions'},
-        '3.D': {'sources': ['3.D.1'], 'name': 'Other'},
-        '3': {'sources': ['M.AG', 'M.LULUCF'], 'name': 'AFOLU'},
+    "mapping": {
+        "0": "0",
+        "1": "1",
+        "1.A": "1.A",
+        "1.A.1": "1.A.1",
+        "1.A.2": "1.A.2",
+        "1.A.3": "1.A.3",
+        "1.A.4": "1.A.4",
+        "1.A.5": "1.A.5",
+        "1.B": "1.B",
+        "1.B.1": "1.B.1",
+        "1.B.2": "1.B.2",
+        "2": "2",
+        "2.A": "2.A",
+        "2.B": "2.B",
+        "2.C": "2.C",
+        "2.D": "2.D",
+        "2.E": "2.E",
+        "2.F": "2.F",
+        "2.G": "2.G",
+        "2.H": "2.H",
+        "3": "M.AG",
+        "3.A": "3.A",
+        "3.A.1": "3.A.1",
+        "3.A.2": "3.A.2",
+        "3.C": "3.C",
+        "3.C.1": "3.C.1",
+        "3.C.2": "3.C.2",
+        "3.C.3": "3.C.3",
+        "3.C.4": "3.C.4",
+        "3.C.5": "3.C.5",
+        "3.C.6": "3.C.6",
+        "3.C.7": "3.C.7",
+        "4": "M.LULUCF",
+        "M.2006.3.B": "3.B",
+        "4.A": "3.B.1",
+        "4.B": "3.B.2",
+        "4.C": "3.B.3",
+        "4.D": "3.B.4",
+        "4.E": "3.B.5",
+        "4.F": "3.B.6",
+        "4.G": "3.D.1",
+        "5": "4",
+        "5.A": "4.A",
+        "5.B": "4.B",
+        "5.C": "4.C",
+        "5.D": "4.D",
+        "M.BK": "M.BK",
+        "M.BK.A": "M.BK.A",
+        "M.BK.M": "M.BM.M",
+        "M.BIO": "M.BIO",
+    },
+    "aggregate": {
+        "2": {
+            "sources": ["2.A", "2.B", "2.C", "2.D", "2.E", "2.F", "2.G", "2.H"],
+            "name": "IPPU",
+        },
+        "M.3.C.AG": {
+            "sources": ["3.C"],
+            "name": "Aggregate sources and non-CO2 emissions sources on land (Agriculture)",
+        },
+        "M.AG.ELV": {
+            "sources": ["M.3.C.AG"],
+            "name": "Agriculture excluding livestock emissions",
+        },
+        "3.D": {"sources": ["3.D.1"], "name": "Other"},
+        "3": {"sources": ["M.AG", "M.LULUCF"], "name": "AFOLU"},
     },
 }
 
 processing_info = {
-    'basket_copy': {
-        'GWPs_to_add': ["SARGWP100", "AR4GWP100", "AR6GWP100"],
-        'entities': ["HFCS", "PFCS"],
-        'source_GWP': gwp_to_use,
+    "basket_copy": {
+        "GWPs_to_add": ["SARGWP100", "AR4GWP100", "AR6GWP100"],
+        "entities": ["HFCS", "PFCS"],
+        "source_GWP": gwp_to_use,
     },
 }

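The mapping table above translates the report's 1996-style category codes into IPCC2006_PRIMAP codes; the actual conversion happens inside process_data_for_country, but at its core it is a dictionary lookup. A toy illustration, assuming the script's working directory so the config imports resolve:

.. code-block:: python

    from config_per_bur3 import cat_conversion  # as the reader script imports it

    mapping = cat_conversion["mapping"]
    for code in ["3", "4.A", "5.B"]:
        print(code, "->", mapping[code])  # 3 -> M.AG, 4.A -> 3.B.1, 5.B -> 4.B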
+ 33 - 20
src/unfccc_ghg_data/unfccc_reader/Peru/read_PER_BUR3_from_pdf.py

@@ -1,12 +1,17 @@
-# read Singapore fifth BUR from pdf
+"""
+Read Peru's BUR3 from pdf
 
+This script reads data from Peru's BUR3.
+Data are read from the pdf using camelot.
+
+"""
 
 import locale
 
 import camelot
 import pandas as pd
 import primap2 as pm2
-from .config_per_bur3 import (
+from config_per_bur3 import (
     cat_code_regexp,
     cat_codes_manual,
     cat_conversion,
@@ -103,20 +108,22 @@ if __name__ == "__main__":
 
             # drop cols if necessary
             if "drop_cols" in table_defs[page].keys():
-                # print(df_current.columns.values)
+                # print(df_current.columns.to_numpy())
                 df_current = df_current.drop(columns=table_defs[page]["drop_cols"])
             elif "drop_cols" in table_def_templates[table_on_page].keys():
                 df_current = df_current.drop(columns=table_defs[page]["drop_cols"])
 
             # rename category column
-            df_current.rename(
-                columns={table_defs[page]["category_col"]: index_cols[0]}, inplace=True
+            df_current = df_current.rename(
+                columns={table_defs[page]["category_col"]: index_cols[0]}
             )
 
             # replace double \n
             df_current[index_cols[0]] = df_current[index_cols[0]].str.replace("\n", " ")
             # replace double and triple spaces
-            df_current[index_cols[0]] = df_current[index_cols[0]].str.replace("   ", " ")
+            df_current[index_cols[0]] = df_current[index_cols[0]].str.replace(
+                "   ", " "
+            )
             df_current[index_cols[0]] = df_current[index_cols[0]].str.replace("  ", " ")
 
             # fix the split rows
@@ -137,7 +144,7 @@ if __name__ == "__main__":
             # set index
             # df_current = df_current.set_index(index_cols)
             # strip trailing and leading  and remove "^"
-            for col in df_current.columns.values:
+            for col in df_current.columns.to_numpy():
                 df_current[col] = df_current[col].str.strip()
                 df_current[col] = df_current[col].str.replace("^", "")
 
@@ -147,9 +154,9 @@ if __name__ == "__main__":
                 df_this_page = df_current.copy(deep=True)
             else:
                 # find intersecting cols
-                cols_this_page = df_this_page.columns.values
+                cols_this_page = df_this_page.columns.to_numpy()
                 # print(f"cols this page: {cols_this_page}")
                 # print(f"cols this page: {cols_this_page}")
-                cols_current = df_current.columns.values
+                cols_current = df_current.columns.to_numpy()
                 # print(f"cols current: {cols_current}")
                 # print(f"cols current: {cols_current}")
                 cols_both = list(set(cols_this_page).intersection(set(cols_current)))
                 cols_both = list(set(cols_this_page).intersection(set(cols_current)))
                 # print(f"cols both: {cols_both}")
                 # print(f"cols both: {cols_both}")
@@ -179,7 +186,9 @@ if __name__ == "__main__":
         # drop the rows with memo items etc
         for cat in cats_remove:
             df_this_page_long = df_this_page_long.drop(
-                df_this_page_long.loc[df_this_page_long.loc[:, index_cols[0]] == cat].index
+                df_this_page_long.loc[
+                    df_this_page_long.loc[:, index_cols[0]] == cat
+                ].index
             )
 
         # make a copy of the categories row
@@ -187,12 +196,14 @@ if __name__ == "__main__":
 
         # replace cat names by codes in col "Categories"
         # first the manual replacements
-        df_this_page_long.loc[:, "category"] = df_this_page_long.loc[:, "category"].replace(
-            cat_codes_manual
-        )
+        df_this_page_long.loc[:, "category"] = df_this_page_long.loc[
+            :, "category"
+        ].replace(cat_codes_manual)
+
         # then the regex replacements
         # then the regex replacements
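         # repl() prepends "IPC" to the matched code and converts it to a
         # PRIMAP2 category code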
-        def repl(m):
+        def repl(m):  # noqa: D103
             return convert_ipcc_code_primap_to_primap2("IPC" + m.group("code"))
+
         df_this_page_long.loc[:, "category"] = df_this_page_long.loc[
             :, "category"
         ].str.replace(cat_code_regexp, repl, regex=True)
@@ -211,8 +222,10 @@ if __name__ == "__main__":
             ".", ""
             ".", ""
         )
         )
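         # the pattern below turns the last comma of a number into a decimal
         # point (e.g. "123,45" -> "123.45")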
         pat = r"^(?P<first>[0-9\.,]*),(?P<last>[0-9\.,]*)$"
         pat = r"^(?P<first>[0-9\.,]*),(?P<last>[0-9\.,]*)$"
-        def repl(m):
+
+        def repl(m):  # noqa: D103
             return f"{m.group('first')}.{m.group('last')}"
             return f"{m.group('first')}.{m.group('last')}"
+
         df_this_page_long.loc[:, "data"] = df_this_page_long.loc[:, "data"].str.replace(
         df_this_page_long.loc[:, "data"] = df_this_page_long.loc[:, "data"].str.replace(
             pat, repl, regex=True
             pat, repl, regex=True
         )
         )
@@ -265,12 +278,11 @@ if __name__ == "__main__":

     encoding = {var: compression for var in data_pm2.data_vars}
     data_pm2.pr.to_netcdf(
-        output_folder / (output_filename + coords_terminologies["category"] + "_raw.nc"),
+        output_folder
+        / (output_filename + coords_terminologies["category"] + "_raw.nc"),
         encoding=encoding,
     )

-    #### continue here
-
     # ###
     # ## process the data
     # ###
@@ -288,7 +300,7 @@ if __name__ == "__main__":
     )

     # adapt source and metadata
-    current_source = data_proc_pm2.coords["source"].values[0]
+    current_source = data_proc_pm2.coords["source"].to_numpy()[0]
     data_temp = data_proc_pm2.pr.loc[{"source": current_source}]
     data_proc_pm2 = data_proc_pm2.pr.set("source", "BUR_NIR", data_temp)

@@ -305,6 +317,7 @@ if __name__ == "__main__":

     encoding = {var: compression for var in data_proc_pm2.data_vars}
     data_proc_pm2.pr.to_netcdf(
-        output_folder / (output_filename + coords_terminologies_2006["category"] + ".nc"),
+        output_folder
+        / (output_filename + coords_terminologies_2006["category"] + ".nc"),
         encoding=encoding,
     )

+ 30 - 0
src/unfccc_ghg_data/unfccc_reader/Republic_of_Korea/__init__.py

@@ -0,0 +1,30 @@
+"""Read South Korea's BURs, NIRs, NCs
+
+Scripts and configurations to read Argentina's submissions to the UNFCCC.
+Currently, the following submissions and datasets are available (all datasets
+including DI (red using the DI-reader) and legacy BUR/NIR (no code)):
+
+.. exec_code::
+    :hide_code:
+
+    from unfccc_ghg_data.helper.functions import (get_country_datasets,
+                                                  get_country_submissions)
+    country = 'KOR'
+    # print available submissions
+    print("="*15 + " Available submissions " + "="*15)
+    get_country_submissions(country, True)
+    print("")
+
+    # print available datasets
+    print("="*15 + " Available datasets " + "="*15)
+    get_country_datasets(country, True)
+
+You can also obtain this information running
+
+.. code-block:: bash
+
+    poetry run doit country_info country=KOR
+
+See below for a listing of scripts for BUR/NIR reading including links.
+
+"""

+ 511 - 403
src/unfccc_ghg_data/unfccc_reader/Republic_of_Korea/config_kor_bur4.py

@@ -1,413 +1,513 @@
+"""Config for South Korea's 2021 and 2022 inventories and BUR4
+
+Partial configuration for camelot and data aggregation. PRIMAP2 conversion
+config and metadata are defined in the reading script.
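+
+A sketch of how the reading scripts use these mappings (all names are defined
+in this module):
+
+.. code-block:: python
+
+    from config_kor_bur4 import cat_codes, cat_mapping
+
+    # original Korean category name -> IPCC1996-style category code
+    code = cat_codes["에너지"]  # "1" (energy)
+    # map to the IPCC2006 category where the two terminologies differ
+    code_2006 = cat_mapping.get(code, code)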
+
+"""
+
 original_names = [
-    '총배출량',
-    '순배출량',
-    '에너지',
-    'A. 연료연소',
-    '1. 에너지산업',
-    'a. 공공전기 및 열 생산',
-    'b. 석유정제',
-    'c. 고체연료 제조 및 기타 에너지 산업',
-    '2. 제조업 및 건설업',
-    'a. 철강',
-    'b. 비철금속',
-    'c. 화학',
-    'd. 펄프, 제지 및 인쇄',
-    'e. 식음료품 가공 및 담배 제조',
-    'f. 기타',
-    '  1. 비금속',
-    '  2. 조립금속',
-    '  3. 나무 및 목재',
-    '  4. 건설',
-    '  5. 섬유 및 가죽',
-    '  6. 기타제조',
-    '3. 수송',
-    'a. 민간항공',
-    'b. 도로수송',
-    'c. 철도',
-    'd. 해운',
-    'e. 기타수송',
-    '4. 기타',
-    'a. 상업/공공',
-    'b. 가정',
-    'c. 농업/임업/어업',
-    '5. 미분류',
-    'B. 탈루',
-    '1. 고체연료',
-    '2.  석유 및 천연가스',
-    'a.  석유',
-    'b. 천연가스',
-    '산업공정',
-    'A. 광물산업',
-    '1. 시멘트생산',
-    '2. 석회생산',
-    '3. 석회석 및 백운석 소비',
-    '4. 소다회 생산 및 소비',
-    '5. 아스팔트 루핑',
-    '6. 아스팔트 도로포장',
-    'B. 화학산업',
-    'C. 금속산업',
-    '1. 철강생산',
-    '2. 합금철 생산',
-    '3. 알루미늄 생산',
-    '4. 마그네슘 생산의 SF6 소비',
-    'D. 기타산업',
-    'E. 할로카본 및 육불화황 생산',
-    '1. 부산물 배출',
-    '2. 탈루 배출',
-    'F. 할로카본 및 육불화황 소비',
-    '1.  냉장 및 냉방',
-    '2.  발포제',
-    '3.  소화기',
-    '4.  에어로졸',
-    '5.  용매',
-    '6.  기타 용도의 ODS 대체물질 사용',
-    '7.  반도체 제조',
-    '8.  중전기기',
-    '9.  기타(잠재배출량)',
-    '농업',
-    'A.  장내발효',
-    '1. 소',
-    '2. 물소',
-    '3. 양(면양)',
-    '4. 양(산양)',
-    '5. 낙타 및 라마',
-    '6. 말',
-    '7. 노새 및 당나귀',
-    '8. 돼지',
-    '9. 가금류',
-    '10. 기타 가축(사슴)',
-    'B.  가축분뇨처리',
-    '1. 소',
-    '2. 물소',
-    '3. 양(면양)',
-    '4. 양(산양)',
-    '5. 낙타 및 라마',
-    '6. 말',
-    '7. 노새 및 당나귀',
-    '8. 돼지',
-    '9. 가금류',
-    '10. 기타 가축(사슴)',
-    'C.  벼재배',
-    '1. 관개',
-    '2. 천수답',
-    'D. 농경지토양',
-    '1. 직접배출',
-    '2. 목장, 방목구역, 분료(거름)',
-    '3. 간접배출',
-    'E. 사바나 소각',
-    'F. 작물잔사소각',
-    '1. 곡류',
-    '2. 두류(콩)',
-    '3. 근채류',
-    '4. 사탕수수',
-    '5. 기타',
-    'LULUCF',
-    'A. 산림지',
-    '1. 산림지로 유지된 산림지',
-    '2. 타토지에서 전용된 산림지',
-    '3. 산림지에서 질소 시비로 인한 N2O 배출',
-    '4. 산림지에서 배수로 인한 Non-CO2 배출',
-    '5. 산림지에서 바이오매스 연소에 의한 배출',
-    'B. 농경지',
-    '1. 농경지로 유지된 농경지',
-    '2. 타토지에서 전용된 농경지',
-    '3. 농경지로의 전용에 따른 N2O 배출',
-    '4. 농경지에서 농업용 석회시용으로 인한 CO2 배출',
-    '5. 농경지에서 바이오매스 연소에 의한 배출',
-    'C. 초지',
-    '1. 초지로 유지된 초지',
-    '2. 타토지에서 전용된 초지',
-    '3. 초지에서 농업용 석회시용으로 인한 CO2 배출',
-    '4. 초지에서 바이오매스 연소에 의한 배출',
-    'D. 습지',
-    '1. 습지로 유지된 습지',
-    '2. 타토지에서 전용된 습지',
-    '3. 습지에서 배수로 인한 Non-CO2 배출',
-    '4. 습지에서 바이오매스 연소에 의한 배출',
-    'E. 정주지',
-    'F. 기타토지',
-    '폐기물',
-    'A. 폐기물매립',
-    '1. 관리형 매립',
-    '2. 비관리형 매립',
-    'B. 하폐수처리',
-    '1. 폐수처리',
-    '2. 하수처리',
-    'C. 폐기물소각',
-    'D. 기타',
-    '별도항목(Memo Item)',
-    '분야·부문/연도',
-    'C. 국제벙커링 및 다국적 작전',
-    '1. 벙커링',
-    'a. 국제 항공',
-    'b. 국제 해운',
-    '2. 다국적 작전',
-    '* 참고 : NO = 배출활동 및 공정이 없는 경우, NE = 산정하지 아니하는 경우, NA = 자연적, 이론적으로 발생하지 않는 활동 및 공정의 경우, IE = 다른 항목에 포함하여 보고하는 경우, C = 기밀정보인 경우',
-    '3. 타토지로 전용된 농경지', # start of new codes in 2021 inventory
-    '4. 농경지로의 전용에 따른 N2O 배출',
-    '5. 농경지에서 농업용 석회시용으로 인한 CO2 배출',
-    '6. 농경지에서 바이오매스 연소에 의한 배출',
-    'G. 기타',
+    "총배출량",
+    "순배출량",
+    "에너지",
+    "A. 연료연소",
+    "1. 에너지산업",
+    "a. 공공전기 및 열 생산",
+    "b. 석유정제",
+    "c. 고체연료 제조 및 기타 에너지 산업",
+    "2. 제조업 및 건설업",
+    "a. 철강",
+    "b. 비철금속",
+    "c. 화학",
+    "d. 펄프, 제지 및 인쇄",
+    "e. 식음료품 가공 및 담배 제조",
+    "f. 기타",
+    "  1. 비금속",
+    "  2. 조립금속",
+    "  3. 나무 및 목재",
+    "  4. 건설",
+    "  5. 섬유 및 가죽",
+    "  6. 기타제조",
+    "3. 수송",
+    "a. 민간항공",
+    "b. 도로수송",
+    "c. 철도",
+    "d. 해운",
+    "e. 기타수송",
+    "4. 기타",
+    "a. 상업/공공",
+    "b. 가정",
+    "c. 농업/임업/어업",
+    "5. 미분류",
+    "B. 탈루",
+    "1. 고체연료",
+    "2.  석유 및 천연가스",
+    "a.  석유",
+    "b. 천연가스",
+    "산업공정",
+    "A. 광물산업",
+    "1. 시멘트생산",
+    "2. 석회생산",
+    "3. 석회석 및 백운석 소비",
+    "4. 소다회 생산 및 소비",
+    "5. 아스팔트 루핑",
+    "6. 아스팔트 도로포장",
+    "B. 화학산업",
+    "C. 금속산업",
+    "1. 철강생산",
+    "2. 합금철 생산",
+    "3. 알루미늄 생산",
+    "4. 마그네슘 생산의 SF6 소비",
+    "D. 기타산업",
+    "E. 할로카본 및 육불화황 생산",
+    "1. 부산물 배출",
+    "2. 탈루 배출",
+    "F. 할로카본 및 육불화황 소비",
+    "1.  냉장 및 냉방",
+    "2.  발포제",
+    "3.  소화기",
+    "4.  에어로졸",
+    "5.  용매",
+    "6.  기타 용도의 ODS 대체물질 사용",
+    "7.  반도체 제조",
+    "8.  중전기기",
+    "9.  기타(잠재배출량)",
+    "농업",
+    "A.  장내발효",
+    "1. 소",
+    "2. 물소",
+    "3. 양(면양)",
+    "4. 양(산양)",
+    "5. 낙타 및 라마",
+    "6. 말",
+    "7. 노새 및 당나귀",
+    "8. 돼지",
+    "9. 가금류",
+    "10. 기타 가축(사슴)",
+    "B.  가축분뇨처리",
+    "1. 소",
+    "2. 물소",
+    "3. 양(면양)",
+    "4. 양(산양)",
+    "5. 낙타 및 라마",
+    "6. 말",
+    "7. 노새 및 당나귀",
+    "8. 돼지",
+    "9. 가금류",
+    "10. 기타 가축(사슴)",
+    "C.  벼재배",
+    "1. 관개",
+    "2. 천수답",
+    "D. 농경지토양",
+    "1. 직접배출",
+    "2. 목장, 방목구역, 분료(거름)",
+    "3. 간접배출",
+    "E. 사바나 소각",
+    "F. 작물잔사소각",
+    "1. 곡류",
+    "2. 두류(콩)",
+    "3. 근채류",
+    "4. 사탕수수",
+    "5. 기타",
+    "LULUCF",
+    "A. 산림지",
+    "1. 산림지로 유지된 산림지",
+    "2. 타토지에서 전용된 산림지",
+    "3. 산림지에서 질소 시비로 인한 N2O 배출",
+    "4. 산림지에서 배수로 인한 Non-CO2 배출",
+    "5. 산림지에서 바이오매스 연소에 의한 배출",
+    "B. 농경지",
+    "1. 농경지로 유지된 농경지",
+    "2. 타토지에서 전용된 농경지",
+    "3. 농경지로의 전용에 따른 N2O 배출",
+    "4. 농경지에서 농업용 석회시용으로 인한 CO2 배출",
+    "5. 농경지에서 바이오매스 연소에 의한 배출",
+    "C. 초지",
+    "1. 초지로 유지된 초지",
+    "2. 타토지에서 전용된 초지",
+    "3. 초지에서 농업용 석회시용으로 인한 CO2 배출",
+    "4. 초지에서 바이오매스 연소에 의한 배출",
+    "D. 습지",
+    "1. 습지로 유지된 습지",
+    "2. 타토지에서 전용된 습지",
+    "3. 습지에서 배수로 인한 Non-CO2 배출",
+    "4. 습지에서 바이오매스 연소에 의한 배출",
+    "E. 정주지",
+    "F. 기타토지",
+    "폐기물",
+    "A. 폐기물매립",
+    "1. 관리형 매립",
+    "2. 비관리형 매립",
+    "B. 하폐수처리",
+    "1. 폐수처리",
+    "2. 하수처리",
+    "C. 폐기물소각",
+    "D. 기타",
+    "별도항목(Memo Item)",
+    "분야·부문/연도",
+    "C. 국제벙커링 및 다국적 작전",
+    "1. 벙커링",
+    "a. 국제 항공",
+    "b. 국제 해운",
+    "2. 다국적 작전",
+    "* 참고 : NO = 배출활동 및 공정이 없는 경우, NE = 산정하지 아니하는 경우, NA = 자연적, "
+    "이론적으로 발생하지 않는 활동 및 공정의 경우, IE = 다른 항목에 포함하여 보고하는 경우, "
+    "C = 기밀정보인 경우",
+    "3. 타토지로 전용된 농경지",  # start of new codes in 2021 inventory
+    "4. 농경지로의 전용에 따른 N2O 배출",
+    "5. 농경지에서 농업용 석회시용으로 인한 CO2 배출",
+    "6. 농경지에서 바이오매스 연소에 의한 배출",
+    "G. 기타",
 ]
 translations = [
-    ['Total emissions', 'M.0.EL'],
-    ['Net emissions', '0'],
-    ['energy', '1'],
-    ['A. Fuel combustion', '1.A'],
-    ['1. Energy industry', '1.A.1'],
-    ['a. Public electricity and heat production', '1.A.1.a'],
-    ['b. Oil refining', '1.A.1.b'],
-    ['c. Solid fuel manufacturing and other energy industries', '1.A.1.c'],
-    ['2. Manufacturing and construction', '1.A.2'],
-    ['a. steel', '1.A.2.a'],
-    ['b. Non-ferrous metal', '1.A.2.b'],
-    ['c. chemistry', '1.A.2.c'],
-    ['d. Pulp, paper and printing', '1.A.2.d'],
-    ['e. Food and beverage processing and tobacco manufacturing', '1.A.2.e'],
-    ['f. Etc', '1.A.2.f'],
-    ['  1. Non-metal', '1.A.2.f.1'],
-    ['  2. Assembly metal', '1.A.2.f.2'],
-    ['  3. Wood and timber', '1.A.2.f.3'],
-    ['  4. Construction', '1.A.2.f.4'],
-    ['  5. Textile and leather', '1.A.2.f.5'],
-    ['  6. Other manufacturing', '1.A.2.f.6'],
-    ['3. Transportation', '1.A.3'],
-    ['a. Civil aviation', '1.A.3.a.2'],
-    ['b. Road transport', '1.A.3.b'],
-    ['c. railroad', '1.A.3.c'],
-    ['d. shipping', '1.A.3.d.2'],
-    ['e. Other transport', '1.A.3.e'],
-    ['4. Other', '1.A.4'],
-    ['a. Commercial/Public', '1.A.4.a'],
-    ['b. home', '1.A.4.b'],
-    ['c. Agriculture/Forestry/Fishing', '1.A.4.c'],
-    ['5. Uncategorized', '1.A.5'],
-    ['B. Talu', '1.B'],
-    ['1. Solid fuel', '1.B.1'],
-    ['2. Oil and natural gas', '1.B.2'],
-    ['a. oil', '1.B.2.a'],
-    ['b. Natural gas', '1.B.2.b'],
-    ['Industrial process', '2'],
-    ['A. Mineral industry', '2.A'],
-    ['1. Cement production', '2.A.1'],
-    ['2. Lime production', '2.A.2'],
-    ['3. Limestone and Dolomite Consumption', '2.A.3'],
-    ['4. Soda ash production and consumption', '2.A.4'],
-    ['5. Asphalt roofing', '2.A.5'],
-    ['6. Asphalt road pavement', '2.A.6'],
-    ['B. Chemical industry', '2.B'],
-    ['C. Metal Industry', '2.C'],
-    ['1. Steel production', '2.C.1'],
-    ['2. Ferroalloy production', '2.C.2'],
-    ['3. Aluminum production', '2.C.3'],
-    ['4. SF6 consumption in magnesium production', '2.C.4'],
-    ['D. Other industries', '2.D'],
-    ['E. Production of halocarbons and sulfur hexafluoride', '2.E'],
-    ['1. Emission of by-products', '2.E.1'],
-    ['2. Fugitive discharge', '2.E.2'],
-    ['F. Consumption of halocarbons and sulfur hexafluoride', '2.F'],
-    ['1. Refrigeration and cooling', '2.F.1'],
-    ['2. Foaming agent', '2.F.2'],
-    ['3. Fire extinguisher', '2.F.3'],
-    ['4. Aerosol', '2.F.4'],
-    ['5. Solvent', '2.F.5'],
-    ['6. Use of ODS substitutes for other purposes', '2.F.6'],
-    ['7. Semiconductor manufacturing', '2.F.7'],
-    ['8. Heavy electric machine', '2.F.8'],
-    ['9. Others (potential emissions)', '2.F.9'],
-    ['Agriculture', '4'],
-    ['A. Intestinal fermentation', '4.A'],
-    ['1. cow', '4.A.1'],
-    ['2. Water buffalo', '4.A.2'],
-    ['3. Sheep (Cotton Sheep)', '4.A.3'],
-    ['4. Sheep (Goat)', '4.A.4'],
-    ['5. Camel and Llama', '4.A.5'],
-    ['6. Horse', '4.A.6'],
-    ['7. Mules and Donkeys', '4.A.7'],
-    ['8. Pig', '4.A.8'],
-    ['9. Poultry', '4.A.9'],
-    ['10. Other livestock (deer)', '4.A.10'],
-    ['B. Livestock manure treatment', '4.B'],
-    ['1. cow', '4.B.1'],
-    ['2. Water buffalo', '4.B.2'],
-    ['3. Sheep (Cotton Sheep)', '4.B.3'],
-    ['4. Sheep (Goat)', '4.B.4'],
-    ['5. Camel and Llama', '4.B.5'],
-    ['6. Horse', '4.B.6'],
-    ['7. Mules and Donkeys', '4.B.7'],
-    ['8. Pig', '4.B.8'],
-    ['9. Poultry', '4.B.9'],
-    ['10. Other livestock (deer)', '4.B.10'],
-    ['C. Rice cultivation', '4.C'],
-    ['1. irrigation', '4.C.1'],
-    ['2. Thousand answers', '4.C.4'],
-    ['D. Cropland soil', '4.D'],
-    ['1. Direct discharge', '4.D.1'],
-    ['2. Ranch, grazing area, manure (manure)', '4.D.2'],
-    ['3. Indirect emissions', '4.D.3'],
-    ['E. Savannah incineration', '4.E'],
-    ['F. Crop residue incineration', '4.F'],
-    ['1. Grains', '4.F.1'],
-    ['2. Beans (beans)', '4.F.2'],
-    ['3. Root vegetables', '4.F.3'],
-    ['4. Sugar cane', '4.F.4'],
-    ['5. Other', '4.F.5'],
-    ['LULUCF', '5'],
-    ['A. Forest land', '5.A'],
-    ['1. Forest land maintained as a forest land', '5.A.1'],  # categories differ from IPCC1996
-    ['2. Forest land converted from other lands', '5.A.2'],  # categories differ from IPCC1996
-    ['3. N2O emissions from nitrogen fertilization in forest areas', '5.A.3'],  # categories differ from IPCC1996
-    ['4. Non-CO2 emission due to drainage in forest areas', '5.A.4'],  # categories differ from IPCC1996
-    ['5. Emissions from biomass combustion in forest areas', '5.A.5'],  # categories differ from IPCC1996
-    ['B. Cropland', '5.B'],
-    ['1. Agricultural land maintained as agricultural land', '5.B.1'],  # categories differ from IPCC1996
-    ['2. Cropland converted from other lands', '5.B.2'],  # categories differ from IPCC1996
-    ['3. N2O emission due to conversion to agricultural land', '5.B.3'],  # categories differ from IPCC1996
-    ['4. CO2 emission from agricultural lime application in agricultural land', '5.B.4'],  # categories differ from IPCC1996
-    ['5. Emissions from biomass combustion in agricultural land', '5.B.5'],  # categories differ from IPCC1996
-    ['C. Grassland', '5.C'],
-    ['1. Grassland maintained as grassland', '5.C.1'],  # categories differ from IPCC1996
-    ['2. Grassland dedicated to Tatoji', '5.C.2'],  # categories differ from IPCC1996
-    ['3. CO2 emission from agricultural lime application in grassland', '5.C.3'],  # categories differ from IPCC1996
-    ['4. Emissions from biomass combustion in grassland', '5.C.4'],  # categories differ from IPCC1996
-    ['D. Wetlands', '5.D'],
-    ['1. Wetlands maintained as wetlands', '5.D.1'],  # categories differ from IPCC1996
-    ['2. Wetlands converted from Tatoji', '5.D.2'],  # categories differ from IPCC1996
-    ['3. Non-CO2 emission due to drainage in wetlands', '5.D.3'],  # categories differ from IPCC1996
-    ['4. Emissions from biomass combustion in wetlands', '5.D.4'],  # categories differ from IPCC1996
-    ['E. Jeongju-ji', '5.E'],
-    ['F. Other land', '5.F'],
-    ['waste', '6'],
-    ['A. Landfill of waste', '6.A'],
-    ['1. Managed landfill', '6.A.1'],
-    ['2. Unmanaged landfill', '6.A.2'],
-    ['B. Sewage water treatment', '6.B'],
-    ['1. Wastewater treatment', '6.B.1'],  # categories differ from IPCC1996
-    ['2. Sewage treatment', '6.B.2'],  # categories differ from IPCC1996
-    ['C. Waste incineration', '6.C'],
-    ['D. Other', '6.D'],
-    ['Memo Item', '\\IGNORE'],
-    ['Field·Sector/Year', '\\IGNORE'],
-    ['C. International bunkering and multinational operations', '\\IGNORE'],
-    ['1. Bunkering', 'M.1'],
-    ['a. International aviation', 'M.1.A'],
-    ['b. International shipping', 'M.1.B'],
-    ['2. Multinational operations', 'M.2'],
-    ['', '\\IGNORE'],
-    ['3. Farmland converted to Tato land', '5.B.3'],  # new codes in 2021 inventory start here
-    ['4. N2O emission due to conversion to agricultural land', '5.B.4'],
-    ['5. CO2 emission from agricultural lime application in agricultural land', '5.B.5'],
-    ['6. Emissions from burning biomass on agricultural land', '5.B.6'],
-    ['G. Others', '5.G'],
+    ["Total emissions", "M.0.EL"],
+    ["Net emissions", "0"],
+    ["energy", "1"],
+    ["A. Fuel combustion", "1.A"],
+    ["1. Energy industry", "1.A.1"],
+    ["a. Public electricity and heat production", "1.A.1.a"],
+    ["b. Oil refining", "1.A.1.b"],
+    ["c. Solid fuel manufacturing and other energy industries", "1.A.1.c"],
+    ["2. Manufacturing and construction", "1.A.2"],
+    ["a. steel", "1.A.2.a"],
+    ["b. Non-ferrous metal", "1.A.2.b"],
+    ["c. chemistry", "1.A.2.c"],
+    ["d. Pulp, paper and printing", "1.A.2.d"],
+    ["e. Food and beverage processing and tobacco manufacturing", "1.A.2.e"],
+    ["f. Etc", "1.A.2.f"],
+    ["  1. Non-metal", "1.A.2.f.1"],
+    ["  2. Assembly metal", "1.A.2.f.2"],
+    ["  3. Wood and timber", "1.A.2.f.3"],
+    ["  4. Construction", "1.A.2.f.4"],
+    ["  5. Textile and leather", "1.A.2.f.5"],
+    ["  6. Other manufacturing", "1.A.2.f.6"],
+    ["3. Transportation", "1.A.3"],
+    ["a. Civil aviation", "1.A.3.a.2"],
+    ["b. Road transport", "1.A.3.b"],
+    ["c. railroad", "1.A.3.c"],
+    ["d. shipping", "1.A.3.d.2"],
+    ["e. Other transport", "1.A.3.e"],
+    ["4. Other", "1.A.4"],
+    ["a. Commercial/Public", "1.A.4.a"],
+    ["b. home", "1.A.4.b"],
+    ["c. Agriculture/Forestry/Fishing", "1.A.4.c"],
+    ["5. Uncategorized", "1.A.5"],
+    ["B. Talu", "1.B"],
+    ["1. Solid fuel", "1.B.1"],
+    ["2. Oil and natural gas", "1.B.2"],
+    ["a. oil", "1.B.2.a"],
+    ["b. Natural gas", "1.B.2.b"],
+    ["Industrial process", "2"],
+    ["A. Mineral industry", "2.A"],
+    ["1. Cement production", "2.A.1"],
+    ["2. Lime production", "2.A.2"],
+    ["3. Limestone and Dolomite Consumption", "2.A.3"],
+    ["4. Soda ash production and consumption", "2.A.4"],
+    ["5. Asphalt roofing", "2.A.5"],
+    ["6. Asphalt road pavement", "2.A.6"],
+    ["B. Chemical industry", "2.B"],
+    ["C. Metal Industry", "2.C"],
+    ["1. Steel production", "2.C.1"],
+    ["2. Ferroalloy production", "2.C.2"],
+    ["3. Aluminum production", "2.C.3"],
+    ["4. SF6 consumption in magnesium production", "2.C.4"],
+    ["D. Other industries", "2.D"],
+    ["E. Production of halocarbons and sulfur hexafluoride", "2.E"],
+    ["1. Emission of by-products", "2.E.1"],
+    ["2. Fugitive discharge", "2.E.2"],
+    ["F. Consumption of halocarbons and sulfur hexafluoride", "2.F"],
+    ["1. Refrigeration and cooling", "2.F.1"],
+    ["2. Foaming agent", "2.F.2"],
+    ["3. Fire extinguisher", "2.F.3"],
+    ["4. Aerosol", "2.F.4"],
+    ["5. Solvent", "2.F.5"],
+    ["6. Use of ODS substitutes for other purposes", "2.F.6"],
+    ["7. Semiconductor manufacturing", "2.F.7"],
+    ["8. Heavy electric machine", "2.F.8"],
+    ["9. Others (potential emissions)", "2.F.9"],
+    ["Agriculture", "4"],
+    ["A. Intestinal fermentation", "4.A"],
+    ["1. cow", "4.A.1"],
+    ["2. Water buffalo", "4.A.2"],
+    ["3. Sheep (Cotton Sheep)", "4.A.3"],
+    ["4. Sheep (Goat)", "4.A.4"],
+    ["5. Camel and Llama", "4.A.5"],
+    ["6. Horse", "4.A.6"],
+    ["7. Mules and Donkeys", "4.A.7"],
+    ["8. Pig", "4.A.8"],
+    ["9. Poultry", "4.A.9"],
+    ["10. Other livestock (deer)", "4.A.10"],
+    ["B. Livestock manure treatment", "4.B"],
+    ["1. cow", "4.B.1"],
+    ["2. Water buffalo", "4.B.2"],
+    ["3. Sheep (Cotton Sheep)", "4.B.3"],
+    ["4. Sheep (Goat)", "4.B.4"],
+    ["5. Camel and Llama", "4.B.5"],
+    ["6. Horse", "4.B.6"],
+    ["7. Mules and Donkeys", "4.B.7"],
+    ["8. Pig", "4.B.8"],
+    ["9. Poultry", "4.B.9"],
+    ["10. Other livestock (deer)", "4.B.10"],
+    ["C. Rice cultivation", "4.C"],
+    ["1. irrigation", "4.C.1"],
+    ["2. Thousand answers", "4.C.4"],
+    ["D. Cropland soil", "4.D"],
+    ["1. Direct discharge", "4.D.1"],
+    ["2. Ranch, grazing area, manure (manure)", "4.D.2"],
+    ["3. Indirect emissions", "4.D.3"],
+    ["E. Savannah incineration", "4.E"],
+    ["F. Crop residue incineration", "4.F"],
+    ["1. Grains", "4.F.1"],
+    ["2. Beans (beans)", "4.F.2"],
+    ["3. Root vegetables", "4.F.3"],
+    ["4. Sugar cane", "4.F.4"],
+    ["5. Other", "4.F.5"],
+    ["LULUCF", "5"],
+    ["A. Forest land", "5.A"],
+    [
+        "1. Forest land maintained as a forest land",
+        "5.A.1",
+    ],  # categories differ from IPCC1996
+    [
+        "2. Forest land converted from other lands",
+        "5.A.2",
+    ],  # categories differ from IPCC1996
+    [
+        "3. N2O emissions from nitrogen fertilization in forest areas",
+        "5.A.3",
+    ],  # categories differ from IPCC1996
+    [
+        "4. Non-CO2 emission due to drainage in forest areas",
+        "5.A.4",
+    ],  # categories differ from IPCC1996
+    [
+        "5. Emissions from biomass combustion in forest areas",
+        "5.A.5",
+    ],  # categories differ from IPCC1996
+    ["B. Cropland", "5.B"],
+    [
+        "1. Agricultural land maintained as agricultural land",
+        "5.B.1",
+    ],  # categories differ from IPCC1996
+    [
+        "2. Cropland converted from other lands",
+        "5.B.2",
+    ],  # categories differ from IPCC1996
+    [
+        "3. N2O emission due to conversion to agricultural land",
+        "5.B.3",
+    ],  # categories differ from IPCC1996
+    [
+        "4. CO2 emission from agricultural lime application in agricultural land",
+        "5.B.4",
+    ],  # categories differ from IPCC1996
+    [
+        "5. Emissions from biomass combustion in agricultural land",
+        "5.B.5",
+    ],  # categories differ from IPCC1996
+    ["C. Grassland", "5.C"],
+    [
+        "1. Grassland maintained as grassland",
+        "5.C.1",
+    ],  # categories differ from IPCC1996
+    ["2. Grassland dedicated to Tatoji", "5.C.2"],  # categories differ from IPCC1996
+    [
+        "3. CO2 emission from agricultural lime application in grassland",
+        "5.C.3",
+    ],  # categories differ from IPCC1996
+    [
+        "4. Emissions from biomass combustion in grassland",
+        "5.C.4",
+    ],  # categories differ from IPCC1996
+    ["D. Wetlands", "5.D"],
+    ["1. Wetlands maintained as wetlands", "5.D.1"],  # categories differ from IPCC1996
+    ["2. Wetlands converted from Tatoji", "5.D.2"],  # categories differ from IPCC1996
+    [
+        "3. Non-CO2 emission due to drainage in wetlands",
+        "5.D.3",
+    ],  # categories differ from IPCC1996
+    [
+        "4. Emissions from biomass combustion in wetlands",
+        "5.D.4",
+    ],  # categories differ from IPCC1996
+    ["E. Jeongju-ji", "5.E"],
+    ["F. Other land", "5.F"],
+    ["waste", "6"],
+    ["A. Landfill of waste", "6.A"],
+    ["1. Managed landfill", "6.A.1"],
+    ["2. Unmanaged landfill", "6.A.2"],
+    ["B. Sewage water treatment", "6.B"],
+    ["1. Wastewater treatment", "6.B.1"],  # categories differ from IPCC1996
+    ["2. Sewage treatment", "6.B.2"],  # categories differ from IPCC1996
+    ["C. Waste incineration", "6.C"],
+    ["D. Other", "6.D"],
+    ["Memo Item", "\\IGNORE"],
+    ["Field·Sector/Year", "\\IGNORE"],
+    ["C. International bunkering and multinational operations", "\\IGNORE"],
+    ["1. Bunkering", "M.1"],
+    ["a. International aviation", "M.1.A"],
+    ["b. International shipping", "M.1.B"],
+    ["2. Multinational operations", "M.2"],
+    ["", "\\IGNORE"],
+    [
+        "3. Farmland converted to Tato land",
+        "5.B.3",
+    ],  # new codes in 2021 inventory start here
+    ["4. N2O emission due to conversion to agricultural land", "5.B.4"],
+    [
+        "5. CO2 emission from agricultural lime application in agricultural land",
+        "5.B.5",
+    ],
+    ["6. Emissions from burning biomass on agricultural land", "5.B.6"],
+    ["G. Others", "5.G"],
 ]
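 # build lookup dicts: original (Korean) category name -> English name / code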
 cat_name_translations = dict(zip(original_names, [cat[0] for cat in translations]))
 cat_codes = dict(zip(original_names, [cat[1] for cat in translations]))

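 # categories filtered out in the IPCC2006 conversion; see inline comments below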
 remove_cats = [
-    '1.A.1.a', '1.A.1.b', '1.A.1.c', '1.A.2.f',
-    '2.A', '2.D',
-    '2.F', '2.G',
-    '4.C.1', '4.C.4',
-    '4.D',
-    '4.F.1', '4.F.2', '4.F.3', '4.F.4', '4.F.5',  # detail not in 2006 categories
-    '5.A', '5.A.1', '5.A.2', '5.A.3', '5.A.4', '5.A.5',  # don't not match IPCC
+    "1.A.1.a",
+    "1.A.1.b",
+    "1.A.1.c",
+    "1.A.2.f",
+    "2.A",
+    "2.D",
+    "2.F",
+    "2.G",
+    "4.C.1",
+    "4.C.4",
+    "4.D",
+    "4.F.1",
+    "4.F.2",
+    "4.F.3",
+    "4.F.4",
+    "4.F.5",  # detail not in 2006 categories
+    "5.A",
+    "5.A.1",
+    "5.A.2",
+    "5.A.3",
+    "5.A.4",
+    "5.A.5",  # don't not match IPCC
     # categories
     # categories
-    '5.B', '5.B.1', '5.B.2', '5.B.3', '5.B.4', '5.B.5',
-    '5.C', '5.C.1', '5.C.2', '5.C.3', '5.C.4',
-    '5.D', '5.D.1', '5.D.2', '5.D.3', '5.D.4',
-    '5.E', '5.F',
-    '5.G', '5.B.6', # for 2021 NIR
+    "5.B",
+    "5.B.1",
+    "5.B.2",
+    "5.B.3",
+    "5.B.4",
+    "5.B.5",
+    "5.C",
+    "5.C.1",
+    "5.C.2",
+    "5.C.3",
+    "5.C.4",
+    "5.D",
+    "5.D.1",
+    "5.D.2",
+    "5.D.3",
+    "5.D.4",
+    "5.E",
+    "5.F",
+    "5.G",
+    "5.B.6",  # for 2021 NIR
 ]

 aggregate_before_mapping = {
-    '2006.2.D.4': {'sources': ['2.A.5', '2.A.6'], 'name': 'Other'},
-    '2006.3.C.4': {'sources': ['4.D.1', '4.D.2'],
-                   'name': 'Direct N2O Emissions from Managed Soils'},
-    '2006.M.3C1AG': {'sources': ['4.E', '4.F'], 'name': 'Biomass burning Agriculture'},
-    '2006.1.A.2.m': {'sources': ['1.A.2.f.2', '1.A.2.f.6'], 'name': 'Other'},
+    "2006.2.D.4": {"sources": ["2.A.5", "2.A.6"], "name": "Other"},
+    "2006.3.C.4": {
+        "sources": ["4.D.1", "4.D.2"],
+        "name": "Direct N2O Emissions from Managed Soils",
+    },
+    "2006.M.3C1AG": {"sources": ["4.E", "4.F"], "name": "Biomass burning Agriculture"},
+    "2006.1.A.2.m": {"sources": ["1.A.2.f.2", "1.A.2.f.6"], "name": "Other"},
 }

 cat_mapping = {
-    '1.A.2.f.1': '1.A.2.f',
-    '1.A.2.f.3': '1.A.2.j',
-    '1.A.2.f.4': '1.A.2.k',
-    '1.A.2.f.5': '1.A.2.l',
-    '2006.1.A.2.m': '1.A.2.m',
-    '2.A.4': '2.B.7',  # add to 2.B
-    '2.A.3': '2.A.4',
-    '2.D': '2.H',
-    '2006.2.D.4': '2.D.4',
-    '2.E': '2.B.9',  # add to 2.B
-    '2.E.1': '2.B.9.a',
-    '2.E.2': '2.B.9.b',
+    "1.A.2.f.1": "1.A.2.f",
+    "1.A.2.f.3": "1.A.2.j",
+    "1.A.2.f.4": "1.A.2.k",
+    "1.A.2.f.5": "1.A.2.l",
+    "2006.1.A.2.m": "1.A.2.m",
+    "2.A.4": "2.B.7",  # add to 2.B
+    "2.A.3": "2.A.4",
+    "2.D": "2.H",
+    "2006.2.D.4": "2.D.4",
+    "2.E": "2.B.9",  # add to 2.B
+    "2.E.1": "2.B.9.a",
+    "2.E.2": "2.B.9.b",
     #    '2.F', # remove?
-    '2.F.1': '2.F.1',  # just added here to avoid confusion
+    "2.F.1": "2.F.1",  # just added here to avoid confusion
     #    '2.F.2', '2.F.3', '2.F.4', '2.F.5',
-    '2.F.6': '2.E_1',
-    '2.F.7': '2.E_2',
-    '2.F.8': '2.G.1',
-    '2.F.9': '2.G.2',
-    '4': 'M.AG',
-    '4.A': '3.A.1',
-    '4.A.1': '3.A.1.a',
-    '4.A.2': '3.A.1.b',
-    '4.A.3': '3.A.1.c',
-    '4.A.4': '3.A.1.d',
-    '4.A.5': '3.A.1.e',
-    '4.A.6': '3.A.1.f',
-    '4.A.7': '3.A.1.g',
-    '4.A.8': '3.A.1.h',
-    '4.A.9': '3.A.1.i',
-    '4.A.10': '3.A.1.j',
-    '4.B': '3.A.2',
-    '4.B.1': '3.A.2.a',
-    '4.B.2': '3.A.2.b',
-    '4.B.3': '3.A.2.c',
-    '4.B.4': '3.A.2.d',
-    '4.B.5': '3.A.2.e',
-    '4.B.6': '3.A.2.f',
-    '4.B.7': '3.A.2.g',
-    '4.B.8': '3.A.2.h',
-    '4.B.9': '3.A.2.i',
-    '4.B.10': '3.A.2.j',
-    '4.C': '3.C.7',
-    '2006.3.C.4': '3.C.4',
-    '4.D.3': '3.C.5',
-    '2006.M.3C1AG': 'M.3.C.1.AG',
-    '5': 'M.LULUCF',
-    '6': '4',
-    '6.A': '4.A',
-    '6.A.1': '4.A.1',
-    '6.A.2': '4.A.2',
-    '6.B': '4.D',
-    '6.B.1': '4.D.1',
-    '6.B.2': '4.D.2',
-    '6.C': '4.C.1',
-    '6.D': '4.E',
-    'M.1': 'M.BK',
-    'M.1.A': 'M.BK.A',
-    'M.1.B': 'M.BK.M',
+    "2.F.6": "2.E_1",
+    "2.F.7": "2.E_2",
+    "2.F.8": "2.G.1",
+    "2.F.9": "2.G.2",
+    "4": "M.AG",
+    "4.A": "3.A.1",
+    "4.A.1": "3.A.1.a",
+    "4.A.2": "3.A.1.b",
+    "4.A.3": "3.A.1.c",
+    "4.A.4": "3.A.1.d",
+    "4.A.5": "3.A.1.e",
+    "4.A.6": "3.A.1.f",
+    "4.A.7": "3.A.1.g",
+    "4.A.8": "3.A.1.h",
+    "4.A.9": "3.A.1.i",
+    "4.A.10": "3.A.1.j",
+    "4.B": "3.A.2",
+    "4.B.1": "3.A.2.a",
+    "4.B.2": "3.A.2.b",
+    "4.B.3": "3.A.2.c",
+    "4.B.4": "3.A.2.d",
+    "4.B.5": "3.A.2.e",
+    "4.B.6": "3.A.2.f",
+    "4.B.7": "3.A.2.g",
+    "4.B.8": "3.A.2.h",
+    "4.B.9": "3.A.2.i",
+    "4.B.10": "3.A.2.j",
+    "4.C": "3.C.7",
+    "2006.3.C.4": "3.C.4",
+    "4.D.3": "3.C.5",
+    "2006.M.3C1AG": "M.3.C.1.AG",
+    "5": "M.LULUCF",
+    "6": "4",
+    "6.A": "4.A",
+    "6.A.1": "4.A.1",
+    "6.A.2": "4.A.2",
+    "6.B": "4.D",
+    "6.B.1": "4.D.1",
+    "6.B.2": "4.D.2",
+    "6.C": "4.C.1",
+    "6.D": "4.E",
+    "M.1": "M.BK",
+    "M.1.A": "M.BK.A",
+    "M.1.B": "M.BK.M",
 }

 aggregate_after_mapping = {
-    '1.A.3.a': {'sources': ['1.A.3.a.2'], 'name': 'Civil Aviation'},  # aviation
-    '1.A.3.d': {'sources': ['1.A.3.d.2'], 'name': 'Water-borne Navigation'},  # shipping
-    '2.A': {'sources': ['2.A.1', '2.A.2', '2.A.4', '2.A.5', '2.A.6'],
-            'name': 'Mineral Industry'},
-    '2.B': {'sources': ['2.B', '2.B.7', '2.B.9'], 'name': 'Chemical Industry'},
-    '2.D': {'sources': ['2.D.4'], 'name': 'Other'},
-    '2.E': {'sources': ['2.E_1', '2.E_2'], 'name': 'Electronics Industry'},
-    '2.F': {'sources': ['2.F.1', '2.F.2', '2.F.3', '2.F.4', '2.F.5'],
-            'name': 'Product uses as Substitutes for Ozone Depleting Substances'},
-    '2.G': {'sources': ['2.G.1', '2.G.2'], 'name': 'Other Product Manufacture and Use'},
-    '3.A': {'sources': ['3.A.1', '3.A.2'], 'name': 'Livestock'},
-    '3.C': {'sources': ['3.C.4', '3.C.5', '3.C.7'],
-                 'name': 'Aggregate sources and non-CO2 emissions sources on land'},
-    'M.3.C.AG': {'sources': ['3.C.4', '3.C.5', '3.C.7'],
-                 'name': 'Aggregate sources and non-CO2 emissions sources on land ('
-                         'Agriculture)'},
-    'M.AG.ELV': {'sources': ['M.3.C.AG'], 'name': 'Agriculture excluding livestock'},
-    '4.C': {'sources': ['4.C.1'], 'name': 'Incineration and Open Burning of Waste'},
+    "1.A.3.a": {"sources": ["1.A.3.a.2"], "name": "Civil Aviation"},  # aviation
+    "1.A.3.d": {"sources": ["1.A.3.d.2"], "name": "Water-borne Navigation"},  # shipping
+    "2.A": {
+        "sources": ["2.A.1", "2.A.2", "2.A.4", "2.A.5", "2.A.6"],
+        "name": "Mineral Industry",
+    },
+    "2.B": {"sources": ["2.B", "2.B.7", "2.B.9"], "name": "Chemical Industry"},
+    "2.D": {"sources": ["2.D.4"], "name": "Other"},
+    "2.E": {"sources": ["2.E_1", "2.E_2"], "name": "Electronics Industry"},
+    "2.F": {
+        "sources": ["2.F.1", "2.F.2", "2.F.3", "2.F.4", "2.F.5"],
+        "name": "Product uses as Substitutes for Ozone Depleting Substances",
+    },
+    "2.G": {"sources": ["2.G.1", "2.G.2"], "name": "Other Product Manufacture and Use"},
+    "3.A": {"sources": ["3.A.1", "3.A.2"], "name": "Livestock"},
+    "3.C": {
+        "sources": ["3.C.4", "3.C.5", "3.C.7"],
+        "name": "Aggregate sources and non-CO2 emissions sources on land",
+    },
+    "M.3.C.AG": {
+        "sources": ["3.C.4", "3.C.5", "3.C.7"],
+        "name": "Aggregate sources and non-CO2 emissions sources on land ("
+        "Agriculture)",
+    },
+    "M.AG.ELV": {"sources": ["M.3.C.AG"], "name": "Agriculture excluding livestock"},
+    "4.C": {"sources": ["4.C.1"], "name": "Incineration and Open Burning of Waste"},
 }

 coords_terminologies_2006 = {
@@ -422,27 +522,35 @@ filter_remove_2006 = {
     },
     "livestock": {  # temp until double cat name problem is solved
         "category (IPCC2006_PRIMAP)": [
-            '4.B.1', '4.B.10', '4.B.2', '4.B.3', '4.B.4',
-            '4.B.5', '4.B.6', '4.B.7', '4.B.8', '4.B.9',
+            "4.B.1",
+            "4.B.10",
+            "4.B.2",
+            "4.B.3",
+            "4.B.4",
+            "4.B.5",
+            "4.B.6",
+            "4.B.7",
+            "4.B.8",
+            "4.B.9",
         ]
     },
-    "fmap": {
-        "category (IPCC2006_PRIMAP)": remove_cats
-    },
+    "fmap": {"category (IPCC2006_PRIMAP)": remove_cats},
     "f_bef_map": {
     "f_bef_map": {
         "category (IPCC2006_PRIMAP)": [
         "category (IPCC2006_PRIMAP)": [
-            '2.A.5', '2.A.6',  # combined to 2006.2.D.4
-            '4.D.1', '4.D.2',  # combined to 2006.3.C.4
-            '4.E', '4.F',  # 2006.M.3.C.1.AG
-            '1.A.2.f.2', '1.A.2.f.6',  # 2006.1.A.2.m
+            "2.A.5",
+            "2.A.6",  # combined to 2006.2.D.4
+            "4.D.1",
+            "4.D.2",  # combined to 2006.3.C.4
+            "4.E",
+            "4.F",  # 2006.M.3.C.1.AG
+            "1.A.2.f.2",
+            "1.A.2.f.6",  # 2006.1.A.2.m
         ]
-    }
+    },
 }

 filter_remove_after_agg = {
     "tempCats": {
-        "category (IPCC2006_PRIMAP)": [
-            "2.E_1", "2.E_2"
-        ],
+        "category (IPCC2006_PRIMAP)": ["2.E_1", "2.E_2"],
     },
 }

+ 125 - 76
src/unfccc_ghg_data/unfccc_reader/Republic_of_Korea/read_KOR_2021_Inventory_from_xlsx.py

@@ -1,12 +1,18 @@
-# this script reads data from Korea's 2021 national inventory which is underlying BUR4
-# Data is read from the xlsx file
+"""
+Read Korea's 2021 inventory from xlsx
+
+This script reads data from Korea's 2021 national inventory, which underlies
+BUR4. Data are read from the xlsx file.
+
+"""
+

 import os
 import sys

 import pandas as pd
 import primap2 as pm2
-from .config_kor_bur4 import (
+from config_kor_bur4 import (
     aggregate_after_mapping,
     aggregate_before_mapping,
     cat_codes,
@@ -24,42 +30,43 @@ if __name__ == "__main__":
     # ###
     # configuration
     # ###
-    input_folder = downloaded_data_path / 'non-UNFCCC' / 'Republic_of_Korea' / \
-                   '2021-Inventory'
-    output_folder = extracted_data_path / 'non-UNFCCC' / 'Republic_of_Korea'
+    input_folder = (
+        downloaded_data_path / "non-UNFCCC" / "Republic_of_Korea" / "2021-Inventory"
+    )
+    output_folder = extracted_data_path / "non-UNFCCC" / "Republic_of_Korea"
     if not output_folder.exists():
         output_folder.mkdir()

-    output_filename = 'KOR_2021-Inventory_2021_'
+    output_filename = "KOR_2021-Inventory_2021_"

-    inventory_file = 'Republic_of_Korea_National_GHG_Inventory_(1990_2019).xlsx'
+    inventory_file = "Republic_of_Korea_National_GHG_Inventory_(1990_2019).xlsx"
     years_to_read = range(1990, 2019 + 1)

-    sheets_to_read = ['온실가스', 'CO2', 'CH4', 'N2O', 'HFCs', 'PFCs', 'SF6']
+    sheets_to_read = ["온실가스", "CO2", "CH4", "N2O", "HFCs", "PFCs", "SF6"]
     cols_to_read = range(1, 2019 - 1990 + 3)

     # columns for category code and original category name
-    index_cols = ['분야·부문/연도']
+    index_cols = ["분야·부문/연도"]

     sheet_metadata = {
-        'entity': {
-            '온실가스': 'KYOTOGHG (SARGWP100)',
-            'CO2': 'CO2',
-            'CH4': 'CH4 (SARGWP100)',
-            'N2O': 'N2O (SARGWP100)',
-            'HFCs': 'HFCS (SARGWP100)',
-            'PFCs': 'PFCS (SARGWP100)',
-            'SF6': 'SF6 (SARGWP100)',
+        "entity": {
+            "온실가스": "KYOTOGHG (SARGWP100)",
+            "CO2": "CO2",
+            "CH4": "CH4 (SARGWP100)",
+            "N2O": "N2O (SARGWP100)",
+            "HFCs": "HFCS (SARGWP100)",
+            "PFCs": "PFCS (SARGWP100)",
+            "SF6": "SF6 (SARGWP100)",
+        },
+        "unit": {
+            "온실가스": "Gg CO2 / yr",
+            "CO2": "Gg CO2 / yr",
+            "CH4": "Gg CO2 / yr",
+            "N2O": "Gg CO2 / yr",
+            "HFCs": "Gg CO2 / yr",
+            "PFCs": "Gg CO2 / yr",
+            "SF6": "Gg CO2 / yr",
         },
-        'unit': {
-            '온실가스': 'Gg CO2 / yr',
-            'CO2': 'Gg CO2 / yr',
-            'CH4': 'Gg CO2 / yr',
-            'N2O': 'Gg CO2 / yr',
-            'HFCs': 'Gg CO2 / yr',
-            'PFCs': 'Gg CO2 / yr',
-            'SF6': 'Gg CO2 / yr',
-        }
     }

     # definitions for conversion to interchange format
@@ -73,7 +80,7 @@ if __name__ == "__main__":

     add_coords_cols = {
         "orig_cat_name": ["orig_cat_name", "category"],
-        "cat_name_translation": ["cat_name_translation", "category"]
+        "cat_name_translation": ["cat_name_translation", "category"],
     }

     coords_terminologies = {
@@ -99,12 +106,20 @@ if __name__ == "__main__":
         "f1": {
         "f1": {
             "category (IPCC1996_KOR_INV)": "\\IGNORE",
             "category (IPCC1996_KOR_INV)": "\\IGNORE",
         },
         },
-        "livestock": { # temp until double cat name problem is solved
+        "livestock": {  # temp until double cat name problem is solved
             "category (IPCC1996_KOR_INV)": [
             "category (IPCC1996_KOR_INV)": [
-                '4.B.1', '4.B.10', '4.B.2', '4.B.3', '4.B.4',
-                '4.B.5', '4.B.6', '4.B.7', '4.B.8', '4.B.9',
+                "4.B.1",
+                "4.B.10",
+                "4.B.2",
+                "4.B.3",
+                "4.B.4",
+                "4.B.5",
+                "4.B.6",
+                "4.B.7",
+                "4.B.8",
+                "4.B.9",
             ]
             ]
-        }
+        },
     }
     }
 
 
     filter_keep = {}
     filter_keep = {}
@@ -115,7 +130,8 @@ if __name__ == "__main__":
         "contact": "mail@johannes-guetschow.de",
         "contact": "mail@johannes-guetschow.de",
         "title": "Republic of Korea: National Greenhouse Gas Inventory Report 2021",
         "title": "Republic of Korea: National Greenhouse Gas Inventory Report 2021",
         "comment": "Read fom xlsx file by Johannes Gütschow",
         "comment": "Read fom xlsx file by Johannes Gütschow",
-        "institution": "Republic of Korea, Ministry of Environment, Greenhouse Gas Inventory and Research Center",
+        "institution": "Republic of Korea, Ministry of Environment, "
+        "Greenhouse Gas Inventory and Research Center",
     }
     }
 
 
     cols_for_space_stripping = []
     cols_for_space_stripping = []
@@ -135,11 +151,17 @@ if __name__ == "__main__":

     for sheet in sheets_to_read:
         # read current sheet (one sheet per gas)
-        df_current = pd.read_excel(input_folder / inventory_file, sheet_name=sheet, skiprows=3, nrows=146, usecols=cols_to_read,
-                                   engine="openpyxl")
+        df_current = pd.read_excel(
+            input_folder / inventory_file,
+            sheet_name=sheet,
+            skiprows=3,
+            nrows=146,
+            usecols=cols_to_read,
+            engine="openpyxl",
+        )
         # drop all rows where the index cols (category code and name) are both NaN
         # as without one of them there is no category information
-        df_current.dropna(axis=0, how='all', subset=index_cols, inplace=True)
+        df_current = df_current.dropna(axis=0, how="all", subset=index_cols)
         # set index. necessary for the stack operation in the conversion to long format
         # df_current = df_current.set_index(index_cols)
         # add columns
@@ -153,7 +175,7 @@ if __name__ == "__main__":

     df_all = df_all.reset_index(drop=True)
     # rename category col because filtering produces problems with korean col names
-    df_all.rename(columns={"분야·부문/연도": "category"}, inplace=True)
+    df_all = df_all.rename(columns={"분야·부문/연도": "category"})

     # create copies of category col for further processing
     df_all["orig_cat_name"] = df_all["category"]
@@ -172,20 +194,22 @@ if __name__ == "__main__":
         coords_defaults=coords_defaults,
         coords_terminologies=coords_terminologies,
         coords_value_mapping=coords_value_mapping,
-        #coords_value_filling=coords_value_filling,
-        #filter_remove=filter_remove,
-        #filter_keep=filter_keep,
+        # coords_value_filling=coords_value_filling,
+        # filter_remove=filter_remove,
+        # filter_keep=filter_keep,
         meta_data=meta_data,
         convert_str=True,
-    copy_df=True, # we need the unchanged DF for the conversion step
-        )
+        copy_df=True,  # we need the unchanged DF for the conversion step
+    )

     filter_data(data_if, filter_remove=filter_remove)

-    #conversion to PRIMAP2 native format
+    # conversion to PRIMAP2 native format
     data_pm2 = pm2.pm2io.from_interchange_format(data_if)
     # convert back to IF to have units in the fixed format
-    data_pm2 = data_pm2.reset_coords(["orig_cat_name", "cat_name_translation"], drop=True)
+    data_pm2 = data_pm2.reset_coords(
+        ["orig_cat_name", "cat_name_translation"], drop=True
+    )
     data_if = data_pm2.pr.to_interchange_format()

     # ###
@@ -193,17 +217,20 @@ if __name__ == "__main__":
     # ###
     if not output_folder.exists():
         output_folder.mkdir()
-    #pm2.pm2io.write_interchange_format(output_folder / (output_filename + coords_terminologies["category"]), data_if)
+    pm2.pm2io.write_interchange_format(
+        output_folder / (output_filename + coords_terminologies["category"]), data_if
+    )

     data_pm2 = pm2.pm2io.from_interchange_format(data_if)
     encoding = {var: compression for var in data_pm2.data_vars}
-    #data_pm2.pr.to_netcdf(output_folder / (output_filename + coords_terminologies["category"] + ".nc"), encoding=encoding)
+    data_pm2.pr.to_netcdf(
+        output_folder / (output_filename + coords_terminologies["category"] + ".nc"),
+        encoding=encoding,
+    )

     # ###
     # conversion to ipcc 2006 categories
     # ###
-
-
     data_if_2006 = pm2.pm2io.convert_wide_dataframe_if(
         df_all,
         coords_cols=coords_cols,
@@ -216,21 +243,23 @@ if __name__ == "__main__":
         copy_df=True,  # don't mess up the dataframe when testing
     )

-    cat_label = 'category (' + coords_terminologies_2006["category"] + ')'
+    cat_label = "category (" + coords_terminologies_2006["category"] + ")"
     # agg before mapping

     for cat_to_agg in aggregate_before_mapping:
-        mask = data_if_2006[cat_label].isin(aggregate_before_mapping[cat_to_agg]["sources"])
+        mask = data_if_2006[cat_label].isin(
+            aggregate_before_mapping[cat_to_agg]["sources"]
+        )
         df_test = data_if_2006[mask]

         if len(df_test) > 0:
             print(f"Aggregating category {cat_to_agg}")
             df_combine = df_test.copy(deep=True)

-            time_format = '%Y'
+            time_format = "%Y"
             time_columns = [
                 col
-                for col in df_combine.columns.values
+                for col in df_combine.columns.to_numpy()
                 if matches_time_format(col, time_format)
             ]

@@ -238,20 +267,25 @@ if __name__ == "__main__":
                 df_combine[col] = pd.to_numeric(df_combine[col], errors="coerce")

             df_combine = df_combine.groupby(
-                by=['source', 'scenario (PRIMAP)', 'provenance', 'area (ISO3)', 'entity',
-                    'unit']).sum()
-
+                by=[
+                    "source",
+                    "scenario (PRIMAP)",
+                    "provenance",
+                    "area (ISO3)",
+                    "entity",
+                    "unit",
+                ]
+            ).sum()

             df_combine.insert(0, cat_label, cat_to_agg)
-            df_combine.insert(1, "orig_cat_name",
-                              aggregate_before_mapping[cat_to_agg]["name"])
+            df_combine.insert(
+                1, "orig_cat_name", aggregate_before_mapping[cat_to_agg]["name"]
+            )

             df_combine = df_combine.reset_index()

             if cat_to_agg in aggregate_before_mapping[cat_to_agg]["sources"]:
-                filter_this_cat = {
-                    "f": {cat_label: cat_to_agg}
-                }
+                filter_this_cat = {"f": {cat_label: cat_to_agg}}
                 filter_data(data_if_2006, filter_remove=filter_this_cat)

             data_if_2006 = pd.concat([data_if_2006, df_combine])
@@ -268,17 +302,19 @@ if __name__ == "__main__":
     # agg after mapping

     for cat_to_agg in aggregate_after_mapping:
-        mask = data_if_2006[cat_label].isin(aggregate_after_mapping[cat_to_agg]["sources"])
+        mask = data_if_2006[cat_label].isin(
+            aggregate_after_mapping[cat_to_agg]["sources"]
+        )
         df_test = data_if_2006[mask]

         if len(df_test) > 0:
             print(f"Aggregating category {cat_to_agg}")
             df_combine = df_test.copy(deep=True)

-            time_format = '%Y'
+            time_format = "%Y"
             time_columns = [
                 col
-                for col in df_combine.columns.values
+                for col in df_combine.columns.to_numpy()
                 if matches_time_format(col, time_format)
             ]

@@ -286,36 +322,49 @@ if __name__ == "__main__":
                 df_combine[col] = pd.to_numeric(df_combine[col], errors="coerce")

             df_combine = df_combine.groupby(
-                by=['source', 'scenario (PRIMAP)', 'provenance', 'area (ISO3)', 'entity',
-                    'unit']).sum()
+                by=[
+                    "source",
+                    "scenario (PRIMAP)",
+                    "provenance",
+                    "area (ISO3)",
+                    "entity",
+                    "unit",
+                ]
+            ).sum()

             df_combine.insert(0, cat_label, cat_to_agg)
-            df_combine.insert(1, "orig_cat_name",
-                              aggregate_after_mapping[cat_to_agg]["name"])
+            df_combine.insert(
+                1, "orig_cat_name", aggregate_after_mapping[cat_to_agg]["name"]
+            )

             df_combine = df_combine.reset_index()

             if cat_to_agg in aggregate_after_mapping[cat_to_agg]["sources"]:
-                filter_this_cat = {
-                    "f": {cat_label: cat_to_agg}
-                }
+                filter_this_cat = {"f": {cat_label: cat_to_agg}}
                 filter_data(data_if_2006, filter_remove=filter_this_cat)

             data_if_2006 = pd.concat([data_if_2006, df_combine])
         else:
             print(f"no data to aggregate category {cat_to_agg}")

-
-    #conversion to PRIMAP2 native format
+    # conversion to PRIMAP2 native format
     data_pm2_2006 = pm2.pm2io.from_interchange_format(data_if_2006)
     # convert back to IF to have units in the fixed format
-    data_pm2_2006 = data_pm2_2006.reset_coords(["orig_cat_name", "cat_name_translation"],
-                                           drop=True)
+    data_pm2_2006 = data_pm2_2006.reset_coords(
+        ["orig_cat_name", "cat_name_translation"], drop=True
+    )
     data_if_2006 = data_pm2_2006.pr.to_interchange_format()
     # save IPCC2006 data

     filter_data(data_if_2006, filter_remove=filter_remove_after_agg)
-    pm2.pm2io.write_interchange_format(output_folder / (output_filename + coords_terminologies_2006["category"]), data_if_2006)
+    pm2.pm2io.write_interchange_format(
+        output_folder / (output_filename + coords_terminologies_2006["category"]),
+        data_if_2006,
+    )

     encoding = {var: compression for var in data_pm2_2006.data_vars}
-    data_pm2_2006.pr.to_netcdf(output_folder / (output_filename + coords_terminologies_2006["category"] + ".nc"), encoding=encoding)
+    data_pm2_2006.pr.to_netcdf(
+        output_folder
+        / (output_filename + coords_terminologies_2006["category"] + ".nc"),
+        encoding=encoding,
+    )

+ 140 - 82
src/unfccc_ghg_data/unfccc_reader/Republic_of_Korea/read_KOR_2022_Inventory_from_xlsx.py

@@ -1,12 +1,17 @@
-# this script reads data from Korea's 2021 national inventory which is underlying BUR4
-# Data is read from the xlsx file
+"""
+Read Korea's 2022 inventory from xlsx
+
+This script reads data from Korea's 2022 national inventory.
+Data are read from the xlsx file.
+
+"""
 
 
 import os
 import os
 import sys
 import sys
 
 
 import pandas as pd
 import pandas as pd
 import primap2 as pm2
 import primap2 as pm2
-from .config_kor_bur4 import (
+from config_kor_bur4 import (
     aggregate_after_mapping,
     aggregate_after_mapping,
     aggregate_before_mapping,
     aggregate_before_mapping,
     cat_codes,
     cat_codes,
@@ -24,42 +29,43 @@ if __name__ == "__main__":
     # ###
     # configuration
     # ###
-    input_folder = downloaded_data_path / 'non-UNFCCC' / 'Republic_of_Korea' / \
-                   '2022-Inventory'
-    output_folder = extracted_data_path / 'non-UNFCCC' / 'Republic_of_Korea'
+    input_folder = (
+        downloaded_data_path / "non-UNFCCC" / "Republic_of_Korea" / "2022-Inventory"
+    )
+    output_folder = extracted_data_path / "non-UNFCCC" / "Republic_of_Korea"
     if not output_folder.exists():
         output_folder.mkdir()

-    output_filename = 'KOR_2022-Inventory_2022_'
+    output_filename = "KOR_2022-Inventory_2022_"

-    inventory_file = 'Republic_of_Korea_National_GHG_Inventory_(1990_2020).xlsx'
+    inventory_file = "Republic_of_Korea_National_GHG_Inventory_(1990_2020).xlsx"
     years_to_read = range(1990, 2020 + 1)

-    sheets_to_read = ['온실가스', 'CO2', 'CH4', 'N2O', 'HFCs', 'PFCs', 'SF6']
+    sheets_to_read = ["온실가스", "CO2", "CH4", "N2O", "HFCs", "PFCs", "SF6"]
     cols_to_read = range(1, 2020 - 1990 + 3)

     # columns for category code and original category name
-    index_cols = ['분야·부문/연도']
+    index_cols = ["분야·부문/연도"]

     sheet_metadata = {
-        'entity': {
-            '온실가스': 'KYOTOGHG (SARGWP100)',
-            'CO2': 'CO2',
-            'CH4': 'CH4 (SARGWP100)',
-            'N2O': 'N2O (SARGWP100)',
-            'HFCs': 'HFCS (SARGWP100)',
-            'PFCs': 'PFCS (SARGWP100)',
-            'SF6': 'SF6 (SARGWP100)',
+        "entity": {
+            "온실가스": "KYOTOGHG (SARGWP100)",
+            "CO2": "CO2",
+            "CH4": "CH4 (SARGWP100)",
+            "N2O": "N2O (SARGWP100)",
+            "HFCs": "HFCS (SARGWP100)",
+            "PFCs": "PFCS (SARGWP100)",
+            "SF6": "SF6 (SARGWP100)",
+        },
+        "unit": {
+            "온실가스": "Gg CO2 / yr",
+            "CO2": "Gg CO2 / yr",
+            "CH4": "Gg CO2 / yr",
+            "N2O": "Gg CO2 / yr",
+            "HFCs": "Gg CO2 / yr",
+            "PFCs": "Gg CO2 / yr",
+            "SF6": "Gg CO2 / yr",
         },
-        'unit': {
-            '온실가스': 'Gg CO2 / yr',
-            'CO2': 'Gg CO2 / yr',
-            'CH4': 'Gg CO2 / yr',
-            'N2O': 'Gg CO2 / yr',
-            'HFCs': 'Gg CO2 / yr',
-            'PFCs': 'Gg CO2 / yr',
-            'SF6': 'Gg CO2 / yr',
-        }
     }

     # definitions for conversion to interchange format
@@ -73,7 +79,7 @@ if __name__ == "__main__":

     add_coords_cols = {
         "orig_cat_name": ["orig_cat_name", "category"],
-        "cat_name_translation": ["cat_name_translation", "category"]
+        "cat_name_translation": ["cat_name_translation", "category"],
     }

     coords_terminologies = {
@@ -99,12 +105,20 @@ if __name__ == "__main__":
         "f1": {
         "f1": {
             "category (IPCC1996_KOR_INV)": "\\IGNORE",
             "category (IPCC1996_KOR_INV)": "\\IGNORE",
         },
         },
-        "livestock": { # temp until double cat name problem is solved
+        "livestock": {  # temp until double cat name problem is solved
             "category (IPCC1996_KOR_INV)": [
             "category (IPCC1996_KOR_INV)": [
-                '4.B.1', '4.B.10', '4.B.2', '4.B.3', '4.B.4',
-                '4.B.5', '4.B.6', '4.B.7', '4.B.8', '4.B.9',
+                "4.B.1",
+                "4.B.10",
+                "4.B.2",
+                "4.B.3",
+                "4.B.4",
+                "4.B.5",
+                "4.B.6",
+                "4.B.7",
+                "4.B.8",
+                "4.B.9",
             ]
-        }
+        },
     }

     filter_keep = {}
@@ -115,11 +129,10 @@ if __name__ == "__main__":
         "contact": "mail@johannes-guetschow.de",
         "contact": "mail@johannes-guetschow.de",
         "title": "Republic of Korea: National Greenhouse Gas Inventory Report 2022",
         "title": "Republic of Korea: National Greenhouse Gas Inventory Report 2022",
         "comment": "Read fom xlsx file by Johannes Gütschow",
         "comment": "Read fom xlsx file by Johannes Gütschow",
-        "institution": "Republic of Korea, Ministry of Environment, Greenhouse Gas Inventory and Research Center",
+        "institution": "Republic of Korea, Ministry of Environment, "
+        "Greenhouse Gas Inventory and Research Center",
     }

-
-
     cols_for_space_stripping = []

     compression = dict(zlib=True, complevel=9)
@@ -137,11 +150,17 @@ if __name__ == "__main__":

     for sheet in sheets_to_read:
         # read current sheet (one sheet per gas)
-        df_current = pd.read_excel(input_folder / inventory_file, sheet_name=sheet, skiprows=3, nrows=146, usecols=cols_to_read,
-                                   engine="openpyxl")
+        df_current = pd.read_excel(
+            input_folder / inventory_file,
+            sheet_name=sheet,
+            skiprows=3,
+            nrows=146,
+            usecols=cols_to_read,
+            engine="openpyxl",
+        )
         # drop all rows where the index cols (category code and name) are both NaN
         # as without one of them there is no category information
-        df_current.dropna(axis=0, how='all', subset=index_cols, inplace=True)
+        df_current = df_current.dropna(axis=0, how="all", subset=index_cols)
         # set index. necessary for the stack operation in the conversion to long format
         # df_current = df_current.set_index(index_cols)
         # make sure all col headers are str
@@ -157,14 +176,12 @@ if __name__ == "__main__":

     df_all = df_all.reset_index(drop=True)
     # rename category col because filtering produces problems with korean col names
-    df_all.rename(columns={"분야·부문/연도": "category"}, inplace=True)
+    df_all = df_all.rename(columns={"분야·부문/연도": "category"})

     # create copies of category col for further processing
     df_all["orig_cat_name"] = df_all["category"]
     df_all["cat_name_translation"] = df_all["category"]

-
-
     # ###
     # convert to PRIMAP2 interchange format
     # ###
@@ -175,20 +192,22 @@ if __name__ == "__main__":
         coords_defaults=coords_defaults,
         coords_terminologies=coords_terminologies,
         coords_value_mapping=coords_value_mapping,
-        #coords_value_filling=coords_value_filling,
-        #filter_remove=filter_remove,
-        #filter_keep=filter_keep,
+        # coords_value_filling=coords_value_filling,
+        # filter_remove=filter_remove,
+        # filter_keep=filter_keep,
         meta_data=meta_data,
         convert_str=True,
-        copy_df=True, # we need the unchanged DF for the conversion step
-        )
+        copy_df=True,  # we need the unchanged DF for the conversion step
+    )

     filter_data(data_if, filter_remove=filter_remove)

-    #conversion to PRIMAP2 native format
+    # conversion to PRIMAP2 native format
     data_pm2 = pm2.pm2io.from_interchange_format(data_if)
     # convert back to IF to have units in the fixed format
-    data_pm2 = data_pm2.reset_coords(["orig_cat_name", "cat_name_translation"], drop=True)
+    data_pm2 = data_pm2.reset_coords(
+        ["orig_cat_name", "cat_name_translation"], drop=True
+    )
     data_if = data_pm2.pr.to_interchange_format()

     # ###
@@ -196,17 +215,21 @@ if __name__ == "__main__":
     # ###
     if not output_folder.exists():
         output_folder.mkdir()
-    pm2.pm2io.write_interchange_format(output_folder / (output_filename + coords_terminologies["category"]), data_if)
+    pm2.pm2io.write_interchange_format(
+        output_folder / (output_filename + coords_terminologies["category"]), data_if
+    )

     data_pm2 = pm2.pm2io.from_interchange_format(data_if)
     encoding = {var: compression for var in data_pm2.data_vars}
-    data_pm2.pr.to_netcdf(output_folder / (output_filename + coords_terminologies["category"] + ".nc"), encoding=encoding)
+    data_pm2.pr.to_netcdf(
+        output_folder / (output_filename + coords_terminologies["category"] + ".nc"),
+        encoding=encoding,
+    )

     # ###
     # conversion to ipcc 2006 categories
     # ###

-
     data_if_2006 = pm2.pm2io.convert_wide_dataframe_if(
         df_all,
         coords_cols=coords_cols,
@@ -219,21 +242,23 @@ if __name__ == "__main__":
         copy_df=True,  # don't mess up the dataframe when testing
     )

-    cat_label = 'category (' + coords_terminologies_2006["category"] + ')'
+    cat_label = "category (" + coords_terminologies_2006["category"] + ")"
     # agg before mapping

     for cat_to_agg in aggregate_before_mapping:
-        mask = data_if_2006[cat_label].isin(aggregate_before_mapping[cat_to_agg]["sources"])
+        mask = data_if_2006[cat_label].isin(
+            aggregate_before_mapping[cat_to_agg]["sources"]
+        )
         df_test = data_if_2006[mask]

         if len(df_test) > 0:
             print(f"Aggregating category {cat_to_agg}")
             df_combine = df_test.copy(deep=True)

-            time_format = '%Y'
+            time_format = "%Y"
             time_columns = [
                 col
-                for col in df_combine.columns.values
+                for col in df_combine.columns.to_numpy()
                 if matches_time_format(col, time_format)
             ]

@@ -241,20 +266,32 @@ if __name__ == "__main__":
                 df_combine[col] = pd.to_numeric(df_combine[col], errors="coerce")

             df_combine = df_combine.groupby(
-                by=['source', 'scenario (PRIMAP)', 'provenance', 'area (ISO3)', 'entity',
-                    'unit']).sum()
-
-            df_combine = df_combine.drop(columns=["category (IPCC2006_PRIMAP)", "orig_cat_name", "cat_name_translation"])
+                by=[
+                    "source",
+                    "scenario (PRIMAP)",
+                    "provenance",
+                    "area (ISO3)",
+                    "entity",
+                    "unit",
+                ]
+            ).sum()
+
+            df_combine = df_combine.drop(
+                columns=[
+                    "category (IPCC2006_PRIMAP)",
+                    "orig_cat_name",
+                    "cat_name_translation",
+                ]
+            )
             df_combine.insert(0, cat_label, cat_to_agg)
-            df_combine.insert(1, "orig_cat_name",
-                              aggregate_before_mapping[cat_to_agg]["name"])
+            df_combine.insert(
+                1, "orig_cat_name", aggregate_before_mapping[cat_to_agg]["name"]
+            )

             df_combine = df_combine.reset_index()

             if cat_to_agg in aggregate_before_mapping[cat_to_agg]["sources"]:
-                filter_this_cat = {
-                    "f": {cat_label: cat_to_agg}
-                }
+                filter_this_cat = {"f": {cat_label: cat_to_agg}}
                 filter_data(data_if_2006, filter_remove=filter_this_cat)

             data_if_2006 = pd.concat([data_if_2006, df_combine])
@@ -271,17 +308,19 @@ if __name__ == "__main__":
     # agg after mapping

     for cat_to_agg in aggregate_after_mapping:
-        mask = data_if_2006[cat_label].isin(aggregate_after_mapping[cat_to_agg]["sources"])
+        mask = data_if_2006[cat_label].isin(
+            aggregate_after_mapping[cat_to_agg]["sources"]
+        )
         df_test = data_if_2006[mask]

         if len(df_test) > 0:
             print(f"Aggregating category {cat_to_agg}")
             df_combine = df_test.copy(deep=True)

-            time_format = '%Y'
+            time_format = "%Y"
             time_columns = [
                 col
-                for col in df_combine.columns.values
+                for col in df_combine.columns.to_numpy()
                 if matches_time_format(col, time_format)
             ]

@@ -289,37 +328,56 @@ if __name__ == "__main__":
                 df_combine[col] = pd.to_numeric(df_combine[col], errors="coerce")

             df_combine = df_combine.groupby(
-                by=['source', 'scenario (PRIMAP)', 'provenance', 'area (ISO3)', 'entity',
-                    'unit']).sum()
-
-            df_combine = df_combine.drop(columns=["category (IPCC2006_PRIMAP)", "orig_cat_name", "cat_name_translation"])
+                by=[
+                    "source",
+                    "scenario (PRIMAP)",
+                    "provenance",
+                    "area (ISO3)",
+                    "entity",
+                    "unit",
+                ]
+            ).sum()
+
+            df_combine = df_combine.drop(
+                columns=[
+                    "category (IPCC2006_PRIMAP)",
+                    "orig_cat_name",
+                    "cat_name_translation",
+                ]
+            )
             df_combine.insert(0, cat_label, cat_to_agg)
-            df_combine.insert(1, "orig_cat_name",
-                              aggregate_after_mapping[cat_to_agg]["name"])
+            df_combine.insert(
+                1, "orig_cat_name", aggregate_after_mapping[cat_to_agg]["name"]
+            )

             df_combine = df_combine.reset_index()

             if cat_to_agg in aggregate_after_mapping[cat_to_agg]["sources"]:
-                filter_this_cat = {
-                    "f": {cat_label: cat_to_agg}
-                }
+                filter_this_cat = {"f": {cat_label: cat_to_agg}}
                 filter_data(data_if_2006, filter_remove=filter_this_cat)

             data_if_2006 = pd.concat([data_if_2006, df_combine])
         else:
             print(f"no data to aggregate category {cat_to_agg}")

-
-    #conversion to PRIMAP2 native format
+    # conversion to PRIMAP2 native format
     data_pm2_2006 = pm2.pm2io.from_interchange_format(data_if_2006)
     # convert back to IF to have units in the fixed format
-    data_pm2_2006 = data_pm2_2006.reset_coords(["orig_cat_name", "cat_name_translation"],
-                                           drop=True)
+    data_pm2_2006 = data_pm2_2006.reset_coords(
+        ["orig_cat_name", "cat_name_translation"], drop=True
+    )
     data_if_2006 = data_pm2_2006.pr.to_interchange_format()
     # save IPCC2006 data

     filter_data(data_if_2006, filter_remove=filter_remove_after_agg)
-    pm2.pm2io.write_interchange_format(output_folder / (output_filename + coords_terminologies_2006["category"]), data_if_2006)
+    pm2.pm2io.write_interchange_format(
+        output_folder / (output_filename + coords_terminologies_2006["category"]),
+        data_if_2006,
+    )

     encoding = {var: compression for var in data_pm2_2006.data_vars}
-    data_pm2_2006.pr.to_netcdf(output_folder / (output_filename + coords_terminologies_2006["category"] + ".nc"), encoding=encoding)
+    data_pm2_2006.pr.to_netcdf(
+        output_folder
+        / (output_filename + coords_terminologies_2006["category"] + ".nc"),
+        encoding=encoding,
+    )
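Both aggregation loops above implement one pattern: select the rows whose category is in ``sources``, sum the year columns with all other dimensions held fixed, and re-insert the sum under the target code (removing the target first if it is also listed as a source). A toy illustration of that pattern on a stripped-down frame (column names shortened, values hypothetical):

.. code-block:: python

    import pandas as pd

    df = pd.DataFrame(
        {
            "category": ["4.B.1", "4.B.2", "4.A"],
            "entity": ["CH4", "CH4", "CH4"],
            "unit": ["Gg CO2 / yr"] * 3,
            "1990": [1.0, 2.0, 5.0],
            "1991": [1.5, 2.5, 5.5],
        }
    )

    sources = ["4.B.1", "4.B.2"]
    agg = (
        df[df["category"].isin(sources)]
        .groupby(["entity", "unit"])  # all non-category dims stay fixed
        .sum(numeric_only=True)  # sums the year columns only
        .reset_index()
    )
    agg.insert(0, "category", "4.B")  # target category code
    df = pd.concat([df, agg], ignore_index=True)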

+ 75 - 47
src/unfccc_ghg_data/unfccc_reader/Republic_of_Korea/read_KOR_BUR4_from_xlsx.py

@@ -1,12 +1,17 @@
-# this script reads data from Korea's BUR4
-# Data is read from the xlsx file
+"""
+Read Korea's BUR4 from xlsx
+
+This script reads data from Korea's 2020 national inventory which is underlying BUR4
+Data are read from the xlsx file
+
+"""

 import os
 import sys

 import pandas as pd
 import primap2 as pm2
-from .config_kor_bur4 import cat_codes, cat_name_translations
+from config_kor_bur4 import cat_codes, cat_name_translations
 from primap2.pm2io._data_reading import filter_data

 from unfccc_ghg_data.helper import downloaded_data_path, extracted_data_path
@@ -15,42 +20,43 @@ if __name__ == "__main__":
     # ###
     # configuration
     # ###
-    input_folder = downloaded_data_path / 'non-UNFCCC' / 'Republic_of_Korea' / \
-                   '2020-Inventory'
-    output_folder = extracted_data_path / 'UNFCCC' / 'Republic_of_Korea'
+    input_folder = (
+        downloaded_data_path / "non-UNFCCC" / "Republic_of_Korea" / "2020-Inventory"
+    )
+    output_folder = extracted_data_path / "UNFCCC" / "Republic_of_Korea"
     if not output_folder.exists():
         output_folder.mkdir()

-    output_filename = 'KOR_BUR4_2021_'
+    output_filename = "KOR_BUR4_2021_"
-    inventory_file = 'Republic_of_Korea_National_GHG_Inventory_(1990_2018).xlsx'
+    inventory_file = "Republic_of_Korea_National_GHG_Inventory_(1990_2018).xlsx"
     years_to_read = range(1990, 2018 + 1)

-    sheets_to_read = ['온실가스', 'CO2', 'CH4', 'N2O', 'HFCs', 'PFCs', 'SF6']
+    sheets_to_read = ["온실가스", "CO2", "CH4", "N2O", "HFCs", "PFCs", "SF6"]
     cols_to_read = range(1, 2018 - 1990 + 3)

     # columns for category code and original category name
-    index_cols = ['분야·부문/연도']
+    index_cols = ["분야·부문/연도"]

     sheet_metadata = {
-        'entity': {
-            '온실가스': 'KYOTOGHG (SARGWP100)',
-            'CO2': 'CO2',
-            'CH4': 'CH4 (SARGWP100)',
-            'N2O': 'N2O (SARGWP100)',
-            'HFCs': 'HFCS (SARGWP100)',
-            'PFCs': 'PFCS (SARGWP100)',
-            'SF6': 'SF6 (SARGWP100)',
+        "entity": {
+            "온실가스": "KYOTOGHG (SARGWP100)",
+            "CO2": "CO2",
+            "CH4": "CH4 (SARGWP100)",
+            "N2O": "N2O (SARGWP100)",
+            "HFCs": "HFCS (SARGWP100)",
+            "PFCs": "PFCS (SARGWP100)",
+            "SF6": "SF6 (SARGWP100)",
+        },
+        "unit": {
+            "온실가스": "Gg CO2 / yr",
+            "CO2": "Gg CO2 / yr",
+            "CH4": "Gg CO2 / yr",
+            "N2O": "Gg CO2 / yr",
+            "HFCs": "Gg CO2 / yr",
+            "PFCs": "Gg CO2 / yr",
+            "SF6": "Gg CO2 / yr",
         },
-        'unit': {
-            '온실가스': 'Gg CO2 / yr',
-            'CO2': 'Gg CO2 / yr',
-            'CH4': 'Gg CO2 / yr',
-            'N2O': 'Gg CO2 / yr',
-            'HFCs': 'Gg CO2 / yr',
-            'PFCs': 'Gg CO2 / yr',
-            'SF6': 'Gg CO2 / yr',
-        }
     }

     # definitions for conversion to interchange format
@@ -64,7 +70,7 @@ if __name__ == "__main__":

     add_coords_cols = {
         "orig_cat_name": ["orig_cat_name", "category"],
-        "cat_name_translation": ["cat_name_translation", "category"]
+        "cat_name_translation": ["cat_name_translation", "category"],
     }

     coords_terminologies = {
@@ -90,21 +96,32 @@ if __name__ == "__main__":
         "f1": {
         "f1": {
             "category (IPCC1996_KOR_INV)": "\\IGNORE",
             "category (IPCC1996_KOR_INV)": "\\IGNORE",
         },
         },
-        "livestock": { # temp until double cat name problem is solved
-            "category (IPCC1996_KOR_INV)": {
-                '4.B.1', '4.B.10', '4.B.2', '4.B.3', '4.B.4',
-                '4.B.5', '4.B.6', '4.B.7', '4.B.8', '4.B.9',
-            }
-        }
+        "livestock": {  # temp until double cat name problem is solved
+            "category (IPCC1996_KOR_INV)": [
+                "4.B.1",
+                "4.B.10",
+                "4.B.2",
+                "4.B.3",
+                "4.B.4",
+                "4.B.5",
+                "4.B.6",
+                "4.B.7",
+                "4.B.8",
+                "4.B.9",
+            ]
+        },
     }

     filter_keep = {}

     meta_data = {
-        "references": "https://unfccc.int/documents/418616, http://www.gir.go.kr/home/file/readDownloadFile.do?fileId=4856&fileSeq=2",
+        "references": "https://unfccc.int/documents/418616, "
+        "http://www.gir.go.kr/home/file/readDownloadFile.do?"
+        "fileId=4856&fileSeq=2",
         "rights": "",
         "rights": "",
         "contact": "mail@johannes-guetschow.de.de",
         "contact": "mail@johannes-guetschow.de.de",
-        "title": "Republic of Korea: BUR4 / National Greenhouse Gas Inventory Report 2020",
+        "title": "Republic of Korea: BUR4 / National Greenhouse Gas Inventory Report "
+        "2020",
         "comment": "Read fom xlsx file by Johannes Gütschow",
         "comment": "Read fom xlsx file by Johannes Gütschow",
         "institution": "United Nations Framework Convention on Climate Change (UNFCCC)",
         "institution": "United Nations Framework Convention on Climate Change (UNFCCC)",
     }
     }
@@ -126,11 +143,17 @@ if __name__ == "__main__":

     for sheet in sheets_to_read:
         # read current sheet (one sheet per gas)
-        df_current = pd.read_excel(input_folder / inventory_file, sheet_name=sheet, skiprows=3, nrows=144, usecols=cols_to_read,
-                                   engine="openpyxl")
+        df_current = pd.read_excel(
+            input_folder / inventory_file,
+            sheet_name=sheet,
+            skiprows=3,
+            nrows=144,
+            usecols=cols_to_read,
+            engine="openpyxl",
+        )
         # drop all rows where the index cols (category code and name) are both NaN
         # as without one of them there is no category information
-        df_current.dropna(axis=0, how='all', subset=index_cols, inplace=True)
+        df_current = df_current.dropna(axis=0, how="all", subset=index_cols)
         # set index. necessary for the stack operation in the conversion to long format
         # df_current = df_current.set_index(index_cols)
         # add columns
@@ -144,7 +167,7 @@ if __name__ == "__main__":

     df_all = df_all.reset_index(drop=True)
     # rename category col because filtering produces problems with korean col names
-    df_all.rename(columns={"분야·부문/연도": "category"}, inplace=True)
+    df_all = df_all.rename(columns={"분야·부문/연도": "category"})

     # create copies of category col for further processing
     df_all["orig_cat_name"] = df_all["category"]
@@ -163,12 +186,12 @@ if __name__ == "__main__":
         coords_defaults=coords_defaults,
         coords_terminologies=coords_terminologies,
         coords_value_mapping=coords_value_mapping,
-        #coords_value_filling=coords_value_filling,
-        #filter_remove=filter_remove,
-        #filter_keep=filter_keep,
+        # coords_value_filling=coords_value_filling,
+        # filter_remove=filter_remove,
+        # filter_keep=filter_keep,
         meta_data=meta_data,
-        convert_str=True
-        )
+        convert_str=True,
+    )

     filter_data(data_if, filter_remove=filter_remove)

@@ -181,7 +204,12 @@ if __name__ == "__main__":
     # ###
     if not output_folder.exists():
         output_folder.mkdir()
-    pm2.pm2io.write_interchange_format(output_folder / (output_filename + coords_terminologies["category"]), data_if)
+    pm2.pm2io.write_interchange_format(
+        output_folder / (output_filename + coords_terminologies["category"]), data_if
+    )

     encoding = {var: compression for var in data_pm2.data_vars}
-    data_pm2.pr.to_netcdf(output_folder / (output_filename + coords_terminologies["category"] + ".nc"), encoding=encoding)
+    data_pm2.pr.to_netcdf(
+        output_folder / (output_filename + coords_terminologies["category"] + ".nc"),
+        encoding=encoding,
+    )
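Both Korea readers assemble one long table from the per-gas sheets, attaching each sheet's entity and unit from ``sheet_metadata`` before concatenating. The column attachment itself falls between the hunks above, so treat it as an assumption; a condensed sketch of the loop under that assumption:

.. code-block:: python

    import pandas as pd

    df_all = None
    for sheet in sheets_to_read:
        df_current = pd.read_excel(
            input_folder / inventory_file,
            sheet_name=sheet,
            skiprows=3,
            nrows=144,
            usecols=cols_to_read,
            engine="openpyxl",
        )
        # drop rows without any category information
        df_current = df_current.dropna(axis=0, how="all", subset=index_cols)
        # attach entity and unit for this gas (assumed from sheet_metadata)
        df_current["entity"] = sheet_metadata["entity"][sheet]
        df_current["unit"] = sheet_metadata["unit"][sheet]
        df_all = df_current if df_all is None else pd.concat([df_all, df_current])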

+ 30 - 0
src/unfccc_ghg_data/unfccc_reader/Singapore/__init__.py

@@ -0,0 +1,30 @@
+"""Read Singapore's BURs, NIRs, NCs
+
+Scripts and configurations to read Singapore's submissions to the UNFCCC.
+Currently, the following submissions and datasets are available (all datasets
+including DI (read using the DI-reader) and legacy BUR/NIR (no code)):
+
+.. exec_code::
+    :hide_code:
+
+    from unfccc_ghg_data.helper.functions import (get_country_datasets,
+                                                  get_country_submissions)
+    country = 'SGP'
+    # print available submissions
+    print("="*15 + " Available submissions " + "="*15)
+    get_country_submissions(country, True)
+    print("")
+
+    #print available datasets
+    print("="*15 + " Available datasets " + "="*15)
+    get_country_datasets(country, True)
+
+You can also obtain this information running
+
+.. code-block:: bash
+
+    poetry run doit country_info country=SGP
+
+See below for a listing of scripts for BUR/NIR reading including links.
+
+"""

+ 408 - 256
src/unfccc_ghg_data/unfccc_reader/Singapore/config_sgp_bur5.py

@@ -1,152 +1,222 @@
+"""Config for Singapore's BUR5
+
+Full configuration including PRIMAP2 conversion config and metadata
+
+"""
+
 table_def_templates = {
-    '66_1': {  # 66
-        "area": ['68,743,522,157'],
-        "cols": ['224,280,319,359,399,445,481'],
+    "66_1": {  # 66
+        "area": ["68,743,522,157"],
+        "cols": ["224,280,319,359,399,445,481"],
         "rows_to_fix": {
         "rows_to_fix": {
             # 2: ['and Sink Categories',],
             # 2: ['and Sink Categories',],
-            3: ['1A2 Manufacturing Industries',
-                '1B3 Other Emissions from', '1C - Carbon Dioxide Transport',
-                '2 — INDUSTRIAL PROCESSES AND', '2D - Non-Energy Products from',
-                '2F - Product Uses as Substitutes for',
-                '2G - Other Product Manufacture'],
+            3: [
+                "1A2 Manufacturing Industries",
+                "1B3 Other Emissions from",
+                "1C - Carbon Dioxide Transport",
+                "2 — INDUSTRIAL PROCESSES AND",
+                "2D - Non-Energy Products from",
+                "2F - Product Uses as Substitutes for",
+                "2G - Other Product Manufacture",
+            ],
         },
     },
-    '66_2': {  # 66
-        "area": ['671,744,1117,265'],
-        "cols": ['824,875,912,954,996,1040,1082'],
+    "66_2": {  # 66
+        "area": ["671,744,1117,265"],
+        "cols": ["824,875,912,954,996,1040,1082"],
         "rows_to_fix": {
         "rows_to_fix": {
-            3: ['3 — AGRICULTURE, FORESTRY AND', '3C - Aggregate Sources and Non-CO2',
-                '4C - Incineration and Open Burning',
-                '4D -  Wastewater Treatment',
-                '5A - Indirect N2O emissions from the', 'CO2 from Biomass Combustion',
-                ],
+            3: [
+                "3 — AGRICULTURE, FORESTRY AND",
+                "3C - Aggregate Sources and Non-CO2",
+                "4C - Incineration and Open Burning",
+                "4D -  Wastewater Treatment",
+                "5A - Indirect N2O emissions from the",
+                "CO2 from Biomass Combustion",
+            ],
         },
     },
-    '67_1': {  # 67
-        "area": ['70,727,554,159'],
-        "cols": ['207,254,291,319,356,400,442,468,503'],
+    "67_1": {  # 67
+        "area": ["70,727,554,159"],
+        "cols": ["207,254,291,319,356,400,442,468,503"],
         "rows_to_fix": {
         "rows_to_fix": {
-            2: ['2 — INDUSTRIAL PROCESSES', '2A4 Other Process Uses',
-                '2B4 Caprolactam, Glyoxal and', '2B8 Petrochemical and',
-                ],
-            3: ['Total National Emissions',
-                ],
+            2: [
+                "2 — INDUSTRIAL PROCESSES",
+                "2A4 Other Process Uses",
+                "2B4 Caprolactam, Glyoxal and",
+                "2B8 Petrochemical and",
+            ],
+            3: [
+                "Total National Emissions",
+            ],
         },
     },
-    '67_2': {  # 67
-        "area": ['666,725,1150,119'],
-        "cols": ['801,847,889,915,952,996,1036,1063,1098'],
+    "67_2": {  # 67
+        "area": ["666,725,1150,119"],
+        "cols": ["801,847,889,915,952,996,1036,1063,1098"],
         "rows_to_fix": {
         "rows_to_fix": {
-            2: ['2D - Non-Energy Products from', '2G - Other Product',
-                '2G2 SF6 and PFCs from', '2H2 Food and Beverages',
-                ],
-            3: ['Total National Emissions', '2E1 Integrated Circuit',
-                '2F - Product Uses as Substitutes for', '2F1 Refrigeration and',
-                ],
+            2: [
+                "2D - Non-Energy Products from",
+                "2G - Other Product",
+                "2G2 SF6 and PFCs from",
+                "2H2 Food and Beverages",
+            ],
+            3: [
+                "Total National Emissions",
+                "2E1 Integrated Circuit",
+                "2F - Product Uses as Substitutes for",
+                "2F1 Refrigeration and",
+            ],
         },
     },
-    '68_1': {  # 68
-        "area": ['66,787,524,217'],
-        "cols": ['205,261,315,366,415,473'],
+    "68_1": {  # 68
+        "area": ["66,787,524,217"],
+        "cols": ["205,261,315,366,415,473"],
         "rows_to_fix": {
         "rows_to_fix": {
-            2: ['2 — INDUSTRIAL PROCESSES', '2A4 Other Process Uses',
-                '2B4 Caprolactam, Glyoxal and', '2B8 Petrochemical and',
-                ],
-            3: ['Total National Emissions',
-                ],
+            2: [
+                "2 — INDUSTRIAL PROCESSES",
+                "2A4 Other Process Uses",
+                "2B4 Caprolactam, Glyoxal and",
+                "2B8 Petrochemical and",
+            ],
+            3: [
+                "Total National Emissions",
+            ],
         },
     },
-    '68_2': {  # 68
-        "area": ['666,787,1119,180'],
-        "cols": ['808,854,910,961,1017,1066'],
+    "68_2": {  # 68
+        "area": ["666,787,1119,180"],
+        "cols": ["808,854,910,961,1017,1066"],
         "rows_to_fix": {
         "rows_to_fix": {
-            2: ['2D - Non-Energy Products from',
-                '2F - Product Uses as Substitutes for', '2F1 Refrigeration and Air',
-                '2G2 SF6 and PFCs from Other', '2H2 Food and Beverages',
-                ],
-            3: ['Total National Emissions', '2E1 Integrated Circuit or',
-                '2G - Other Product Manufacture',
-                ],
+            2: [
+                "2D - Non-Energy Products from",
+                "2F - Product Uses as Substitutes for",
+                "2F1 Refrigeration and Air",
+                "2G2 SF6 and PFCs from Other",
+                "2H2 Food and Beverages",
+            ],
+            3: [
+                "Total National Emissions",
+                "2E1 Integrated Circuit or",
+                "2G - Other Product Manufacture",
+            ],
         },
     },
-    '84_1': {  # 84
-        "area": ['70,667,525,112'],
-        "cols": ['193,291,345,396,440,480'],
+    "84_1": {  # 84
+        "area": ["70,667,525,112"],
+        "cols": ["193,291,345,396,440,480"],
         "rows_to_fix": {},
         "rows_to_fix": {},
     },
     },
-    '84_2': {  # 84
-        "area": ['668,667,1115,83'],
-        "cols": ['854,908,954,1001,1038,1073'],
-        "rows_to_fix": { },
+    "84_2": {  # 84
+        "area": ["668,667,1115,83"],
+        "cols": ["854,908,954,1001,1038,1073"],
+        "rows_to_fix": {},
     },
-    '85_1': {  # 85
-        "area": ['70,680,531,170'],
-        "cols": ['275,328,375,414,456,489'],
+    "85_1": {  # 85
+        "area": ["70,680,531,170"],
+        "cols": ["275,328,375,414,456,489"],
         "rows_to_fix": {},
         "rows_to_fix": {},
     },
     },
-    '85_2': {  # 85
-        "area": ['663,675,1117,175'],
-        "cols": ['849,908,954,1001,1045,1073'],
+    "85_2": {  # 85
+        "area": ["663,675,1117,175"],
+        "cols": ["849,908,954,1001,1045,1073"],
         "rows_to_fix": {
         "rows_to_fix": {
-            3: ['3C — Aggregate Sources and Non-CO2',
-                '3C4 - Direct N2O Emissions from', '3C5 - Indirect N2O Emissions from',
-                '3C6 - Indirect N2O Emissions from']
+            3: [
+                "3C — Aggregate Sources and Non-CO2",
+                "3C4 - Direct N2O Emissions from",
+                "3C5 - Indirect N2O Emissions from",
+                "3C6 - Indirect N2O Emissions from",
+            ]
         },
     },
-    '92': {  # 92
-        "area": ['72,672,514,333'],
-        "cols": ['228,275,319,361,398,438,489'],
+    "92": {  # 92
+        "area": ["72,672,514,333"],
+        "cols": ["228,275,319,361,398,438,489"],
         "rows_to_fix": {
         "rows_to_fix": {
-            3: ['4A1 Managed Waste',
-                '4A2 Unmanaged Waste', '4A3 Uncategorised Waste',
-                '4C - Incineration and', '4D - Wastewater Treatment',
-                '4D1 Domestic Wastewater', '4D2 Industrial Wastewater']
+            3: [
+                "4A1 Managed Waste",
+                "4A2 Unmanaged Waste",
+                "4A3 Uncategorised Waste",
+                "4C - Incineration and",
+                "4D - Wastewater Treatment",
+                "4D1 Domestic Wastewater",
+                "4D2 Industrial Wastewater",
+            ]
         },
     },
-    '95_1': {  # 95
-        "area": ['70,731,507,149'],
-        "cols": ['233,307,375,452'],
+    "95_1": {  # 95
+        "area": ["70,731,507,149"],
+        "cols": ["233,307,375,452"],
         "drop_rows": [0, 1, 2, 3],
         "drop_rows": [0, 1, 2, 3],
         "rows_to_fix": {
         "rows_to_fix": {
-            3: ['Total (Net)', '1A2 Manufacturing Industries',
-                '2 — INDUSTRIAL PROCESSES', '3 — AGRICULTURE, FORESTRY',
-                '3C - Aggregate Sources and Non-CO2', '4C - Incineration and Open',
-                'Clinical Waste', '4D - Wastewater Treatment',
-                'CO2 from Biomass Combustion for']
+            3: [
+                "Total (Net)",
+                "1A2 Manufacturing Industries",
+                "2 — INDUSTRIAL PROCESSES",
+                "3 — AGRICULTURE, FORESTRY",
+                "3C - Aggregate Sources and Non-CO2",
+                "4C - Incineration and Open",
+                "Clinical Waste",
+                "4D - Wastewater Treatment",
+                "CO2 from Biomass Combustion for",
+            ]
         },
         "header": {
-            'entity': ['Greenhouse Gas Source and Sink Categories',
-                       'Net CO2', 'CH4', 'N2O', 'HFCs'],
-            'unit': ['', 'Gg', 'GgCO2eq', 'GgCO2eq', 'GgCO2eq'],
+            "entity": [
+                "Greenhouse Gas Source and Sink Categories",
+                "Net CO2",
+                "CH4",
+                "N2O",
+                "HFCs",
+            ],
+            "unit": ["", "Gg", "GgCO2eq", "GgCO2eq", "GgCO2eq"],
         },
     },
-    '95_2': {  # 95
-        "area": ['666,731,1103,149'],
-        "cols": ['829,903,971,1048'],
+    "95_2": {  # 95
+        "area": ["666,731,1103,149"],
+        "cols": ["829,903,971,1048"],
         "drop_rows": [0, 1, 2, 3, 4, 5],
         "drop_rows": [0, 1, 2, 3, 4, 5],
         "rows_to_fix": {
         "rows_to_fix": {
-            3: ['Total (Net)', '1A2 Manufacturing Industries',
-                '2 — INDUSTRIAL PROCESSES', '3 — AGRICULTURE, FORESTRY',
-                '3C - Aggregate Sources and Non-CO2', '4C - Incineration and Open',
-                'Clinical Waste', '4D - Wastewater Treatment',
-                'CO2 from Biomass Combustion for']
+            3: [
+                "Total (Net)",
+                "1A2 Manufacturing Industries",
+                "2 — INDUSTRIAL PROCESSES",
+                "3 — AGRICULTURE, FORESTRY",
+                "3C - Aggregate Sources and Non-CO2",
+                "4C - Incineration and Open",
+                "Clinical Waste",
+                "4D - Wastewater Treatment",
+                "CO2 from Biomass Combustion for",
+            ]
         },
         "header": {
-            'entity': ['Greenhouse Gas Source and Sink Categories',
-                       'PFCs', 'SF6', 'NF3', 'Total (Net) National Emissions'],
-            'unit': ['', 'GgCO2eq', 'GgCO2eq', 'GgCO2eq', 'GgCO2eq'],
+            "entity": [
+                "Greenhouse Gas Source and Sink Categories",
+                "PFCs",
+                "SF6",
+                "NF3",
+                "Total (Net) National Emissions",
+            ],
+            "unit": ["", "GgCO2eq", "GgCO2eq", "GgCO2eq", "GgCO2eq"],
         },
     },
 }
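The ``area`` and ``cols`` strings in these templates are PDF coordinates ("x1,y1,x2,y2" regions and x positions of column separators). The extraction code is not part of this hunk; assuming the templates feed a camelot stream-mode read, as the format suggests, usage would look roughly like:

.. code-block:: python

    import camelot

    template = table_def_templates["66_1"]
    tables = camelot.read_pdf(
        "SGP_BUR5.pdf",  # hypothetical file name
        pages="66",
        flavor="stream",
        table_areas=template["area"],  # region to parse, in PDF points
        columns=template["cols"],  # x coordinates of column separators
    )
    df = tables[0].df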

 table_defs = {
-    '66': {
-        "templates": ['66_1', '66_2'],
+    "66": {
+        "templates": ["66_1", "66_2"],
         # "header_rows": [0, 1],
         # "header_rows": [0, 1],
         "header": {
         "header": {
-            'entity': ['Greenhouse Gas Source and Sink Categories', 'Net CO2',
-                       'CH4', 'N2O', 'HFCs', 'PFCs', 'SF6', 'NF3'],
-            'unit': ['', 'Gg', 'Gg', 'Gg', 'GgCO2eq', 'GgCO2eq', 'GgCO2eq', 'GgCO2eq'],
+            "entity": [
+                "Greenhouse Gas Source and Sink Categories",
+                "Net CO2",
+                "CH4",
+                "N2O",
+                "HFCs",
+                "PFCs",
+                "SF6",
+                "NF3",
+            ],
+            "unit": ["", "Gg", "Gg", "Gg", "GgCO2eq", "GgCO2eq", "GgCO2eq", "GgCO2eq"],
         },
         "drop_rows": [0, 1, 2, 3],
         # "drop_cols": ['NF3', 'SF6'],
@@ -155,13 +225,22 @@ table_defs = {
         # "unit_info": unit_info_2018,
         # "unit_info": unit_info_2018,
         "coords_value_mapping": "2018",
         "coords_value_mapping": "2018",
     },
     },
-    '67': {
-        "templates": ['67_1', '67_2'],
+    "67": {
+        "templates": ["67_1", "67_2"],
         "header": {
         "header": {
-            'entity': ['Greenhouse Gas Source and Sink Categories', 'HFC-23', 'HFC-32',
-                       'HFC-41', 'HFC-125', 'HFC-134a', 'HFC-143a', 'HFC-152a',
-                       'HFC-227ea', 'HFC-43-10mee'],
-            'unit': ['', 'kg', 'kg', 'kg', 'kg', 'kg', 'kg', 'kg', 'kg', 'kg'],
+            "entity": [
+                "Greenhouse Gas Source and Sink Categories",
+                "HFC-23",
+                "HFC-32",
+                "HFC-41",
+                "HFC-125",
+                "HFC-134a",
+                "HFC-143a",
+                "HFC-152a",
+                "HFC-227ea",
+                "HFC-43-10mee",
+            ],
+            "unit": ["", "kg", "kg", "kg", "kg", "kg", "kg", "kg", "kg", "kg"],
         },
         "drop_rows": [0, 1, 2, 3],
         # "drop_cols": ['NF3', 'SF6'],
@@ -170,24 +249,31 @@ table_defs = {
         # "unit_info": unit_info_2018,
         # "unit_info": unit_info_2018,
         "coords_value_mapping": "2018_fgases",
         "coords_value_mapping": "2018_fgases",
     },
     },
-    '68': {
-        "templates": ['68_1', '68_2'],
+    "68": {
+        "templates": ["68_1", "68_2"],
         "header": {
         "header": {
-            'entity': ['Greenhouse Gas Source and Sink Categories', 'PFC-14',
-                       'PFC-116', 'PFC-218', 'PFC-318', 'SF6', 'NF3'],
-            'unit': ['', 'kg', 'kg', 'kg', 'kg', 'kg', 'kg'],
+            "entity": [
+                "Greenhouse Gas Source and Sink Categories",
+                "PFC-14",
+                "PFC-116",
+                "PFC-218",
+                "PFC-318",
+                "SF6",
+                "NF3",
+            ],
+            "unit": ["", "kg", "kg", "kg", "kg", "kg", "kg"],
         },
         "drop_rows": [0, 1, 2],
-         "category_col": "Greenhouse Gas Source and Sink Categories",
+        "category_col": "Greenhouse Gas Source and Sink Categories",
         "year": 2018,
         "year": 2018,
         # "unit_info": unit_info_2018,
         # "unit_info": unit_info_2018,
         "coords_value_mapping": "2018_fgases",
         "coords_value_mapping": "2018_fgases",
     },
     },
-    '84': {
-        "templates": ['84_1', '84_2'],
+    "84": {
+        "templates": ["84_1", "84_2"],
         "header": {
         "header": {
-            'entity': ['Categories', 'CO2', 'CH4', 'N2O', 'NOx', 'CO', 'NMVOC'],
-            'unit': ['', 'Gg', 'Gg', 'Gg', 'Gg', 'Gg', 'Gg'],
+            "entity": ["Categories", "CO2", "CH4", "N2O", "NOx", "CO", "NMVOC"],
+            "unit": ["", "Gg", "Gg", "Gg", "Gg", "Gg", "Gg"],
         },
         "drop_rows": [0, 1, 2, 3, 4, 5],
         "category_col": "Categories",
@@ -195,11 +281,11 @@ table_defs = {
         # "unit_info": unit_info_2018,
         # "unit_info": unit_info_2018,
         "coords_value_mapping": "2018",
         "coords_value_mapping": "2018",
     },
     },
-    '85': {
-        "templates": ['85_1', '85_2'],
+    "85": {
+        "templates": ["85_1", "85_2"],
         "header": {
         "header": {
-            'entity': ['Categories', 'CO2', 'CH4', 'N2O', 'NOx', 'CO', 'NMVOC'],
-            'unit': ['', 'Gg', 'Gg', 'Gg', 'Gg', 'Gg', 'Gg'],
+            "entity": ["Categories", "CO2", "CH4", "N2O", "NOx", "CO", "NMVOC"],
+            "unit": ["", "Gg", "Gg", "Gg", "Gg", "Gg", "Gg"],
         },
         "drop_rows": [0, 1, 2, 3, 4, 5],
         "category_col": "Categories",
@@ -207,11 +293,11 @@ table_defs = {
         # "unit_info": unit_info_2018,
         # "unit_info": unit_info_2018,
         "coords_value_mapping": "2018",
         "coords_value_mapping": "2018",
     },
     },
-    '92': {
-        "templates": ['92'],
+    "92": {
+        "templates": ["92"],
         "header": {
         "header": {
-            'entity': ['Categories', 'CO2', 'CH4', 'N2O', 'NOx', 'CO', 'NMVOC', 'SO2'],
-            'unit': ['', 'Gg', 'Gg', 'Gg', 'Gg', 'Gg', 'Gg', 'Gg'],
+            "entity": ["Categories", "CO2", "CH4", "N2O", "NOx", "CO", "NMVOC", "SO2"],
+            "unit": ["", "Gg", "Gg", "Gg", "Gg", "Gg", "Gg", "Gg"],
         },
         "drop_rows": [0, 1, 2],
         "category_col": "Categories",
@@ -219,43 +305,43 @@ table_defs = {
         # "unit_info": unit_info_2018,
         # "unit_info": unit_info_2018,
         "coords_value_mapping": "2018",
         "coords_value_mapping": "2018",
     },
     },
-    '95': {
-        "templates": ['95_1', '95_2'],
+    "95": {
+        "templates": ["95_1", "95_2"],
         "category_col": "Greenhouse Gas Source and Sink Categories",
         "category_col": "Greenhouse Gas Source and Sink Categories",
         "year": 2016,
         "year": 2016,
         # "unit_info": unit_info_2018,
         # "unit_info": unit_info_2018,
         "coords_value_mapping": "other",
         "coords_value_mapping": "other",
     },
     },
-    '96': {
-        "templates": ['95_1', '95_2'],
+    "96": {
+        "templates": ["95_1", "95_2"],
         "category_col": "Greenhouse Gas Source and Sink Categories",
         "category_col": "Greenhouse Gas Source and Sink Categories",
         "year": 2014,
         "year": 2014,
         # "unit_info": unit_info_2018,
         # "unit_info": unit_info_2018,
         "coords_value_mapping": "other",
         "coords_value_mapping": "other",
     },
     },
-    '97': {
-        "templates": ['95_1', '95_2'],
+    "97": {
+        "templates": ["95_1", "95_2"],
         "category_col": "Greenhouse Gas Source and Sink Categories",
         "category_col": "Greenhouse Gas Source and Sink Categories",
         "year": 2012,
         "year": 2012,
         # "unit_info": unit_info_2018,
         # "unit_info": unit_info_2018,
         "coords_value_mapping": "other",
         "coords_value_mapping": "other",
     },
     },
-    '98': {
-        "templates": ['95_1', '95_2'],
+    "98": {
+        "templates": ["95_1", "95_2"],
         "category_col": "Greenhouse Gas Source and Sink Categories",
         "category_col": "Greenhouse Gas Source and Sink Categories",
         "year": 2010,
         "year": 2010,
         # "unit_info": unit_info_2018,
         # "unit_info": unit_info_2018,
         "coords_value_mapping": "other",
         "coords_value_mapping": "other",
     },
     },
-    '99': {
-        "templates": ['95_1', '95_2'],
+    "99": {
+        "templates": ["95_1", "95_2"],
         "category_col": "Greenhouse Gas Source and Sink Categories",
         "category_col": "Greenhouse Gas Source and Sink Categories",
         "year": 2000,
         "year": 2000,
         # "unit_info": unit_info_2018,
         # "unit_info": unit_info_2018,
         "coords_value_mapping": "other",
         "coords_value_mapping": "other",
     },
     },
-    '100': {
-        "templates": ['95_1', '95_2'],
+    "100": {
+        "templates": ["95_1", "95_2"],
         "category_col": "Greenhouse Gas Source and Sink Categories",
         "category_col": "Greenhouse Gas Source and Sink Categories",
         "year": 1994,
         "year": 1994,
         # "unit_info": unit_info_2018,
         # "unit_info": unit_info_2018,
@@ -264,12 +350,12 @@ table_defs = {
 }

 cat_names_fix = {
-    '14Ab Residential': '1A4b Residential',
+    "14Ab Residential": "1A4b Residential",
 }

 values_replacement = {
-#    '': '-',
-    ' ': '',
+    #    '': '-',
+    " ": "",
 }

 gwp_to_use = "AR5GWP100"
@@ -281,28 +367,28 @@ unit_row = "header"

 ## parameters part 2: conversion to PRIMAP2 interchnage format

-cats_remove = ['Information items']
+cats_remove = ["Information items"]

 cat_codes_manual = {
-    'CO2 from Biomass Combustion for Energy Production': 'M.BIO',
-    'Total National Emissions and Removals': '0',
-    'Total (Net) National Emissions': '0',
-    'Clinical Waste Incineration': 'M.4.C.1',
-    'Hazardous Waste Incineration': 'M.4.C.2',
+    "CO2 from Biomass Combustion for Energy Production": "M.BIO",
+    "Total National Emissions and Removals": "0",
+    "Total (Net) National Emissions": "0",
+    "Clinical Waste Incineration": "M.4.C.1",
+    "Hazardous Waste Incineration": "M.4.C.2",
     #'3 AGRICULTURE': 'M.AG',
-    '3 AGRICULTURE, FORESTRY AND OTHER LAND USE': '3',
+    "3 AGRICULTURE, FORESTRY AND OTHER LAND USE": "3",
     #'3 LAND USE, LAND-USE CHANGE AND FORESTRY': 'M.LULUCF',
 }


-cat_code_regexp = r'(?P<code>^[A-Za-z0-9]{1,7})\s.*'
+cat_code_regexp = r"(?P<code>^[A-Za-z0-9]{1,7})\s.*"
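``cat_code_regexp`` pulls the leading category code out of combined "code plus name" strings; the named group keeps the downstream code readable. A quick illustration:

.. code-block:: python

    import re

    cat_code_regexp = r"(?P<code>^[A-Za-z0-9]{1,7})\s.*"
    match = re.match(cat_code_regexp, "1A2 Manufacturing Industries and Construction")
    print(match.group("code"))  # -> 1A2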

 # special header as category code and name in one column
 header_long = ["orig_cat_name", "entity", "unit", "time", "data"]

 coords_terminologies = {
     "area": "ISO3",
-    "category": "IPCC2006_PRIMAP", #two extra categories
+    "category": "IPCC2006_PRIMAP",  # two extra categories
     "scenario": "PRIMAP",
     "scenario": "PRIMAP",
 }
 }
 
 
@@ -310,63 +396,59 @@ coords_defaults = {
     "source": "SGP-GHG-inventory ",
     "source": "SGP-GHG-inventory ",
     "provenance": "measured",
     "provenance": "measured",
     "area": "SGP",
     "area": "SGP",
-    "scenario": "BUR5"
+    "scenario": "BUR5",
 }

 coords_value_mapping = {
     "2018": {
         "unit": "PRIMAP1",
         "entity": {
-            'HFCs': f'HFCS ({gwp_to_use})',
-            'PFCs': f'PFCS ({gwp_to_use})',
-            'CH4': 'CH4',
-            'N2O': 'N2O',
-            'NF3': f'NF3 ({gwp_to_use})',
-            'Net CO2': 'CO2',
-            'SF6': f'SF6 ({gwp_to_use})',
-            'Total (Net) National Emissions': 'KYOTOGHG (AR5GWP100)',
+            "HFCs": f"HFCS ({gwp_to_use})",
+            "PFCs": f"PFCS ({gwp_to_use})",
+            "CH4": "CH4",
+            "N2O": "N2O",
+            "NF3": f"NF3 ({gwp_to_use})",
+            "Net CO2": "CO2",
+            "SF6": f"SF6 ({gwp_to_use})",
+            "Total (Net) National Emissions": "KYOTOGHG (AR5GWP100)",
         },
     },
     "2018_fgases": {
         "unit": "PRIMAP1",
         "entity": {
-            'HFC-125': 'HFC125',
-            'HFC-134a': 'HFC134a',
-            'HFC-143a': 'HFC143a',
-            'HFC-152a': 'HFC152a',
-            'HFC-227ea': 'HFC227ea',
-            'HFC-23': 'HFC23',
-            'HFC-32': 'HFC32',
-            'HFC-41': 'HFC41',
-            'HFC-43-10mee': 'HFC4310mee',
-            'NF3': 'NF3',
-            'PFC-116': 'C2F6',
-            'PFC-14': 'CF4',
-            'PFC-218': 'C3F8',
-            'PFC-318': 'cC4F8',
-            'SF6': 'SF6',
+            "HFC-125": "HFC125",
+            "HFC-134a": "HFC134a",
+            "HFC-143a": "HFC143a",
+            "HFC-152a": "HFC152a",
+            "HFC-227ea": "HFC227ea",
+            "HFC-23": "HFC23",
+            "HFC-32": "HFC32",
+            "HFC-41": "HFC41",
+            "HFC-43-10mee": "HFC4310mee",
+            "NF3": "NF3",
+            "PFC-116": "C2F6",
+            "PFC-14": "CF4",
+            "PFC-218": "C3F8",
+            "PFC-318": "cC4F8",
+            "SF6": "SF6",
         },
     },
     "other": {
         "unit": "PRIMAP1",
         "entity": {
-            'HFCs': f'HFCS ({gwp_to_use})',
-            'CH4': f'CH4 ({gwp_to_use})',
-            'N2O': f'N2O ({gwp_to_use})',
-            'NF3': f'NF3 ({gwp_to_use})',
-            'Net CO2': 'CO2',
-            'PFCs': f'PFCS ({gwp_to_use})',
-            'SF6': f'SF6 ({gwp_to_use})',
-            'Total (Net) National Emissions': f'KYOTOGHG ({gwp_to_use})',
+            "HFCs": f"HFCS ({gwp_to_use})",
+            "CH4": f"CH4 ({gwp_to_use})",
+            "N2O": f"N2O ({gwp_to_use})",
+            "NF3": f"NF3 ({gwp_to_use})",
+            "Net CO2": "CO2",
+            "PFCs": f"PFCS ({gwp_to_use})",
+            "SF6": f"SF6 ({gwp_to_use})",
+            "Total (Net) National Emissions": f"KYOTOGHG ({gwp_to_use})",
         },
     },
 }

-coords_cols = {
-    "category": "category",
-    "entity": "entity",
-    "unit": "unit"
-}
+coords_cols = {"category": "category", "entity": "entity", "unit": "unit"}

 add_coords_cols = {
     "orig_cat_name": ["orig_cat_name", "category"],
@@ -386,7 +468,7 @@ meta_data = {
     "rights": "",
     "rights": "",
     "contact": "mail@johannes-guetschow.de",
     "contact": "mail@johannes-guetschow.de",
     "title": "Singapore's Fifth National Communication and Fifth Biannial Update "
     "title": "Singapore's Fifth National Communication and Fifth Biannial Update "
-             "Report",
+    "Report",
     "comment": "Read fom pdf file by Johannes Gütschow",
     "comment": "Read fom pdf file by Johannes Gütschow",
     "institution": "United Nations Framework Convention on Climate Change (UNFCCC)",
     "institution": "United Nations Framework Convention on Climate Change (UNFCCC)",
 }
 }
@@ -394,92 +476,165 @@ meta_data = {

 ## processing
 aggregate_sectors = {
-    '2': {'sources': ['2.A', '2.B', '2.C', '2.D', '2.E', '2.F', '2.G', '2.H'],
-          'name': 'IPPU'},
-    'M.3.C.1.AG': {'sources': ['3.C.1.b', '3.C.1.c'], 'name': 'Emissions from Biomass Burning (Agriculture)'},
-    'M.3.C.1.LU': {'sources': ['3.C.1.a', '3.C.1.d'], 'name': 'Emissions from Biomass Burning (LULUCF)'},
-    'M.3.C.AG': {'sources': ['M.3.C.1.AG', '3.C.2', '3.C.3', '3.C.4', '3.C.5',
-                             '3.C.6', '3.C.7', '3.C.8'],
-                 'name': 'Aggregate sources and non-CO2 emissions sources on land (Agriculture)'},
-    'M.AG.ELV': {'sources': ['M.3.C.AG'], 'name': 'Agriculture excluding livestock emissions'},
-    'M.AG': {'sources': ['M.AG.ELV', '3.A'], 'name': 'Agriculture'},
-    'M.LULUCF': {'sources': ['M.3.C.1.LU', '3.B', '3.D'],
-                 'name': 'Land Use, Land Use Change, and Forestry'},
-    'M.0.EL': {'sources': ['1', '2', 'M.AG', '4', '5'], 'name': 'National Total Excluding LULUCF'},
-    '0': {'sources': ['1', '2', '3', '4', '5'], 'name': 'National Total'},
+    "2": {
+        "sources": ["2.A", "2.B", "2.C", "2.D", "2.E", "2.F", "2.G", "2.H"],
+        "name": "IPPU",
+    },
+    "M.3.C.1.AG": {
+        "sources": ["3.C.1.b", "3.C.1.c"],
+        "name": "Emissions from Biomass Burning (Agriculture)",
+    },
+    "M.3.C.1.LU": {
+        "sources": ["3.C.1.a", "3.C.1.d"],
+        "name": "Emissions from Biomass Burning (LULUCF)",
+    },
+    "M.3.C.AG": {
+        "sources": [
+            "M.3.C.1.AG",
+            "3.C.2",
+            "3.C.3",
+            "3.C.4",
+            "3.C.5",
+            "3.C.6",
+            "3.C.7",
+            "3.C.8",
+        ],
+        "name": "Aggregate sources and non-CO2 emissions sources on land (Agriculture)",
+    },
+    "M.AG.ELV": {
+        "sources": ["M.3.C.AG"],
+        "name": "Agriculture excluding livestock emissions",
+    },
+    "M.AG": {"sources": ["M.AG.ELV", "3.A"], "name": "Agriculture"},
+    "M.LULUCF": {
+        "sources": ["M.3.C.1.LU", "3.B", "3.D"],
+        "name": "Land Use, Land Use Change, and Forestry",
+    },
+    "M.0.EL": {
+        "sources": ["1", "2", "M.AG", "4", "5"],
+        "name": "National Total Excluding LULUCF",
+    },
+    "0": {"sources": ["1", "2", "3", "4", "5"], "name": "National Total"},
 }


 processing_info_step1 = {
     # aggregate IPPU which is missing for individual fgases so it can be used in the
     # next step (downscaling)
-    'aggregate_cats': {
-        '2': {'sources': ['2.A', '2.B', '2.C', '2.D', '2.E', '2.F', '2.G', '2.H'],
-              'name': 'IPPU'},
+    "aggregate_cats": {
+        "2": {
+            "sources": ["2.A", "2.B", "2.C", "2.D", "2.E", "2.F", "2.G", "2.H"],
+            "name": "IPPU",
+        },
     },
-    'tolerance': 1, # because ch4 is inconsistent
+    "tolerance": 1,  # because ch4 is inconsistent
 }
 
-processing_info_step2 =  {
-    'aggregate_cats': aggregate_sectors,
-    'downscale': {
-        'sectors': {
-            'IPPU': {
-                'basket': '2',
-                'basket_contents': ['2.A', '2.B', '2.C', '2.D', '2.E',
-                                    '2.F', '2.G', '2.H'],
-                'entities': ['CO2', 'N2O', f'PFCS ({gwp_to_use})',
-                             f'HFCS ({gwp_to_use})', 'SF6', 'NF3'],
-                'dim': 'category (IPCC2006_PRIMAP)',
+processing_info_step2 = {
+    "aggregate_cats": aggregate_sectors,
+    "downscale": {
+        "sectors": {
+            "IPPU": {
+                "basket": "2",
+                "basket_contents": [
+                    "2.A",
+                    "2.B",
+                    "2.C",
+                    "2.D",
+                    "2.E",
+                    "2.F",
+                    "2.G",
+                    "2.H",
+                ],
+                "entities": [
+                    "CO2",
+                    "N2O",
+                    f"PFCS ({gwp_to_use})",
+                    f"HFCS ({gwp_to_use})",
+                    "SF6",
+                    "NF3",
+                ],
+                "dim": "category (IPCC2006_PRIMAP)",
             },
             # AFOLU downscaling. Most is zero anyway
-            '3C': {
-                'basket': '3.C',
-                'basket_contents': ['3.C.1', '3.C.2', '3.C.3', '3.C.4', '3.C.5',
-                                    '3.C.6', '3.C.7', '3.C.8'],
-                'entities': ['CO2', 'CH4', 'N2O'],
-                'dim': 'category (IPCC2006_PRIMAP)',
+            "3C": {
+                "basket": "3.C",
+                "basket_contents": [
+                    "3.C.1",
+                    "3.C.2",
+                    "3.C.3",
+                    "3.C.4",
+                    "3.C.5",
+                    "3.C.6",
+                    "3.C.7",
+                    "3.C.8",
+                ],
+                "entities": ["CO2", "CH4", "N2O"],
+                "dim": "category (IPCC2006_PRIMAP)",
             },
-            '3C1': {
-                'basket': '3.C.1',
-                'basket_contents': ['3.C.1.a', '3.C.1.b', '3.C.1.c', '3.C.1.d'],
-                'entities': ['CO2', 'CH4', 'N2O'],
-                'dim': 'category (IPCC2006_PRIMAP)',
+            "3C1": {
+                "basket": "3.C.1",
+                "basket_contents": ["3.C.1.a", "3.C.1.b", "3.C.1.c", "3.C.1.d"],
+                "entities": ["CO2", "CH4", "N2O"],
+                "dim": "category (IPCC2006_PRIMAP)",
             },
-            '3D': {
-                'basket': '3.D',
-                'basket_contents': ['3.D.1', '3.D.2'],
-                'entities': ['CO2', 'CH4', 'N2O'],
-                'dim': 'category (IPCC2006_PRIMAP)',
+            "3D": {
+                "basket": "3.D",
+                "basket_contents": ["3.D.1", "3.D.2"],
+                "entities": ["CO2", "CH4", "N2O"],
+                "dim": "category (IPCC2006_PRIMAP)",
             },
         },
-        'entities': {
-            'HFCS': {
-                'basket': f'HFCS ({gwp_to_use})',
-                'basket_contents': ['HFC125', 'HFC134a', 'HFC143a', 'HFC23',
-                                    'HFC32', 'HFC4310mee', 'HFC227ea'],
-                'sel': {'category (IPCC2006_PRIMAP)':
-                            ['0', '2', '2.C', '2.E',
-                             '2.F', '2.G', '2.H']},
+        "entities": {
+            "HFCS": {
+                "basket": f"HFCS ({gwp_to_use})",
+                "basket_contents": [
+                    "HFC125",
+                    "HFC134a",
+                    "HFC143a",
+                    "HFC23",
+                    "HFC32",
+                    "HFC4310mee",
+                    "HFC227ea",
+                ],
+                "sel": {
+                    "category (IPCC2006_PRIMAP)": [
+                        "0",
+                        "2",
+                        "2.C",
+                        "2.E",
+                        "2.F",
+                        "2.G",
+                        "2.H",
+                    ]
+                },
             },
             },
-                'basket': f'PFCS ({gwp_to_use})',
-                'basket_contents': ['C2F6', 'C3F8', 'CF4', 'cC4F8'],
-                'sel': {'category (IPCC2006_PRIMAP)':
-                            ['0', '2', '2.C', '2.E',
-                             '2.F', '2.G', '2.H']},
+            "PFCS": {
+                "basket": f"PFCS ({gwp_to_use})",
+                "basket_contents": ["C2F6", "C3F8", "CF4", "cC4F8"],
+                "sel": {
+                    "category (IPCC2006_PRIMAP)": [
+                        "0",
+                        "2",
+                        "2.C",
+                        "2.E",
+                        "2.F",
+                        "2.G",
+                        "2.H",
+                    ]
+                },
             },
-        }
+        },
     },
-    'remove_ts': {
-        'fgases': { # unnecessary and complicates aggregation for
+    "remove_ts": {
+        "fgases": {  # unnecessary and complicates aggregation for
             # other gases
-            'category': ['5', '5.B'],
-            'entities': [f'HFCS ({gwp_to_use})', f'PFCS ({gwp_to_use})', 'SF6', 'NF3'],
+            "category": ["5", "5.B"],
+            "entities": [f"HFCS ({gwp_to_use})", f"PFCS ({gwp_to_use})", "SF6", "NF3"],
         },
-        'CH4': { # inconsistent with IPPU sector
-            'category': ['2.A', '2.B', '2.C', '2.D', '2.E', '2.F', '2.G', '2.H'],
-            'entities': ['CH4'],
+        "CH4": {  # inconsistent with IPPU sector
+            "category": ["2.A", "2.B", "2.C", "2.D", "2.E", "2.F", "2.G", "2.H"],
+            "entities": ["CH4"],
         },
     },
     # 'basket_copy': {
@@ -488,6 +643,3 @@ processing_info_step2 =  {
     #     'source_GWP': gwp_to_use,
     #     },
 }
-
-
-

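The aggregate_sectors mapping above follows a {target: {"sources": [...], "name": ...}} convention that the processing step resolves into sums over child categories. A minimal sketch of the idea on a toy, category-indexed DataFrame (the frame and the loop are illustrative assumptions, not the project's actual process_data_for_country implementation):

import pandas as pd

# Toy data: one year column, category codes as the index (illustrative only).
df = pd.DataFrame(
    {"2005": [1.0, 2.0, 3.0]},
    index=pd.Index(["2.A", "2.B", "2.C"], name="category"),
)
spec = {"2": {"sources": ["2.A", "2.B", "2.C"], "name": "IPPU"}}
for target, info in spec.items():
    present = [cat for cat in info["sources"] if cat in df.index]
    # sum the child categories that exist into the basket category
    df.loc[target] = df.loc[present].sum()
print(df)  # the new "2" row holds 6.0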
+ 110 - 72
src/unfccc_ghg_data/unfccc_reader/Singapore/read_SGP_BUR5_from_pdf.py

@@ -1,12 +1,26 @@
-# read Singapore fifth BUR from pdf
+"""
+Read Singapore's BUR5 from pdf
 
+This script reads data from Singapore's BUR5
+Data are read from pdf using camelot
 
+"""
 import locale
 
-#import numpy as np
+# import numpy as np
 import camelot
 import pandas as pd
 import primap2 as pm2
+from primap2.pm2io._conversion import convert_ipcc_code_primap_to_primap2
+
+from unfccc_ghg_data.helper import (
+    downloaded_data_path,
+    extracted_data_path,
+    fix_rows,
+    gas_baskets,
+    process_data_for_country,
+)
+
 from .config_sgp_bur5 import (
     cat_code_regexp,
     cat_codes_manual,
@@ -26,29 +40,20 @@ from .config_sgp_bur5 import (
     table_defs,
     values_replacement,
 )
-from primap2.pm2io._conversion import convert_ipcc_code_primap_to_primap2
-
-from unfccc_ghg_data.helper import (
-    downloaded_data_path,
-    extracted_data_path,
-    fix_rows,
-    gas_baskets,
-    process_data_for_country,
-)
 
 
 if __name__ == "__main__":
 if __name__ == "__main__":
     ### genral configuration
     ### genral configuration
-    input_folder = downloaded_data_path / 'UNFCCC' / 'Singapore' / 'BUR5'
-    output_folder = extracted_data_path / 'UNFCCC' / 'Singapore'
+    input_folder = downloaded_data_path / "UNFCCC" / "Singapore" / "BUR5"
+    output_folder = extracted_data_path / "UNFCCC" / "Singapore"
     if not output_folder.exists():
         output_folder.mkdir()
 
-    output_filename = 'SGP_BUR5_2022_'
-    inventory_file_pdf = 'Singapore_-_NC5BUR5.pdf'
-    #years_to_read = range(1990, 2018 + 1)
+    output_filename = "SGP_BUR5_2022_"
+    inventory_file_pdf = "Singapore_-_NC5BUR5.pdf"
+    # years_to_read = range(1990, 2018 + 1)
 
     # define locale to use for str to float conversion
-    locale_to_use = 'en_SG.UTF-8'
+    locale_to_use = "en_SG.UTF-8"
     locale.setlocale(locale.LC_NUMERIC, locale_to_use)

     pagesToRead = table_defs.keys()
@@ -69,9 +74,14 @@ if __name__ == "__main__":
             print(f"Reading table {table_on_page}")
             area = table_def_templates[table_on_page]["area"]
             cols = table_def_templates[table_on_page]["cols"]
-            tables = camelot.read_pdf(str(input_folder / inventory_file_pdf),
-                                      pages=str(page), flavor='stream',
-                                      table_areas=area, columns=cols, split_text=True)
+            tables = camelot.read_pdf(
+                str(input_folder / inventory_file_pdf),
+                pages=str(page),
+                flavor="stream",
+                table_areas=area,
+                columns=cols,
+                split_text=True,
+            )
 
             df_current = tables[0].df.copy(deep=True)
             # drop the old header
@@ -79,39 +89,52 @@ if __name__ == "__main__":
                 df_current = df_current.drop(table_defs[page]["drop_rows"])
             elif "drop_rows" in table_def_templates[table_on_page].keys():
                 df_current = df_current.drop(
-                    table_def_templates[table_on_page]["drop_rows"])
+                    table_def_templates[table_on_page]["drop_rows"]
+                )
             # add new header
-            if 'header' in table_defs[page].keys():
+            if "header" in table_defs[page].keys():
                 df_current.columns = pd.MultiIndex.from_tuples(
-                    zip(table_defs[page]['header']['entity'],
-                        table_defs[page]['header']['unit']))
+                    zip(
+                        table_defs[page]["header"]["entity"],
+                        table_defs[page]["header"]["unit"],
+                    )
+                )
             else:
                 df_current.columns = pd.MultiIndex.from_tuples(
-                    zip(table_def_templates[table_on_page]['header']['entity'],
-                        table_def_templates[table_on_page]['header']['unit']))
+                    zip(
+                        table_def_templates[table_on_page]["header"]["entity"],
+                        table_def_templates[table_on_page]["header"]["unit"],
+                    )
+                )
 
             # drop cols if necessary
             if "drop_cols" in table_defs[page].keys():
-                # print(df_current.columns.values)
+                # print(df_current.columns.to_numpy())
                 df_current = df_current.drop(columns=table_defs[page]["drop_cols"])
             elif "drop_cols" in table_def_templates[table_on_page].keys():
                 df_current = df_current.drop(columns=table_defs[page]["drop_cols"])

             # rename category column
-            df_current.rename(columns={table_defs[page]["category_col"]: index_cols[0]},
-                              inplace=True)
+            df_current = df_current.rename(
+                columns={table_defs[page]["category_col"]: index_cols[0]}
+            )
 
             # replace double \n
             df_current[index_cols[0]] = df_current[index_cols[0]].str.replace("\n", " ")
             # replace double and triple spaces
-            df_current[index_cols[0]] = df_current[index_cols[0]].str.replace("   ", " ")
+            df_current[index_cols[0]] = df_current[index_cols[0]].str.replace(
+                "   ", " "
+            )
             df_current[index_cols[0]] = df_current[index_cols[0]].str.replace("  ", " ")

             # fix the split rows
             for n_rows in table_def_templates[table_on_page]["rows_to_fix"].keys():
-                df_current = fix_rows(df_current,
-                                      table_def_templates[table_on_page]["rows_to_fix"][
-                                          n_rows], index_cols[0], n_rows)
+                df_current = fix_rows(
+                    df_current,
+                    table_def_templates[table_on_page]["rows_to_fix"][n_rows],
+                    index_cols[0],
+                    n_rows,
+                )
 
             # replace category names with typos
             df_current[index_cols[0]] = df_current[index_cols[0]].replace(cat_names_fix)
@@ -122,7 +145,7 @@ if __name__ == "__main__":
             # set index
             # df_current = df_current.set_index(index_cols)
             # strip trailing and leading spaces and remove "^"
-            for col in df_current.columns.values:
+            for col in df_current.columns.to_numpy():
                 df_current[col] = df_current[col].str.strip()
                 df_current[col] = df_current[col].str.replace("^", "")
 
@@ -132,19 +155,24 @@ if __name__ == "__main__":
                 df_this_page = df_current.copy(deep=True)
             else:
                 # find intersecting cols
-                cols_this_page = df_this_page.columns.values
+                cols_this_page = df_this_page.columns.to_numpy()
                 # print(f"cols this page: {cols_this_page}")
-                cols_current = df_current.columns.values
+                cols_current = df_current.columns.to_numpy()
                 # print(f"cols current: {cols_current}")
                 cols_both = list(set(cols_this_page).intersection(set(cols_current)))
                 # print(f"cols both: {cols_both}")
                 if len(cols_both) > 0:
-                    df_this_page = df_this_page.merge(df_current, how='outer', on=cols_both,
-                                                      suffixes=(None, None))
+                    df_this_page = df_this_page.merge(
+                        df_current, how="outer", on=cols_both, suffixes=(None, None)
+                    )
                 else:
-                    df_this_page = df_this_page.merge(df_current, how='outer',
-                                                      left_index=True, right_index=True,
-                                                      suffixes=(None, None))
+                    df_this_page = df_this_page.merge(
+                        df_current,
+                        how="outer",
+                        left_index=True,
+                        right_index=True,
+                        suffixes=(None, None),
+                    )
 
                 df_this_page = df_this_page.groupby(index_cols).first().reset_index()
                 # print(df_this_page)
@@ -152,28 +180,34 @@ if __name__ == "__main__":
 
         # set index and convert to long format
         df_this_page = df_this_page.set_index(index_cols)
-        df_this_page_long = pm2.pm2io.nir_convert_df_to_long(df_this_page,
-                                                             table_defs[page]["year"],
-                                                             header_long)
+        df_this_page_long = pm2.pm2io.nir_convert_df_to_long(
+            df_this_page, table_defs[page]["year"], header_long
+        )
 
         # drop the rows with memo items etc
         for cat in cats_remove:
             df_this_page_long = df_this_page_long.drop(
-                df_this_page_long.loc[df_this_page_long.loc[:, index_cols[0]] == cat].index)
+                df_this_page_long.loc[
+                    df_this_page_long.loc[:, index_cols[0]] == cat
+                ].index
+            )
 
         # make a copy of the categories row
         df_this_page_long.loc[:, "category"] = df_this_page_long.loc[:, index_cols[0]]

         # replace cat names by codes in col "Categories"
         # first the manual replacements
-        df_this_page_long.loc[:, "category"] = df_this_page_long.loc[:, "category"].replace(
-            cat_codes_manual)
+        df_this_page_long.loc[:, "category"] = df_this_page_long.loc[
+            :, "category"
+        ].replace(cat_codes_manual)
+
         # then the regex replacements
-        def repl(m):
-            return convert_ipcc_code_primap_to_primap2('IPC' + m.group('code'))
-        df_this_page_long.loc[:, "category"] = df_this_page_long.loc[:,
-                                               "category"].str.replace(cat_code_regexp,
-                                                                       repl, regex=True)
+        def repl(m):  # noqa: D103
+            return convert_ipcc_code_primap_to_primap2("IPC" + m.group("code"))
+
+        df_this_page_long.loc[:, "category"] = df_this_page_long.loc[
+            :, "category"
+        ].str.replace(cat_code_regexp, repl, regex=True)
         df_this_page_long.loc[:, "category"].unique()

         # strip spaces in data col
@@ -185,27 +219,29 @@ if __name__ == "__main__":
         df_this_page_long.columns = df_this_page_long.columns.map(str)

         # remove thousands separators as pd.to_numeric can't deal with that
-        df_this_page_long.loc[:, "data"] = df_this_page_long.loc[:, "data"].str.replace(',',
-                                                                                        '')
+        df_this_page_long.loc[:, "data"] = df_this_page_long.loc[:, "data"].str.replace(
+            ",", ""
+        )
 
         # drop orig cat name as it's not unique over all tables (keep until here in case
         # it's needed for debugging)
-        df_this_page_long = df_this_page_long.drop(columns='orig_cat_name')
+        df_this_page_long = df_this_page_long.drop(columns="orig_cat_name")
 
         data_page_if = pm2.pm2io.convert_long_dataframe_if(
             df_this_page_long,
             coords_cols=coords_cols,
-            #add_coords_cols=add_coords_cols,
+            # add_coords_cols=add_coords_cols,
             coords_defaults=coords_defaults,
             coords_terminologies=coords_terminologies,
             coords_value_mapping=coords_value_mapping[
-                table_defs[page]["coords_value_mapping"]],
+                table_defs[page]["coords_value_mapping"]
+            ],
             # coords_value_filling=coords_value_filling,
             filter_remove=filter_remove,
             # filter_keep=filter_keep,
             meta_data=meta_data,
             convert_str=True,
-            time_format='%Y',
+            time_format="%Y",
         )

         # conversion to PRIMAP2 native format
@@ -226,13 +262,16 @@ if __name__ == "__main__":
     if not output_folder.exists():
         output_folder.mkdir()
     pm2.pm2io.write_interchange_format(
-        output_folder / (output_filename + coords_terminologies["category"] + "_raw"), data_if)
+        output_folder / (output_filename + coords_terminologies["category"] + "_raw"),
+        data_if,
+    )
 
     encoding = {var: compression for var in data_pm2.data_vars}
     data_pm2.pr.to_netcdf(
-        output_folder / (output_filename + coords_terminologies["category"] + "_raw.nc"),
-        encoding=encoding)
-
+        output_folder
+        / (output_filename + coords_terminologies["category"] + "_raw.nc"),
+        encoding=encoding,
+    )
 
     #### processing
     data_proc_pm2 = data_pm2
@@ -246,22 +285,21 @@ if __name__ == "__main__":
         processing_info_country=processing_info_step1,
     )
 
-
     data_proc_pm2 = process_data_for_country(
         data_proc_pm2,
         entities_to_ignore=[],
         gas_baskets=gas_baskets,
         processing_info_country=processing_info_step2,
-        cat_terminology_out = terminology_proc,
-        #category_conversion = None,
-        #sectors_out = None,
+        cat_terminology_out=terminology_proc,
+        # category_conversion = None,
+        # sectors_out = None,
     )

     # adapt source and metadata
     # TODO: processing info is present twice
-    current_source = data_proc_pm2.coords["source"].values[0]
+    current_source = data_proc_pm2.coords["source"].to_numpy()[0]
     data_temp = data_proc_pm2.pr.loc[{"source": current_source}]
-    data_proc_pm2 = data_proc_pm2.pr.set("source", 'BUR_NIR', data_temp)
+    data_proc_pm2 = data_proc_pm2.pr.set("source", "BUR_NIR", data_temp)
 
     # ###
     # save data to IF and native format
@@ -270,10 +308,10 @@ if __name__ == "__main__":
     if not output_folder.exists():
         output_folder.mkdir()
     pm2.pm2io.write_interchange_format(
-        output_folder / (output_filename + terminology_proc), data_proc_if)
+        output_folder / (output_filename + terminology_proc), data_proc_if
+    )
 
     encoding = {var: compression for var in data_proc_pm2.data_vars}
     data_proc_pm2.pr.to_netcdf(
-        output_folder / (output_filename + terminology_proc + ".nc"),
-        encoding=encoding)
-
+        output_folder / (output_filename + terminology_proc + ".nc"), encoding=encoding
+    )

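The reading loop above centres on camelot's stream flavor with explicit table areas. A minimal standalone sketch of that call pattern (file name, page number, and coordinates are placeholders, not values from the commit):

import camelot

# Areas are PDF points given as "x1,y1,x2,y2" strings (top-left and
# bottom-right corners); the values here are placeholders.
tables = camelot.read_pdf(
    "inventory.pdf",
    pages="5",
    flavor="stream",  # no ruled table lines; columns inferred from whitespace
    table_areas=["36,523,563,68"],
    split_text=True,
)
df = tables[0].df  # all cells are strings; header and dtypes are fixed afterwards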
+ 30 - 0
src/unfccc_ghg_data/unfccc_reader/Taiwan/__init__.py

@@ -0,0 +1,30 @@
+"""Read Taiwan's inventories
+
+Scripts and configurations to read Taiwan's inventories.
+Currently, the following submissions and datasets are available (all datasets
+including DI (read using the DI-reader) and legacy BUR/NIR (no code)):
+
+.. exec_code::
+    :hide_code:
+
+    from unfccc_ghg_data.helper.functions import (get_country_datasets,
+                                                  get_country_submissions)
+    country = 'TWN'
+    # print available submissions
+    print("="*15 + " Available submissions " + "="*15)
+    get_country_submissions(country, True)
+    print("")
+
+    #print available datasets
+    print("="*15 + " Available datasets " + "="*15)
+    get_country_datasets(country, True)
+
+You can also obtain this information running
+
+.. code-block:: bash
+
+    poetry run doit country_info country=TWN
+
+See below for a listing of scripts for BUR/NIR reading including links.
+
+"""

+ 194 - 120
src/unfccc_ghg_data/unfccc_reader/Taiwan/config_twn_nir2022.py

@@ -1,4 +1,10 @@
-# config and functions for Taiwan NIR 2022
+"""Config for Taiwan's 2022 inventory
+
+Partial configuration for camelot and data aggregation. PRIMAP2 conversion
+config and metadata are defined in the reading script
+
+"""
+
 
 from typing import Union

@@ -6,9 +12,36 @@ import pandas as pd
 
 gwp_to_use = "AR4GWP100"

-def fix_rows(data: pd.DataFrame, rows_to_fix: list, col_to_use: str, n_rows: int)->pd.DataFrame:
+
+def fix_rows(
+    data: pd.DataFrame, rows_to_fix: list, col_to_use: str, n_rows: int
+) -> pd.DataFrame:
+    """
+    Combine split rows
+
+    This function combines rows which have been split into several rows during data
+    reading from pdf because they contained line breaks.
+
+    Parameters
+    ----------
+    data: pd.DataFrame
+        The data to work with
+    rows_to_fix: list
+        List of values for which to fix rows
+    col_to_use: str
+        column to use to find the rows to merge
+    n_rows: int
+        How many rows to combine for each row found. e.g. 3 means combine the found
+        row with the following two rows. Negative values are used for more
+        complicated situations where the rows to merge are also before the position
+        of the value that indicates the merge. See code for details
+
+    Returns
+    -------
+        pandas DataFrame with combined rows. The individual rows are removed
+    """
     for row in rows_to_fix:
-        #print(row)
+        # print(row)
         # find the row number and collect the row and the next two rows
         index = data.loc[data[col_to_use] == row].index
         if not list(index):
@@ -20,35 +53,35 @@ def fix_rows(data: pd.DataFrame, rows_to_fix: list, col_to_use: str, n_rows: int
         for item in index:
             loc = data.index.get_loc(item)
             ####print(data[col_to_use].loc[loc + 1])
-            if n_rows == -2:
+            if n_rows == -2:  # noqa: PLR2004
                 locs_to_merge = list(range(loc - 1, loc + 1))
                 loc_to_check = loc - 1
-            #if n_rows == -3:
+            # if n_rows == -3:
             #    locs_to_merge = list(range(loc - 1, loc + 2))
-            #elif n_rows == -5:
+            # elif n_rows == -5:
             #    locs_to_merge = list(range(loc - 1, loc + 4))
             else:
                 locs_to_merge = list(range(loc, loc + n_rows))
                 loc_to_check = loc + 1
 
-            if data[col_to_use].loc[loc_to_check] == '':
+            if not data[col_to_use].loc[loc_to_check]:
                 rows_to_merge = data.iloc[locs_to_merge]
                 indices_to_merge = rows_to_merge.index
                 # replace numerical NaN values
                 ####print(rows_to_merge)
-                rows_to_merge = rows_to_merge.fillna('')
+                rows_to_merge = rows_to_merge.fillna("")
                 ####print("fillna")
                 ####print(rows_to_merge)
                 # join the three rows
-                new_row = rows_to_merge.agg(' '.join)
+                new_row = rows_to_merge.agg(" ".join)
                 # replace the double spaces that are created
                 # must be done here and not at the end as splits are not always
                 # the same and join would produce different col values
                 new_row = new_row.str.replace("  ", " ")
                 new_row = new_row.str.strip()
-                #new_row = new_row.str.replace("N O", "NO")
-                #new_row = new_row.str.replace(", N", ",N")
-                #new_row = new_row.str.replace("- ", "-")
+                # new_row = new_row.str.replace("N O", "NO")
+                # new_row = new_row.str.replace(", N", ",N")
+                # new_row = new_row.str.replace("- ", "-")
                 data.loc[indices_to_merge[0]] = new_row
                 indices_to_drop = indices_to_drop + list(indices_to_merge[1:])
 
@@ -56,12 +89,43 @@ def fix_rows(data: pd.DataFrame, rows_to_fix: list, col_to_use: str, n_rows: int
         data = data.reset_index(drop=True)
     return data
 
-def make_wide_table(data: pd.DataFrame, keyword: str, col: Union[int, str], index_cols: list[Union[int, str]])->pd.DataFrame:
+
+def make_wide_table(
+    data: pd.DataFrame,
+    keyword: str,
+    col: Union[int, str],
+    index_cols: list[Union[int, str]],
+) -> pd.DataFrame:
+    """
+    Transform a table with sections for gases to a gas-wide table
+
+    Some tables are rolled up, i.e. the header repeats within the table and the
+    tables are composed of several tables for different year ranges stacked on top of
+    each other. These tables are unrolled and converted to a proper time-wide format
+    without repetition of headers.
+
+    Parameters
+    ----------
+    data: pd.DataFrame
+        the data to convert
+    keyword: str
+        The keyword used to identify the header, e.g. 'GHG Emission Sources and Sinks'
+    col: Union[int, str]
+        Column to look for the keyword
+    index_cols: list[Union[int, str]]
+        Columns to use as index for the output DataFrame
+
+
+    Returns
+    -------
+        pandas DataFrame in time-wide format
+
+    """
     index = data.loc[data[col] == keyword].index
     if not list(index):
         print("Keyword for table transformation not found")
         return data
-    elif len(index)==1:
+    elif len(index) == 1:
         print("Keyword for table transformation found only once")
         return data
     else:
@@ -74,83 +138,88 @@ def make_wide_table(data: pd.DataFrame, keyword: str, col: Union[int, str], inde
                 next_loc = data.index[-1] + 1
             df_to_add = data.loc[list(range(loc, next_loc))]
             # select only cols which don't have NaN, Null, or '' as header
-            filter_nan = ((~df_to_add.iloc[0].isnull()) & (df_to_add.iloc[0] != 'NaN')& (df_to_add.iloc[0] != ''))
-            df_to_add = df_to_add.loc[: , filter_nan]
+            filter_nan = (
+                (~df_to_add.iloc[0].isna())
+                & (df_to_add.iloc[0] != "NaN")
+                & (df_to_add.iloc[0])
+            )
+            df_to_add = df_to_add.loc[:, filter_nan]
             df_to_add.columns = df_to_add.iloc[0]
-            #print(df_to_add.columns)
+            # print(df_to_add.columns)
             df_to_add = df_to_add.drop(loc)
             df_to_add = df_to_add.set_index(index_cols)

             if df_all is None:
                 df_all = df_to_add
             else:
-                df_all = pd.concat([df_all, df_to_add], axis=1, join='outer')
+                df_all = pd.concat([df_all, df_to_add], axis=1, join="outer")
         return df_all


 # page defs to hold information on reading the table
 page_defs = {
-    '5': {
-        "table_areas": ['36,523,563,68'],
+    "5": {
+        "table_areas": ["36,523,563,68"],
         "split_text": False,
         "flavor": "stream",
     },
-    '6': {
-        "table_areas": ['34,562,563,53'],
-        #"columns": ['195,228,263,295,328,363,395,428,462,495,529'], # works without
+    "6": {
+        "table_areas": ["34,562,563,53"],
+        # "columns": ['195,228,263,295,328,363,395,428,462,495,529'], # works without
         "split_text": True,
         "flavor": "stream",
     },
-    '7': {
-        "table_areas": ['36,740,499,482', '36,430,564,53'],
+    "7": {
+        "table_areas": ["36,740,499,482", "36,430,564,53"],
         "split_text": True,
         "flavor": "stream",
     },
-    '8': {
-        "table_areas": ['35,748,503,567'],
+    "8": {
+        "table_areas": ["35,748,503,567"],
         "split_text": True,
         "flavor": "stream",
     },
-    '9': {
-        "table_areas": ['35,747,565,315', '36,273,565,50'],
+    "9": {
+        "table_areas": ["35,747,565,315", "36,273,565,50"],
         "split_text": False,
         "flavor": "stream",
     },
-    '11': {
-        "table_areas": ['35,744,563,434'],
+    "11": {
+        "table_areas": ["35,744,563,434"],
         "split_text": True,
         "flavor": "stream",
     },
-    '12': {
-        "table_areas": ['33,747,562,86'],
+    "12": {
+        "table_areas": ["33,747,562,86"],
         "split_text": True,
         "flavor": "stream",
     },
-    '13': {
-        "table_areas": ['34,303,564,54'],
+    "13": {
+        "table_areas": ["34,303,564,54"],
         "split_text": True,
         "flavor": "stream",
     },
-    '14': {
-        "table_areas": ['34,754,564,256'],
-        "columns": ['220,251,283,314,344,371,406,438,470,500,530'],
+    "14": {
+        "table_areas": ["34,754,564,256"],
+        "columns": ["220,251,283,314,344,371,406,438,470,500,530"],
         "split_text": True,
         "flavor": "stream",
     },
-    '15': {
-        "table_areas": ['34,487,564,42'],
+    "15": {
+        "table_areas": ["34,487,564,42"],
         "split_text": True,
         "flavor": "stream",
     },
-    '16': {
-        "table_areas": ['34,418,564,125'],
-        #"columns": ['107,209,241,273,306,338,369,402,433,466,498,533'],
+    "16": {
+        "table_areas": ["34,418,564,125"],
+        # "columns": ['107,209,241,273,306,338,369,402,433,466,498,533'],
         "split_text": True,
         "flavor": "lattice",
-    }, # with stream the row index is messed up with lattice the column index ... red with lattice and fix col header manualy
-    '17': {
-        "table_areas": ['34,534,564,49'],
-        "columns": ['188,232,263,298,331,362,398,432,464,497,530'],
+    },  # with stream the row index is messed up with lattice the column index ...
+    # read with lattice and fix col header manually
+    "17": {
+        "table_areas": ["34,534,564,49"],
+        "columns": ["188,232,263,298,331,362,398,432,464,497,530"],
         "split_text": True,
         "flavor": "stream",
     },
@@ -158,38 +227,40 @@ page_defs = {
 
 # table defs to hold information on how to process the tables
 table_defs = {
-    'ES2.2': { # 1990-2020 Carbon Dioxide Emissions and Sequestration in Taiwan
+    "ES2.2": {  # 1990-2020 Carbon Dioxide Emissions and Sequestration in Taiwan
         "tables": [1, 2],
         "rows_to_fix": {
             0: {
-                3: ['1.A.4.c Agriculture, Forestry, Fishery, and',
-                    '2.D Non-Energy Products from Fuels and',
-                    '4. Land Use, Land Use Change and Forestry'],
+                3: [
+                    "1.A.4.c Agriculture, Forestry, Fishery, and",
+                    "2.D Non-Energy Products from Fuels and",
+                    "4. Land Use, Land Use Change and Forestry",
+                ],
             },
         },
-        "index_cols": ['GHG Emission Source and Sinks'],
-        "wide_keyword": 'GHG Emission Source and Sinks',
+        "index_cols": ["GHG Emission Source and Sinks"],
+        "wide_keyword": "GHG Emission Source and Sinks",
         "col_wide_kwd": 0,
         "entity": "CO2",
         "unit": "kt",
         "cat_codes_manual": {
-            'Net GHG Emission (including LULUCF)': '0',
-            'Total GHG Emission (excluding LULUCF)': 'M.0.EL',
+            "Net GHG Emission (including LULUCF)": "0",
+            "Total GHG Emission (excluding LULUCF)": "M.0.EL",
         },
     },
-    'ES2.3': { # 1990-2020 Methane Emissions in Taiwan
+    "ES2.3": {  # 1990-2020 Methane Emissions in Taiwan
         "tables": [3, 4],
         "rows_to_fix": {},
-        "index_cols": ['GHG Emission Sources and Sinks'],
-        "wide_keyword": 'GHG Emission Sources and Sinks',
+        "index_cols": ["GHG Emission Sources and Sinks"],
+        "wide_keyword": "GHG Emission Sources and Sinks",
         "col_wide_kwd": 0,
         "entity": f"CH4 ({gwp_to_use})",
         "unit": "ktCO2eq",
         "cat_codes_manual": {
-            'Total Methane Emissions': '0',
+            "Total Methane Emissions": "0",
         },
     },
-    'ES2.4': { # 1990-2020 Nitrous Oxide Emissions in Taiwan
+    "ES2.4": {  # 1990-2020 Nitrous Oxide Emissions in Taiwan
         "tables": [5],
         "fix_cats": {
             0: {
@@ -197,33 +268,33 @@ table_defs = {
             },
         },
         "rows_to_fix": {},
-        "index_cols": ['GHG Emission Sources and Sinks'],
-        "wide_keyword": 'GHG Emission Sources and Sinks',
+        "index_cols": ["GHG Emission Sources and Sinks"],
+        "wide_keyword": "GHG Emission Sources and Sinks",
         "col_wide_kwd": 0,
         "entity": f"N2O ({gwp_to_use})",
         "unit": "ktCO2eq",
         "cat_codes_manual": {
-            'Total Nitrous Oxide Emissions': '0',
+            "Total Nitrous Oxide Emissions": "0",
         },
     },
-    'ES3.1': { # 1990-2020 Greenhouse Gas Emission in Taiwan by Sector
+    "ES3.1": {  # 1990-2020 Greenhouse Gas Emission in Taiwan by Sector
         "tables": [7],
         "rows_to_fix": {},
-        "index_cols": ['GHG Emission Sources and Sinks'],
-        "wide_keyword": 'GHG Emission Sources and Sinks',
+        "index_cols": ["GHG Emission Sources and Sinks"],
+        "wide_keyword": "GHG Emission Sources and Sinks",
         "col_wide_kwd": 0,
         "entity": f"KYOTOGHG ({gwp_to_use})",
         "unit": "ktCO2eq",
         "cat_codes_manual": {
-            'Net GHG Emission (including LULUCF)': '0',
-            'Total GHG Emission (excluding LULUCF)': 'M.0.EL',
+            "Net GHG Emission (including LULUCF)": "0",
+            "Total GHG Emission (excluding LULUCF)": "M.0.EL",
         },
     },
-    'ES3.2': { # 1990-2020 Greenhouse Gas Emissions Produced by Energy Sector in Taiwan
+    "ES3.2": {  # 1990-2020 Greenhouse Gas Emissions Produced by Energy Sector in Taiwan
         "tables": [8],
         "rows_to_fix": {},
-        "index_cols": ['GHG Emission Sources and Sinks'],
-        "wide_keyword": 'GHG Emission Sources and Sinks',
+        "index_cols": ["GHG Emission Sources and Sinks"],
+        "wide_keyword": "GHG Emission Sources and Sinks",
         "col_wide_kwd": 0,
         "gas_splitting": {
             "Total CO2 Emission": "CO2",
@@ -234,17 +305,18 @@ table_defs = {
         },
         "unit": "ktCO2eq",
         "cat_codes_manual": {
-            'Total CO2 Emission': '1',
-            'Total CH4 Emission': '1',
-            'Total N2O Emission': '1',
-            'Total Emission from Energy Sector': '1',
+            "Total CO2 Emission": "1",
+            "Total CH4 Emission": "1",
+            "Total N2O Emission": "1",
+            "Total Emission from Energy Sector": "1",
         },
     },
-    'ES3.3': { # 1990-2020 Greenhouse Gas Emissions Produced by Industrial Process and Product Use Sector (IPPU) in Taiwan
-        "tables": [9,10],
+    "ES3.3": {  # 1990-2020 Greenhouse Gas Emissions Produced by Industrial
+        # Process and Product Use Sector (IPPU) in Taiwan
+        "tables": [9, 10],
         "rows_to_fix": {},
-        "index_cols": ['GHG Emission Sources and Sinks'],
-        "wide_keyword": 'GHG Emission Sources and Sinks',
+        "index_cols": ["GHG Emission Sources and Sinks"],
+        "wide_keyword": "GHG Emission Sources and Sinks",
         "col_wide_kwd": 0,
         "gas_splitting": {
             "Total CO2 Emission": "CO2",
@@ -259,24 +331,26 @@ table_defs = {
         },
         "unit": "ktCO2eq",
         "cat_codes_manual": {
-            'Total CO2 Emission': '2',
-            'Total CH4 Emission': '2',
-            'Total N2O Emission': '2',
-            'Total HFCs Emission': '2',
-            'Total PFCs Emission (2.E Electronics Industry)': '2.E',
-            'Total SF6 Emission': '2',
-            'Total NF3 Emission (2.E Electronics Industry)': '2.E',
-            'Total Emission from IPPU Sector': '2',
+            "Total CO2 Emission": "2",
+            "Total CH4 Emission": "2",
+            "Total N2O Emission": "2",
+            "Total HFCs Emission": "2",
+            "Total PFCs Emission (2.E Electronics Industry)": "2.E",
+            "Total SF6 Emission": "2",
+            "Total NF3 Emission (2.E Electronics Industry)": "2.E",
+            "Total Emission from IPPU Sector": "2",
         },
         "drop_rows": [
-            ("2.D Non-Energy Products from Fuels and Solvent Use", "CO2"), # has lower significant digits than in table ES2.2
-        ]
+            ("2.D Non-Energy Products from Fuels and Solvent Use", "CO2"),  # has lower
+            # significant digits than in table ES2.2
+        ],
     },
-    'ES3.4': { # 1990-2020 Greenhouse Gas Emissions Produced by Agriculture Sector in Taiwan
+    "ES3.4": {  # 1990-2020 Greenhouse Gas Emissions Produced by Agriculture Sector
+        # in Taiwan
         "tables": [11],
         "rows_to_fix": {},
-        "index_cols": ['GHG Emission Sources and Sinks'],
-        "wide_keyword": 'GHG Emission Sources and Sinks',
+        "index_cols": ["GHG Emission Sources and Sinks"],
+        "wide_keyword": "GHG Emission Sources and Sinks",
         "col_wide_kwd": 0,
         "gas_splitting": {
             "Total CO2 Emission (3.H Urea applied)": "CO2",
@@ -287,22 +361,22 @@ table_defs = {
         },
         "unit": "ktCO2eq",
         "cat_codes_manual": {
-            'Total CO2 Emission (3.H Urea applied)': '3.H',
-            'Total CH4 Emission': '3',
-            'Total N2O Emission': '3',
-            'Total Emission From Agriculture Sector': '3',
+            "Total CO2 Emission (3.H Urea applied)": "3.H",
+            "Total CH4 Emission": "3",
+            "Total N2O Emission": "3",
+            "Total Emission From Agriculture Sector": "3",
         },
     },
-    'ES3.6': { # 1990-2020 Greenhouse Gas Emissions in Taiwan by Waste Sector
+    "ES3.6": {  # 1990-2020 Greenhouse Gas Emissions in Taiwan by Waste Sector
         "tables": [13],
         "rows_to_fix": {
             0: {
                 3: ["Total CO2 Emission"],
             },
         },
-        "index_cols": ['GHG Emission Sources and Sinks'],
-        "wide_keyword": 'GHG Emission Sources and Sinks',
-        "col_wide_kwd": 0, # two column header
+        "index_cols": ["GHG Emission Sources and Sinks"],
+        "wide_keyword": "GHG Emission Sources and Sinks",
+        "col_wide_kwd": 0,  # two column header
         "gas_splitting": {
             "Total CO2 Emission (5.C Incineration and Open Burning of Waste)": "CO2",
             "Total CH4 Emission": f"CH4 ({gwp_to_use})",
@@ -312,51 +386,51 @@ table_defs = {
         },
         "unit": "ktCO2eq",
         "cat_codes_manual": {
-            'Total CO2 Emission (5.C Incineration and Open Burning of Waste)': '5.C',
-            'Total CH4 Emission': '5',
-            'Total N2O Emission': '5',
-            'Total Emission from Waste Sector': '5',
+            "Total CO2 Emission (5.C Incineration and Open Burning of Waste)": "5.C",
+            "Total CH4 Emission": "5",
+            "Total N2O Emission": "5",
+            "Total Emission from Waste Sector": "5",
         },
     },
 }

 table_defs_skip = {
-    'ES2.1': { # 1990-2020 Greenhouse Gas Emissions and Sequestration in Taiwan by Type
+    "ES2.1": {  # 1990-2020 Greenhouse Gas Emissions and Sequestration in Taiwan by Type
         "tables": [0],
         "rows_to_fix": {
             0: {
-                3: ['CO2'],
+                3: ["CO2"],
             },
             1: {  # where col 0 is empty
-                3: ['Net GHG Emission', 'Total GHG Emission'],
+                3: ["Net GHG Emission", "Total GHG Emission"],
             },
         },
-        "index_cols": ['GHG', 'GWP'],
-        "wide_keyword": 'GHG',
+        "index_cols": ["GHG", "GWP"],
+        "wide_keyword": "GHG",
         "col_wide_kwd": 0,
         "unit": "ktCO2eq",
     },
-    'ES2.5': { # 1990-2020 Fluoride-Containing Gas Emissions in Taiwan
+    "ES2.5": {  # 1990-2020 Fluoride-Containing Gas Emissions in Taiwan
         "tables": [6],
         "rows_to_fix": {
             0: {
-                -2: ['Total SF6 Emissions',
-                     'Total NF3 Emissions'],
+                -2: ["Total SF6 Emissions", "Total NF3 Emissions"],
             },
         },
-        "index_cols": ['GHG Emission Sources and Sinks'],
-        "wide_keyword": 'GHG Emission Sources and Sinks',
+        "index_cols": ["GHG Emission Sources and Sinks"],
+        "wide_keyword": "GHG Emission Sources and Sinks",
         "col_wide_kwd": 0,
-        #"entity": "CO2",
+        # "entity": "CO2",
         "unit": "ktCO2eq",
     },
-    'ES3.5': { # skip for now: 1990-2020 Changes in Carbon Sequestration by LULUCF Sector in Taiwan2],
+    "ES3.5": {  # skip for now: 1990-2020 Changes in Carbon Sequestration by LULUCF
+        # Sector in Taiwan2],
         "tables": [12],
         "rows_to_fix": {},
-        "index_cols": ['GHG Emission Sources and Sinks'], #header is merged col :-(
-        "wide_keyword": 'GHG Emission Sources and Sinks',
-        "col_wide_kwd": 0, # two column header
+        "index_cols": ["GHG Emission Sources and Sinks"],  # header is merged col :-(
+        "wide_keyword": "GHG Emission Sources and Sinks",
+        "col_wide_kwd": 0,  # two column header
         "unit": "kt",
         "entity": "CO2",
-    }, # need to consider the two columns specially (merge?)
+    },  # need to consider the two columns specially (merge?)
 }

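The make_wide_table helper above unrolls tables whose header row repeats for each stacked year-range block. A toy illustration of the same idea (the data are invented and the loop is a simplified sketch, not the function itself):

import pandas as pd

# Two stacked blocks sharing the same header keyword in column 0.
raw = pd.DataFrame(
    [
        ["GHG Emission Sources and Sinks", "1990", "1991"],
        ["1. Energy", "10", "11"],
        ["GHG Emission Sources and Sinks", "1992", "1993"],
        ["1. Energy", "12", "13"],
    ]
)
starts = raw.index[raw[0] == "GHG Emission Sources and Sinks"]
parts = []
for i, start in enumerate(starts):
    end = starts[i + 1] if i + 1 < len(starts) else len(raw)
    block = raw.iloc[start:end].copy()
    block.columns = block.iloc[0]  # first row of each block is its header
    block = block.iloc[1:].set_index("GHG Emission Sources and Sinks")
    parts.append(block)
wide = pd.concat(parts, axis=1)  # one row per category, all years as columns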
+ 164 - 104
src/unfccc_ghg_data/unfccc_reader/Taiwan/read_TWN_2022_Inventory_from_pdf.py

@@ -1,14 +1,24 @@
-# this script reads data from Taiwan's 2022 national inventory
-# Data is read from the english summary pdf
-# TODO: add further GWPs and gas baskets
+"""
+Read Taiwan's 2022 national inventory from pdf
+
+This script reads data from Taiwan's 2022 national inventory
+Data are read from the English summary pdf
+TODO: add further GWPs and gas baskets
+
+"""
 
 import copy

 import camelot
 import pandas as pd
 import primap2 as pm2
-from .config_twn_nir2022 import (fix_rows, gwp_to_use, make_wide_table, page_defs,
-                                 table_defs)
+from config_twn_nir2022 import (
+    fix_rows,
+    gwp_to_use,
+    make_wide_table,
+    page_defs,
+    table_defs,
+)
 from primap2.pm2io._data_reading import matches_time_format

 from unfccc_ghg_data.helper import downloaded_data_path, extracted_data_path
@@ -17,16 +27,16 @@ if __name__ == "__main__":
     # ###
     # configuration
     # ###
-    input_folder = downloaded_data_path / 'non-UNFCCC' / 'Taiwan'
+    input_folder = downloaded_data_path / "non-UNFCCC" / "Taiwan"
     # TODO: move file to subfolder
-    output_folder = extracted_data_path / 'non-UNFCCC' / 'Taiwan'
+    output_folder = extracted_data_path / "non-UNFCCC" / "Taiwan"
     if not output_folder.exists():
         output_folder.mkdir()

-    output_filename = 'TWN_inventory_2022_'
-    inventory_file = '00_abstract_en.pdf'
+    output_filename = "TWN_inventory_2022_"
+    inventory_file = "00_abstract_en.pdf"

-    cat_code_regexp = r'(?P<code>^[a-zA-Z0-9\.]{1,7})\s.*'
+    cat_code_regexp = r"(?P<code>^[a-zA-Z0-9\.]{1,7})\s.*"

     time_format = "%Y"

@@ -79,42 +89,49 @@ if __name__ == "__main__":
     # config for part3: mapping to 2006 categories

     cat_mapping = {
-        '3': 'M.AG',
-        '3.A': '3.A.1',
-        '3.B': '3.A.2',
-        '3.C': '3.C.7',
-        '3.D': 'M.3.AS',
-        '3.F': '3.C.1.b',
-        '3.H': '3.C.3',
-        '4': 'M.LULUCF',
-        '5': '4',
-        '5.A': '4.A',
-        '5.B': '4.B',
-        '5.C': '4.C',
-        '5.D': '4.D',
-        '5.D.1': '4.D.1',
-        '5.D.2': '4.D.2',
+        "3": "M.AG",
+        "3.A": "3.A.1",
+        "3.B": "3.A.2",
+        "3.C": "3.C.7",
+        "3.D": "M.3.AS",
+        "3.F": "3.C.1.b",
+        "3.H": "3.C.3",
+        "4": "M.LULUCF",
+        "5": "4",
+        "5.A": "4.A",
+        "5.B": "4.B",
+        "5.C": "4.C",
+        "5.D": "4.D",
+        "5.D.1": "4.D.1",
+        "5.D.2": "4.D.2",
     }
     }

     aggregate_cats = {
-                'name': 'Fuel Combustion Activities'},
-        '1.B': {'sources': ['1.B.1', '1.B.2'], 'name': 'Fugitive Emissions from Fuels'},
-        '3.A': {'sources': ['3.A.1', '3.A.2'], 'name': 'Livestock'},
-        '3.C.1': {'sources': ['3.C.1.b'], 'name': 'Emissions from Biomass Burning'},
-        '3.C.5': {'sources': ['3.C.5.a', '3.C.5.b'],
-                  'name': 'Indirect N2O Emissions from Managed Soils'},
-        '3.C': {'sources': ['3.C.1', '3.C.3', 'M.3.AS', '3.C.7'],
-                'name': 'Aggregate sources and non-CO2 emissions sources on land'},
-        '3': {'sources': ['M.AG', 'M.LULUCF'], 'name': 'AFOLU'},
-        'M.AG.ELV': {'sources': ['3.C'],
-                     'name': 'Agriculture excluding livestock emissions'},
+        "1.A": {
+            "sources": ["1.A.1", "1.A.2", "1.A.3", "1.A.4"],
+            "name": "Fuel Combustion Activities",
+        },
+        "1.B": {"sources": ["1.B.1", "1.B.2"], "name": "Fugitive Emissions from Fuels"},
+        "3.A": {"sources": ["3.A.1", "3.A.2"], "name": "Livestock"},
+        "3.C.1": {"sources": ["3.C.1.b"], "name": "Emissions from Biomass Burning"},
+        "3.C.5": {
+            "sources": ["3.C.5.a", "3.C.5.b"],
+            "name": "Indirect N2O Emissions from Managed Soils",
+        },
+        "3.C": {
+            "sources": ["3.C.1", "3.C.3", "M.3.AS", "3.C.7"],
+            "name": "Aggregate sources and non-CO2 emissions sources on land",
+        },
+        "3": {"sources": ["M.AG", "M.LULUCF"], "name": "AFOLU"},
+        "M.AG.ELV": {
+            "sources": ["3.C"],
+            "name": "Agriculture excluding livestock emissions",
+        },
     }

-
     # 2 for NF3, PFCs (from 2.E)
     aggregate_cats_NF3_PFC = {
-        '2': {'sources': ['2.E'], 'name': 'Industrial Process and Product Use Sector'},
+        "2": {"sources": ["2.E"], "name": "Industrial Process and Product Use Sector"},
     }

     compression = dict(zlib=True, complevel=9)
@@ -130,11 +147,10 @@ if __name__ == "__main__":
             str(input_folder / inventory_file),
             pages=page,
             **page_defs[page],
-            )
+        )
         for table in new_tables:
             all_tables.append(table.df)

-
     # ###
     # convert tables to primap2 format
     # ###
@@ -148,39 +164,49 @@ if __name__ == "__main__":
         if len(table_def["tables"]) > 1:
             for table in table_def["tables"][1:]:
                 df_this_table = pd.concat(
-                    [df_this_table, all_tables[table]],
-                    axis=0,
-                    join='outer')
+                    [df_this_table, all_tables[table]], axis=0, join="outer"
+                )

         # fix for table ES3.6
-        if table_name == 'ES3.6':
+        if table_name == "ES3.6":
             col_idx = df_this_table[0] == "Total CO Emission"
-            df_this_table.loc[col_idx, 1:] = ''
-            df_this_table.loc[col_idx, 0] = 'Total CO2 Emission'
+            df_this_table.loc[col_idx, 1:] = ""
+            df_this_table.loc[col_idx, 0] = "Total CO2 Emission"

         df_this_table = df_this_table.reset_index(drop=True)

         # fix categories if necessary
         if "fix_cats" in table_def.keys():
             for col in table_def["fix_cats"]:
-                df_this_table[col] = df_this_table[col].replace(table_def["fix_cats"][col])
+                df_this_table[col] = df_this_table[col].replace(
+                    table_def["fix_cats"][col]
+                )

         # fix rows
         for col in table_def["rows_to_fix"].keys():
             for n_rows in table_def["rows_to_fix"][col].keys():
                 print(f"Fixing {col}, {n_rows}")
                 # replace line breaks, long hyphens, double, and triple spaces in category names
-                df_this_table.iloc[:, 0] = df_this_table.iloc[:, 0].str.replace("\n", " ")
-                df_this_table.iloc[:, 0] = df_this_table.iloc[:, 0].str.replace("   ", " ")
-                df_this_table.iloc[:, 0] = df_this_table.iloc[:, 0].str.replace("  ", " ")
-                df_this_table.iloc[:, 0] = df_this_table.iloc[:, 0].str.replace("–", "-")
-                df_this_table = fix_rows(df_this_table,
-                                         table_def["rows_to_fix"][col][n_rows], col, n_rows)
+                df_this_table.iloc[:, 0] = df_this_table.iloc[:, 0].str.replace(
+                    "\n", " "
+                )
+                df_this_table.iloc[:, 0] = df_this_table.iloc[:, 0].str.replace(
+                    "   ", " "
+                )
+                df_this_table.iloc[:, 0] = df_this_table.iloc[:, 0].str.replace(
+                    "  ", " "
+                )
+                df_this_table.iloc[:, 0] = df_this_table.iloc[:, 0].str.replace(
+                    "–", "-"
+                )
+                df_this_table = fix_rows(
+                    df_this_table, table_def["rows_to_fix"][col][n_rows], col, n_rows
+                )
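+                # fix_rows (from config_twn_nir2022) merges category labels that the
+                # PDF split across several physical rows; n_rows gives the number of
+                # raw rows that form one logical row (see the config for details)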

         # split by entity
         if "gas_splitting" in table_def.keys():
-            col_entity = [''] * len(df_this_table)
-            last_entity = ''
+            col_entity = [""] * len(df_this_table)
+            last_entity = ""
             for i in range(0, len(df_this_table)):
                 current_header = df_this_table[table_def["col_wide_kwd"]].iloc[i]
                 if current_header in table_def["gas_splitting"].keys():
@@ -191,8 +217,12 @@ if __name__ == "__main__":
             table_def["index_cols"].append("entity")
             table_def["index_cols"].append("entity")
 
 
         # make a wide table
         # make a wide table
-        df_this_table = make_wide_table(df_this_table, table_def["wide_keyword"],
-                                        table_def["col_wide_kwd"], table_def["index_cols"])
+        df_this_table = make_wide_table(
+            df_this_table,
+            table_def["wide_keyword"],
+            table_def["col_wide_kwd"],
+            table_def["index_cols"],
+        )

         if "drop_rows" in table_def.keys():
             df_this_table = df_this_table.drop(table_def["drop_rows"], axis=0)
@@ -207,11 +237,12 @@ if __name__ == "__main__":
         # add unit
         df_this_table["unit"] = table_def["unit"]

-        df_this_table = df_this_table.rename({table_def["index_cols"][0]: "orig_cat_name"},
-                                             axis=1)
+        df_this_table = df_this_table.rename(
+            {table_def["index_cols"][0]: "orig_cat_name"}, axis=1
+        )

         # print(table_def["index_cols"][0])
-        # print(df_this_table.columns.values)
+        # print(df_this_table.columns.to_numpy())

         # make a copy of the categories row
         df_this_table["category"] = df_this_table["orig_cat_name"]
@@ -219,25 +250,30 @@ if __name__ == "__main__":
         # replace cat names by codes in col "category"
         # first the manual replacements
         df_this_table["category"] = df_this_table["category"].replace(
-            table_def["cat_codes_manual"])
+            table_def["cat_codes_manual"]
+        )
+
         # then the regex replacements
-        def repl(m):
-            return m.group('code')
-        df_this_table["category"] = df_this_table["category"].str.replace(cat_code_regexp,
-                                                                          repl, regex=True)
+        def repl(m):  # noqa: D103
+            return m.group("code")
+
+        df_this_table["category"] = df_this_table["category"].str.replace(
+            cat_code_regexp, repl, regex=True
+        )
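+        # e.g. "1.A.2 Manufacturing Industries and Construction" -> "1.A.2"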

         ### convert to PRIMAP2 IF
         # remove ','
-        time_format = '%Y'
+        time_format = "%Y"
         time_columns = [
             col
-            for col in df_this_table.columns.values
+            for col in df_this_table.columns.to_numpy()
             if matches_time_format(col, time_format)
         ]

         for col in time_columns:
-            df_this_table.loc[:, col] = df_this_table.loc[:, col].str.replace(',', '',
-                                                                              regex=False)
+            df_this_table.loc[:, col] = df_this_table.loc[:, col].str.replace(
+                ",", "", regex=False
+            )
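+            # values are still strings at this point; stripping the thousands
+            # separators keeps the later numeric conversion from failing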

         # drop orig_cat_name as it's not unique per category
         df_this_table = df_this_table.drop(columns="orig_cat_name")
@@ -254,7 +290,7 @@ if __name__ == "__main__":
             # coords_value_filling=coords_value_filling,
             # filter_remove=filter_remove,
             # filter_keep=filter_keep,
-            meta_data=meta_data
+            meta_data=meta_data,
         )

         this_table_pm2 = pm2.pm2io.from_interchange_format(df_this_table_if)
@@ -267,7 +303,6 @@ if __name__ == "__main__":
     # convert back to IF to have units in the fixed format
     data_if = data_pm2.pr.to_interchange_format()

-
     # ###
     # convert to IPCC2006 categories
     # ###
@@ -275,31 +310,36 @@ if __name__ == "__main__":
     data_if_2006
     # filter_data(data_if_2006, filter_remove=filter_remove_IPCC2006)
     data_if_2006 = data_if_2006.replace(
-        {'category (IPCC2006_1996_Taiwan_Inv)': cat_mapping})
+        {"category (IPCC2006_1996_Taiwan_Inv)": cat_mapping}
+    )

     # rename the category col
-    data_if_2006.rename(
-        columns={'category (IPCC2006_1996_Taiwan_Inv)': 'category (IPCC2006_PRIMAP)'},
-        inplace=True)
-    data_if_2006.attrs['attrs']['cat'] = 'category (IPCC2006_PRIMAP)'
-    data_if_2006.attrs['dimensions']['*'] = [
-        'category (IPCC2006_PRIMAP)' if item == 'category (IPCC2006_1996_Taiwan_Inv)'
-        else item for item in data_if_2006.attrs['dimensions']['*']]
+    data_if_2006 = data_if_2006.rename(
+        columns={"category (IPCC2006_1996_Taiwan_Inv)": "category (IPCC2006_PRIMAP)"}
+    )
+    data_if_2006.attrs["attrs"]["cat"] = "category (IPCC2006_PRIMAP)"
+    data_if_2006.attrs["dimensions"]["*"] = [
+        "category (IPCC2006_PRIMAP)"
+        if item == "category (IPCC2006_1996_Taiwan_Inv)"
+        else item
+        for item in data_if_2006.attrs["dimensions"]["*"]
+    ]
 

     # aggregate categories
     for cat_to_agg in aggregate_cats:
         mask = data_if_2006["category (IPCC2006_PRIMAP)"].isin(
+            aggregate_cats[cat_to_agg]["sources"]
+        )
         df_test = data_if_2006[mask]

         if len(df_test) > 0:
             print(f"Aggregating category {cat_to_agg}")
             df_combine = df_test.copy(deep=True)

-            time_format = '%Y'
+            time_format = "%Y"
             time_columns = [
                 col
-                for col in df_combine.columns.values
+                for col in df_combine.columns.to_numpy()
                 if matches_time_format(col, time_format)
             ]

@@ -307,8 +347,15 @@ if __name__ == "__main__":
                 df_combine[col] = pd.to_numeric(df_combine[col], errors="coerce")

             df_combine = df_combine.groupby(
-                by=['source', 'scenario (PRIMAP)', 'provenance', 'area (ISO3)', 'entity',
-                    'unit']).sum(min_count=1)
+                by=[
+                    "source",
+                    "scenario (PRIMAP)",
+                    "provenance",
+                    "area (ISO3)",
+                    "entity",
+                    "unit",
+                ]
+            ).sum(min_count=1)
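+            # min_count=1 keeps the aggregate NaN instead of 0 when all inputs are NaN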

             df_combine.insert(0, "category (IPCC2006_PRIMAP)", cat_to_agg)
             # df_combine.insert(1, "cat_name_translation", aggregate_cats[cat_to_agg]["name"])
@@ -324,19 +371,21 @@ if __name__ == "__main__":
     # aggregate categories
     for cat_to_agg in aggregate_cats_NF3_PFC:
         mask = data_if_2006["category (IPCC2006_PRIMAP)"].isin(
-            aggregate_cats_NF3_PFC[cat_to_agg]["sources"])
+            aggregate_cats_NF3_PFC[cat_to_agg]["sources"]
+        )
         mask_gas = data_if_2006["entity"].isin(
-            [f"NF3 ({gwp_to_use})", f"PFCS ({gwp_to_use})"])
+            [f"NF3 ({gwp_to_use})", f"PFCS ({gwp_to_use})"]
+        )
         df_test = data_if_2006[mask & mask_gas]

         if len(df_test) > 0:
             print(f"Aggregating category {cat_to_agg}")
             df_combine = df_test.copy(deep=True)

-            time_format = '%Y'
+            time_format = "%Y"
             time_columns = [
                 col
-                for col in df_combine.columns.values
+                for col in df_combine.columns.to_numpy()
                 if matches_time_format(col, time_format)
             ]

@@ -344,8 +393,15 @@ if __name__ == "__main__":
                 df_combine[col] = pd.to_numeric(df_combine[col], errors="coerce")

             df_combine = df_combine.groupby(
-                by=['source', 'scenario (PRIMAP)', 'provenance', 'area (ISO3)', 'entity',
-                    'unit']).sum(min_count=1)
+                by=[
+                    "source",
+                    "scenario (PRIMAP)",
+                    "provenance",
+                    "area (ISO3)",
+                    "entity",
+                    "unit",
+                ]
+            ).sum(min_count=1)

             df_combine.insert(0, "category (IPCC2006_PRIMAP)", cat_to_agg)
             # df_combine.insert(1, "cat_name_translation", aggregate_cats[cat_to_agg]["name"])
@@ -362,7 +418,7 @@ if __name__ == "__main__":
     data_pm2_2006 = pm2.pm2io.from_interchange_format(data_if_2006)

     # convert to mass units from CO2eq
-    entities_to_convert = ['N2O', 'SF6', 'CH4', 'NF3']
+    entities_to_convert = ["N2O", "SF6", "CH4", "NF3"]
     entities_to_convert = [f"{entity} ({gwp_to_use})" for entity in entities_to_convert]
     entities_to_convert = [f"{entity} ({gwp_to_use})" for entity in entities_to_convert]
 
 
     for entity in entities_to_convert:
     for entity in entities_to_convert:
@@ -382,19 +438,23 @@ if __name__ == "__main__":
     # save data
     # ###
     # data in original categories
-    pm2.pm2io.write_interchange_format(output_folder /
-                                       (output_filename + coords_terminologies["category"]),
-                                       data_if)
+    pm2.pm2io.write_interchange_format(
+        output_folder / (output_filename + coords_terminologies["category"]), data_if
+    )
     encoding = {var: compression for var in data_pm2.data_vars}
-    data_pm2.pr.to_netcdf((output_folder /
-                          (output_filename + coords_terminologies[
-                              "category"])).with_suffix(".nc"),
-                          encoding=encoding)
+    data_pm2.pr.to_netcdf(
+        (
+            output_folder / (output_filename + coords_terminologies["category"])
+        ).with_suffix(".nc"),
+        encoding=encoding,
+    )

     # data in 2006 categories
-    pm2.pm2io.write_interchange_format(output_folder /
-                                       (output_filename + "IPCC2006_PRIMAP"), data_if_2006)
+    pm2.pm2io.write_interchange_format(
+        output_folder / (output_filename + "IPCC2006_PRIMAP"), data_if_2006
+    )
     encoding = {var: compression for var in data_pm2_2006.data_vars}
-    data_pm2_2006.pr.to_netcdf((output_folder /
-                                (output_filename + "IPCC2006_PRIMAP")).with_suffix(".nc"),
-                               encoding=encoding)
+    data_pm2_2006.pr.to_netcdf(
+        (output_folder / (output_filename + "IPCC2006_PRIMAP")).with_suffix(".nc"),
+        encoding=encoding,
+    )

+ 30 - 0
src/unfccc_ghg_data/unfccc_reader/Thailand/__init__.py

@@ -0,0 +1,30 @@
+"""Read Thailand's BURs, NIRs, NCs
+
+Scripts and configurations to read Thailand's submissions to the UNFCCC.
+Currently, the following submissions and datasets are available (all datasets
+including DI (read using the DI-reader) and legacy BUR/NIR (no code)):
+
+.. exec_code::
+    :hide_code:
+
+    from unfccc_ghg_data.helper.functions import (get_country_datasets,
+                                                  get_country_submissions)
+    country = 'THA'
+    # print available submissions
+    print("="*15 + " Available submissions " + "="*15)
+    get_country_submissions(country, True)
+    print("")
+
+    #print available datasets
+    print("="*15 + " Available datasets " + "="*15)
+    get_country_datasets(country, True)
+
+You can also obtain this information running
+
+.. code-block:: bash
+
+    poetry run doit country_info country=THA
+
+See below for a listing of scripts for BUR/NIR reading including links.
+
+"""

+ 405 - 223
src/unfccc_ghg_data/unfccc_reader/Thailand/config_tha_bur3.py

@@ -1,38 +1,54 @@
-# configuration for Thailand, BUR4
+"""Config for Thailand's BUR4
+
+Full configuration including PRIMAP2 conversion config and metadata
+
+"""
+
+
 # ###
 # for reading
 # ###

 # general
 gwp_to_use = "AR4GWP100"
-terminology_proc = 'IPCC2006_PRIMAP'
+terminology_proc = "IPCC2006_PRIMAP"

-header_inventory = ['Greenhouse gas source and sink categories',
-                   'CO2 emissions', 'CO2 removals',
-                   'CH4', 'N2O', 'NOx', 'CO', 'NMVOCs',
-                   'SO2', 'HFCs', 'PFCs', 'SF6']
-unit_inventory = ['Gg'] * len(header_inventory)
+header_inventory = [
+    "Greenhouse gas source and sink categories",
+    "CO2 emissions",
+    "CO2 removals",
+    "CH4",
+    "N2O",
+    "NOx",
+    "CO",
+    "NMVOCs",
+    "SO2",
+    "HFCs",
+    "PFCs",
+    "SF6",
+]
+unit_inventory = ["Gg"] * len(header_inventory)
 unit_inventory[9] = "GgCO2eq"
 unit_inventory[10] = "GgCO2eq"

 # 2016 inventory
 inv_conf = {
-    'year': 2016,
-    'entity_row': 0,
-    'unit_row': 1,
-    'index_cols': "Greenhouse gas source and sink categories",
-    'header': header_inventory,
-    'unit': unit_inventory,
+    "year": 2016,
+    "entity_row": 0,
+    "unit_row": 1,
+    "index_cols": "Greenhouse gas source and sink categories",
+    "header": header_inventory,
+    "unit": unit_inventory,
     # special header as category code and name in one column
-    'header_long': ["orig_cat_name", "entity", "unit", "time", "data"],
+    "header_long": ["orig_cat_name", "entity", "unit", "time", "data"],
     # manual category codes (manual mapping to primap1, will be mapped to primap2
     # automatically with the other codes)
-    'cat_codes_manual': {
-        '6. Other Memo Items (not accounted in Total Emissions)': 'MEMO',
-        'International Bunkers': 'MBK',
-        'CO2 from Biomass': 'MBIO',
+    "cat_codes_manual": {
+        "6. Other Memo Items (not accounted in Total Emissions)": "MEMO",
+        "International Bunkers": "MBK",
+        "CO2 from Biomass": "MBIO",
     },
-    'cat_code_regexp': r'^(?P<code>[a-zA-Z0-9]{1,4})[\s\.].*',
+    "cat_code_regexp": r"^(?P<code>[a-zA-Z0-9]{1,4})[\s\.].*",
 }

 # primap2 format conversion
@@ -59,14 +75,14 @@ coords_value_mapping = {
     "unit": "PRIMAP1",
     "unit": "PRIMAP1",
     "category": "PRIMAP1",
     "category": "PRIMAP1",
     "entity": {
     "entity": {
-        'HFCs': f"HFCS ({gwp_to_use})",
-        'PFCs': f"PFCS ({gwp_to_use})",
-        'NMVOCs': 'NMVOC',
+        "HFCs": f"HFCS ({gwp_to_use})",
+        "PFCs": f"PFCS ({gwp_to_use})",
+        "NMVOCs": "NMVOC",
     },
 }

 filter_remove = {
-    'f_memo': {"category": "MEMO"},
+    "f_memo": {"category": "MEMO"},
 }
 filter_keep = {}

@@ -81,26 +97,31 @@ meta_data = {

 # main sector time series
 header_main_sector_ts = [
-    'Year', 'Energy', 'IPPU',
-    'Agriculture', 'LULUCF', 'Waste',
-    'Net emissions (Including LULUCF)',
-    'Net emissions (Excluding LULUCF)']
-unit_main_sector_ts = ['GgCO2eq'] * len(header_main_sector_ts)
-unit_main_sector_ts[0] = ''
+    "Year",
+    "Energy",
+    "IPPU",
+    "Agriculture",
+    "LULUCF",
+    "Waste",
+    "Net emissions (Including LULUCF)",
+    "Net emissions (Excluding LULUCF)",
+]
+unit_main_sector_ts = ["GgCO2eq"] * len(header_main_sector_ts)
+unit_main_sector_ts[0] = ""

 trend_conf = {
-    'header': header_main_sector_ts,
-    'unit': unit_main_sector_ts,
+    "header": header_main_sector_ts,
+    "unit": unit_main_sector_ts,
     # manual category codes (manual mapping to primap1, will be mapped to primap2
     # automatically with the other codes)
-    'cat_codes_manual': {
-        'Energy': "1",
-        'IPPU': "2",
-        'Agriculture': "3",
-        'LULUCF': "4",
-        'Waste': "5",
-        'Net emissions (Including LULUCF)': "0",
-        'Net emissions (Excluding LULUCF)': "M0EL",
+    "cat_codes_manual": {
+        "Energy": "1",
+        "IPPU": "2",
+        "Agriculture": "3",
+        "LULUCF": "4",
+        "Waste": "5",
+        "Net emissions (Including LULUCF)": "0",
+        "Net emissions (Excluding LULUCF)": "M0EL",
     },
     },
 }
 
@@ -118,14 +139,13 @@ coords_defaults_main_sector_ts = {
 }

 # indirect gases time series
-header_indirect = ['Year', 'NOx', 'CO',
-                    'NMVOCs', 'SO2']
-unit_indirect = ['Gg'] * len(header_indirect)
-unit_indirect[0] = ''
+header_indirect = ["Year", "NOx", "CO", "NMVOCs", "SO2"]
+unit_indirect = ["Gg"] * len(header_indirect)
+unit_indirect[0] = ""
 ind_conf = {
-    'header': header_indirect,
-    'unit': unit_indirect,
-    'cols_to_remove': ['Average Annual Growth Rate'],
+    "header": header_indirect,
+    "unit": unit_indirect,
+    "cols_to_remove": ["Average Annual Growth Rate"],
 }

 coords_cols_indirect = {
@@ -146,111 +166,203 @@ coords_defaults_indirect = {
 # ###
 # aggregate categories
 country_processing_step1 = {
-    'aggregate_cats': {
-        '2.A.4': {'sources': ['2.A.4.b', '2.A.4.d'],
-                  'name': 'Other Process uses of Carbonates'},
+    "aggregate_cats": {
+        "2.A.4": {
+            "sources": ["2.A.4.b", "2.A.4.d"],
+            "name": "Other Process uses of Carbonates",
+        },
     },
-    'aggregate_gases': {
-        'KYOTOGHG': {
-            'basket': 'KYOTOGHG (AR4GWP100)',
-            'basket_contents': ['CO2', 'CH4', 'N2O', 'SF6',
-                                'HFCS (AR4GWP100)', 'PFCS (AR4GWP100)'],
-            'skipna': True,
-            'min_count': 1,
-            'sel': {f'category ({coords_terminologies["category"]})':
-                [
-                    '0', '1', '1.A', '1.A.1', '1.A.2', '1.A.3',
-                    '1.A.4', '1.B', '1.B.1', '1.B.2',
-                    '1.C',
-                    '2', '2.A', '2.A.1', '2.A.2', '2.A.3', '2.A.4',
-                    '2.B', '2.C', '2.D', '2.H',
-                    '3', '3.A', '3.B', '3.C', '3.D', '3.E', '3.F', '3.G',
-                    '3.H', '3.I',
-                    '4', '4.A', '4.B', '4.C', '4.D', '4.E',
-                    '5', '5.A', '5.B', '5.C', '5.D'
+    "aggregate_gases": {
+        "KYOTOGHG": {
+            "basket": "KYOTOGHG (AR4GWP100)",
+            "basket_contents": [
+                "CO2",
+                "CH4",
+                "N2O",
+                "SF6",
+                "HFCS (AR4GWP100)",
+                "PFCS (AR4GWP100)",
+            ],
+            "skipna": True,
+            "min_count": 1,
+            "sel": {
+                f'category ({coords_terminologies["category"]})': [
+                    "0",
+                    "1",
+                    "1.A",
+                    "1.A.1",
+                    "1.A.2",
+                    "1.A.3",
+                    "1.A.4",
+                    "1.B",
+                    "1.B.1",
+                    "1.B.2",
+                    "1.C",
+                    "2",
+                    "2.A",
+                    "2.A.1",
+                    "2.A.2",
+                    "2.A.3",
+                    "2.A.4",
+                    "2.B",
+                    "2.C",
+                    "2.D",
+                    "2.H",
+                    "3",
+                    "3.A",
+                    "3.B",
+                    "3.C",
+                    "3.D",
+                    "3.E",
+                    "3.F",
+                    "3.G",
+                    "3.H",
+                    "3.I",
+                    "4",
+                    "4.A",
+                    "4.B",
+                    "4.C",
+                    "4.D",
+                    "4.E",
+                    "5",
+                    "5.A",
+                    "5.B",
+                    "5.C",
+                    "5.D",
                 ]
-            }, # not tested
+            },  # not tested
         },
     },
 }

 country_processing_step2 = {
-    'downscale': {
+    "downscale": {
         # main sectors present as KYOTOGHG sum. subsectors need to be downscaled
         # TODO: downscale CO, NOx, NMVOC, SO2 (national total present)
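+        # downscaling splits a sector's KYOTOGHG total onto its subsectors using
+        # the shares from years in which the subsector data are reported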
-        'sectors': {
-            '1': {
-                'basket': '1',
-                'basket_contents': ['1.A', '1.B', '1.C'],
-                'entities': ['KYOTOGHG (AR4GWP100)'],
-                'dim': f'category ({coords_terminologies["category"]})',
+        "sectors": {
+            "1": {
+                "basket": "1",
+                "basket_contents": ["1.A", "1.B", "1.C"],
+                "entities": ["KYOTOGHG (AR4GWP100)"],
+                "dim": f'category ({coords_terminologies["category"]})',
             },
-            '1.A': {
-                'basket': '1.A',
-                'basket_contents': ['1.A.1', '1.A.2', '1.A.3', '1.A.4'],
-                'entities': ['KYOTOGHG (AR4GWP100)'],
-                'dim': f'category ({coords_terminologies["category"]})',
+            "1.A": {
+                "basket": "1.A",
+                "basket_contents": ["1.A.1", "1.A.2", "1.A.3", "1.A.4"],
+                "entities": ["KYOTOGHG (AR4GWP100)"],
+                "dim": f'category ({coords_terminologies["category"]})',
             },
-            '1.B': {
-                'basket': '1.B',
-                'basket_contents': ['1.B.1', '1.B.2'],
-                'entities': ['KYOTOGHG (AR4GWP100)'],
-                'dim': f'category ({coords_terminologies["category"]})',
+            "1.B": {
+                "basket": "1.B",
+                "basket_contents": ["1.B.1", "1.B.2"],
+                "entities": ["KYOTOGHG (AR4GWP100)"],
+                "dim": f'category ({coords_terminologies["category"]})',
             },
-            '2': {
-                'basket': '2',
-                'basket_contents': ['2.A', '2.B', '2.C', '2.D', '2.H'],
-                'entities': ['KYOTOGHG (AR4GWP100)'],
-                'dim': f'category ({coords_terminologies["category"]})',
+            "2": {
+                "basket": "2",
+                "basket_contents": ["2.A", "2.B", "2.C", "2.D", "2.H"],
+                "entities": ["KYOTOGHG (AR4GWP100)"],
+                "dim": f'category ({coords_terminologies["category"]})',
             },
-            '2.A': {
-                'basket': '2.A',
-                'basket_contents': ['2.A.1', '2.A.2', '2.A.3', '2.A.4'],
-                'entities': ['KYOTOGHG (AR4GWP100)'],
-                'dim': f'category ({coords_terminologies["category"]})',
+            "2.A": {
+                "basket": "2.A",
+                "basket_contents": ["2.A.1", "2.A.2", "2.A.3", "2.A.4"],
+                "entities": ["KYOTOGHG (AR4GWP100)"],
+                "dim": f'category ({coords_terminologies["category"]})',
             },
-            '3': {
-                'basket': '3',
-                'basket_contents': ['3.A', '3.B', '3.C', '3.D', '3.E', '3.F', '3.G',
-                                    '3.H', '3.I'],
-                'entities': ['KYOTOGHG (AR4GWP100)'],
-                'dim': f'category ({coords_terminologies["category"]})',
+            "3": {
+                "basket": "3",
+                "basket_contents": [
+                    "3.A",
+                    "3.B",
+                    "3.C",
+                    "3.D",
+                    "3.E",
+                    "3.F",
+                    "3.G",
+                    "3.H",
+                    "3.I",
+                ],
+                "entities": ["KYOTOGHG (AR4GWP100)"],
+                "dim": f'category ({coords_terminologies["category"]})',
             },
-            '4': {
-                'basket': '4',
-                'basket_contents': ['4.A', '4.B', '4.C', '4.D', '4.E'],
-                'entities': ['KYOTOGHG (AR4GWP100)'],
-                'dim': f'category ({coords_terminologies["category"]})',
+            "4": {
+                "basket": "4",
+                "basket_contents": ["4.A", "4.B", "4.C", "4.D", "4.E"],
+                "entities": ["KYOTOGHG (AR4GWP100)"],
+                "dim": f'category ({coords_terminologies["category"]})',
             },
             },
-                'basket': '5',
-                'basket_contents': ['5.A', '5.B', '5.C', '5.D'],
-                'entities': ['KYOTOGHG (AR4GWP100)'],
-                'dim': f'category ({coords_terminologies["category"]})',
+            "5": {
+                "basket": "5",
+                "basket_contents": ["5.A", "5.B", "5.C", "5.D"],
+                "entities": ["KYOTOGHG (AR4GWP100)"],
+                "dim": f'category ({coords_terminologies["category"]})',
             },
             },
         },
-            'KYOTO': {
-                'basket': 'KYOTOGHG (AR4GWP100)',
-                'basket_contents': ['CH4', 'CO2', 'N2O', 'HFCS (AR4GWP100)',
-                                    'PFCS (AR4GWP100)', 'SF6'],
-                'sel': {f'category ({coords_terminologies["category"]})':
-                    [
-                        '0', '1', '1.A', '1.A.1', '1.A.2', '1.A.3',
-                        '1.A.4', '1.B', '1.B.1', '1.B.2', '1.C',
-                        '2', '2.A', '2.A.1', '2.A.2', '2.A.3', '2.A.4',
-                        '2.B', '2.C', '2.D', '2.H',
-                        '3', '3.A', '3.B', '3.C', '3.D', '3.E', '3.F', '3.G',
-                        '3.H', '3.I',
-                        '4', '4.A', '4.B', '4.C', '4.D', '4.E',
-                        '5', '5.A', '5.B', '5.C', '5.D']},
+        "entities": {
+            "KYOTO": {
+                "basket": "KYOTOGHG (AR4GWP100)",
+                "basket_contents": [
+                    "CH4",
+                    "CO2",
+                    "N2O",
+                    "HFCS (AR4GWP100)",
+                    "PFCS (AR4GWP100)",
+                    "SF6",
+                ],
+                "sel": {
+                    f'category ({coords_terminologies["category"]})': [
+                        "0",
+                        "1",
+                        "1.A",
+                        "1.A.1",
+                        "1.A.2",
+                        "1.A.3",
+                        "1.A.4",
+                        "1.B",
+                        "1.B.1",
+                        "1.B.2",
+                        "1.C",
+                        "2",
+                        "2.A",
+                        "2.A.1",
+                        "2.A.2",
+                        "2.A.3",
+                        "2.A.4",
+                        "2.B",
+                        "2.C",
+                        "2.D",
+                        "2.H",
+                        "3",
+                        "3.A",
+                        "3.B",
+                        "3.C",
+                        "3.D",
+                        "3.E",
+                        "3.F",
+                        "3.G",
+                        "3.H",
+                        "3.I",
+                        "4",
+                        "4.A",
+                        "4.B",
+                        "4.C",
+                        "4.D",
+                        "4.E",
+                        "5",
+                        "5.A",
+                        "5.B",
+                        "5.C",
+                        "5.D",
+                    ]
+                },
             },
         },
     },
-    'basket_copy': {
-        'GWPs_to_add': ["SARGWP100", "AR5GWP100", "AR6GWP100"],
-        'entities': ["HFCS", "PFCS"],
-        'source_GWP': gwp_to_use,
+    "basket_copy": {
+        "GWPs_to_add": ["SARGWP100", "AR5GWP100", "AR6GWP100"],
+        "entities": ["HFCS", "PFCS"],
+        "source_GWP": gwp_to_use,
     },
     },
 }
 ## not in BUR3: 1.A.1.a, 1.A.1.b, 1.A.3.a, 1.A.3.b, 1.A.3.c, 1.A.3.d, 1.A.5, 1.B.3,
 # 4.E.x, 5.X.y M.BK.A, M.BK.M
 # 4.E.x, 5.X.y M.BK.A, M.BK.M

 cat_conversion = {
-        '0': '0',
-        'M.0.EL': 'M.0.EL',
-        '1': '1',
-        '1.A': '1.A',
-        '1.A.1': '1.A.1',
-        '1.A.2': '1.A.2',
-        '1.A.3': '1.A.3',
-        '1.A.4': '1.A.4',
-        '1.B': '1.B',
-        '1.B.1': '1.B.1',
-        '1.B.2': '1.B.2',
-        '1.C': '1.C',
-        '1.C.1': '1.C.1',
-        '1.C.2': '1.C.2',
-        '1.C.3': '1.C.3',
-        '2': '2',
-        '2.A': '2.A',
-        '2.A.1': '2.A.1',
-        '2.A.2': '2.A.2',
-        '2.A.3': '2.A.3',
-        '2.A.4': '2.A.4',
-        '2.A.4.b': '2.A.4.b',
-        '2.A.4.d': '2.A.4.d',
-        '2.B': '2.B',
-        '2.C': '2.C',
-        '2.C.1': '2.C.1',
-        '2.D': '2.D',
-        '2.D.1': '2.D.1',
-        '2.H': '2.H',
-        '2.H.1': '2.H.1',
-        '2.H.2': '2.H.2',
-        '3': 'M.AG',
-        '3.A': '3.A.1',
-        '3.B': '3.A.2',
-        '3.C': 'M.3.C.1.AG',  # field burning of agricultural residues
-        '3.D': '3.C.2',  # Liming
-        '3.E': '3.C.3',  # urea application
-        '3.F': '3.C.4',  # direct N2O from agri soils
-        '3.G': '3.C.5',  # indirect N2O from agri soils
-        '3.H': '3.C.6',  # indirect N2O from manure management
-        '3.I': '3.C.7',  # rice
-        '4': 'M.LULUCF',
-        '4.A': '3.B.1.a',  # forest remaining forest
-        '4.B': '3.B.2.a',  # cropland remaining cropland
-        '4.C': '3.B.2.b',  # land converted to cropland
-        '4.D': '3.B.6.b',  # land converted to other land
-        '4.E': 'M.3.C.1.LU',  # biomass burning (LULUCF)
-        '5': '4',
-        '5.A': '4.A',
-        '5.B': '4.B',
-        '5.C': '4.C',
-        '5.D': '4.D',
-        'M.BK': 'M.BK',
-        'M.BIO': 'M.BIO',
+    "mapping": {
+        "0": "0",
+        "M.0.EL": "M.0.EL",
+        "1": "1",
+        "1.A": "1.A",
+        "1.A.1": "1.A.1",
+        "1.A.2": "1.A.2",
+        "1.A.3": "1.A.3",
+        "1.A.4": "1.A.4",
+        "1.B": "1.B",
+        "1.B.1": "1.B.1",
+        "1.B.2": "1.B.2",
+        "1.C": "1.C",
+        "1.C.1": "1.C.1",
+        "1.C.2": "1.C.2",
+        "1.C.3": "1.C.3",
+        "2": "2",
+        "2.A": "2.A",
+        "2.A.1": "2.A.1",
+        "2.A.2": "2.A.2",
+        "2.A.3": "2.A.3",
+        "2.A.4": "2.A.4",
+        "2.A.4.b": "2.A.4.b",
+        "2.A.4.d": "2.A.4.d",
+        "2.B": "2.B",
+        "2.C": "2.C",
+        "2.C.1": "2.C.1",
+        "2.D": "2.D",
+        "2.D.1": "2.D.1",
+        "2.H": "2.H",
+        "2.H.1": "2.H.1",
+        "2.H.2": "2.H.2",
+        "3": "M.AG",
+        "3.A": "3.A.1",
+        "3.B": "3.A.2",
+        "3.C": "M.3.C.1.AG",  # field burning of agricultural residues
+        "3.D": "3.C.2",  # Liming
+        "3.E": "3.C.3",  # urea application
+        "3.F": "3.C.4",  # direct N2O from agri soils
+        "3.G": "3.C.5",  # indirect N2O from agri soils
+        "3.H": "3.C.6",  # indirect N2O from manure management
+        "3.I": "3.C.7",  # rice
+        "4": "M.LULUCF",
+        "4.A": "3.B.1.a",  # forest remaining forest
+        "4.B": "3.B.2.a",  # cropland remaining cropland
+        "4.C": "3.B.2.b",  # land converted to cropland
+        "4.D": "3.B.6.b",  # land converted to other land
+        "4.E": "M.3.C.1.LU",  # biomass burning (LULUCF)
+        "5": "4",
+        "5.A": "4.A",
+        "5.B": "4.B",
+        "5.C": "4.C",
+        "5.D": "4.D",
+        "M.BK": "M.BK",
+        "M.BIO": "M.BIO",
     },
     },
-        '3.A': {'sources': ['3.A.1', '3.A.2'], 'name': 'Livestock'},
-        '3.C.1': {'sources': ['M.3.C.1.AG', 'M.3.C.1.LU'],
-                  'name': 'Emissions from Biomass Burning'},
-        '3.C': {'sources': ['3.C.1', '3.C.2', '3.C.3', '3.C.4', '3.C.5', '3.C.6', '3.C.7'],
-                'name': 'Aggregate sources and non-CO2 emissions sources on land'},
-        'M.3.C.AG': {
-            'sources': ['M.3.C.1.AG', '3.C.2', '3.C.3', '3.C.4', '3.C.5', '3.C.6', '3.C.7'],
-            'name': 'Aggregate sources and non-CO2 emissions sources on land (Agriculture)'},
-        'M.AG.ELV': {'sources': ['M.3.C.AG'],
-                     'name': 'Agriculture excluding livestock emissions'},
-        'M.3.C.LU': {'sources': ['M.3.C.1.LU'],
-                     'name': 'Aggregate sources and non-CO2 emissions sources on land (Land use)'},
-        '3.B.1': {'sources': ['3.B.1.a'], 'name': 'Forest Land'},
-        '3.B.2': {'sources': ['3.B.2.a', '3.B.2.b'], 'name': 'Cropland'},
-        '3.B.6': {'sources': ['3.B.6.b'], 'name': 'Other Land'},
-        '3.B': {'sources': ['3.B.1', '3.B.2', '3.B.6'], 'name': 'Land'},
-        'M.LULUCF': {'sources': ['3.B', 'N.3.C.LU'], 'name': 'LULUCF'},
-        '3': {'sources': ['M.AG', 'M.LULUCF'], 'name': 'AFOLU'},
+    "aggregate": {
+        "3.A": {"sources": ["3.A.1", "3.A.2"], "name": "Livestock"},
+        "3.C.1": {
+            "sources": ["M.3.C.1.AG", "M.3.C.1.LU"],
+            "name": "Emissions from Biomass Burning",
+        },
+        "3.C": {
+            "sources": ["3.C.1", "3.C.2", "3.C.3", "3.C.4", "3.C.5", "3.C.6", "3.C.7"],
+            "name": "Aggregate sources and non-CO2 emissions sources on land",
+        },
+        "M.3.C.AG": {
+            "sources": [
+                "M.3.C.1.AG",
+                "3.C.2",
+                "3.C.3",
+                "3.C.4",
+                "3.C.5",
+                "3.C.6",
+                "3.C.7",
+            ],
+            "name": "Aggregate sources and non-CO2 emissions sources on land (Agriculture)",
+        },
+        "M.AG.ELV": {
+            "sources": ["M.3.C.AG"],
+            "name": "Agriculture excluding livestock emissions",
+        },
+        "M.3.C.LU": {
+            "sources": ["M.3.C.1.LU"],
+            "name": "Aggregate sources and non-CO2 emissions sources on land (Land use)",
+        },
+        "3.B.1": {"sources": ["3.B.1.a"], "name": "Forest Land"},
+        "3.B.2": {"sources": ["3.B.2.a", "3.B.2.b"], "name": "Cropland"},
+        "3.B.6": {"sources": ["3.B.6.b"], "name": "Other Land"},
+        "3.B": {"sources": ["3.B.1", "3.B.2", "3.B.6"], "name": "Land"},
+        "M.LULUCF": {"sources": ["3.B", "N.3.C.LU"], "name": "LULUCF"},
+        "3": {"sources": ["M.AG", "M.LULUCF"], "name": "AFOLU"},
     },
     },
 }

 sectors_to_save = [
-    '1.B', '1.B.1', '1.B.2', '1.C', '1.C.1', '1.C.2', '1.C.3',
-    '2', '2.A', '2.A.1', '2.A.2', '2.A.3', '2.A.4', '2.A.4.b', '2.A.4.d',
-    '2.B', '2.C', '2.C.1', '2.H', '2.H.1', '2.H.2',
-    '3', 'M.AG', '3.A', '3.A.1', '3.A.2',
-    '3.C', '3.C.1', '3.C.2', '3.C.3', '3.C.4',
-    '3.C.5', '3.C.6', '3.C.7', 'M.3.C.1.AG', 'M.3.C.AG', 'M.AG.ELV',
-    'M.LULUCF', 'M.3.C.1.LU', 'M.3.C.LU', '3.B', '3.B.1', '3.B.1.a', '3.B.2', '3.B.2.a',
-    '3.B.2.b', '3.B.6', '3.B.6.b',
-    '4', '4.A', '4.B', '4.C', '4.D',
-    '0', 'M.0.EL', 'M.BK', 'M.BIO']
+    "1",
+    "1.A",
+    "1.A.1",
+    "1.A.2",
+    "1.A.3",
+    "1.A.4",
+    "1.B",
+    "1.B.1",
+    "1.B.2",
+    "1.C",
+    "1.C.1",
+    "1.C.2",
+    "1.C.3",
+    "2",
+    "2.A",
+    "2.A.1",
+    "2.A.2",
+    "2.A.3",
+    "2.A.4",
+    "2.A.4.b",
+    "2.A.4.d",
+    "2.B",
+    "2.C",
+    "2.C.1",
+    "2.H",
+    "2.H.1",
+    "2.H.2",
+    "3",
+    "M.AG",
+    "3.A",
+    "3.A.1",
+    "3.A.2",
+    "3.C",
+    "3.C.1",
+    "3.C.2",
+    "3.C.3",
+    "3.C.4",
+    "3.C.5",
+    "3.C.6",
+    "3.C.7",
+    "M.3.C.1.AG",
+    "M.3.C.AG",
+    "M.AG.ELV",
+    "M.LULUCF",
+    "M.3.C.1.LU",
+    "M.3.C.LU",
+    "3.B",
+    "3.B.1",
+    "3.B.1.a",
+    "3.B.2",
+    "3.B.2.a",
+    "3.B.2.b",
+    "3.B.6",
+    "3.B.6.b",
+    "4",
+    "4.A",
+    "4.B",
+    "4.C",
+    "4.D",
+    "0",
+    "M.0.EL",
+    "M.BK",
+    "M.BIO",
+]


 # gas baskets
 gas_baskets = {
-    'FGASES (SARGWP100)': ['HFCS (SARGWP100)', 'PFCS (SARGWP100)', 'SF6', 'NF3'],
-    'FGASES (AR4GWP100)': ['HFCS (AR4GWP100)', 'PFCS (AR4GWP100)', 'SF6', 'NF3'],
-    'FGASES (AR5GWP100)':['HFCS (AR5GWP100)', 'PFCS (AR5GWP100)', 'SF6', 'NF3'],
-    'FGASES (AR6GWP100)':['HFCS (AR6GWP100)', 'PFCS (AR6GWP100)', 'SF6', 'NF3'],
-    'KYOTOGHG (SARGWP100)': ['CO2', 'CH4', 'N2O', 'FGASES (SARGWP100)'],
-    'KYOTOGHG (AR4GWP100)': ['CO2', 'CH4', 'N2O', 'FGASES (AR4GWP100)'],
-    'KYOTOGHG (AR5GWP100)': ['CO2', 'CH4', 'N2O', 'FGASES (AR5GWP100)'],
-    'KYOTOGHG (AR6GWP100)': ['CO2', 'CH4', 'N2O', 'FGASES (AR6GWP100)'],
+    "FGASES (SARGWP100)": ["HFCS (SARGWP100)", "PFCS (SARGWP100)", "SF6", "NF3"],
+    "FGASES (AR4GWP100)": ["HFCS (AR4GWP100)", "PFCS (AR4GWP100)", "SF6", "NF3"],
+    "FGASES (AR5GWP100)": ["HFCS (AR5GWP100)", "PFCS (AR5GWP100)", "SF6", "NF3"],
+    "FGASES (AR6GWP100)": ["HFCS (AR6GWP100)", "PFCS (AR6GWP100)", "SF6", "NF3"],
+    "KYOTOGHG (SARGWP100)": ["CO2", "CH4", "N2O", "FGASES (SARGWP100)"],
+    "KYOTOGHG (AR4GWP100)": ["CO2", "CH4", "N2O", "FGASES (AR4GWP100)"],
+    "KYOTOGHG (AR5GWP100)": ["CO2", "CH4", "N2O", "FGASES (AR5GWP100)"],
+    "KYOTOGHG (AR6GWP100)": ["CO2", "CH4", "N2O", "FGASES (AR6GWP100)"],
 }
 }
+ 461 - 250
src/unfccc_ghg_data/unfccc_reader/Thailand/config_tha_bur4.py

@@ -1,31 +1,35 @@
-# configuration for Thailand, BUR4
+"""Config for Thailand's BUR5
+
+Full configuration including PRIMAP2 conversion config and metadata
+
+"""
 # ###
 # for reading
 # ###

 # general
 gwp_to_use = "AR4GWP100"
-terminology_proc = 'IPCC2006_PRIMAP'
+terminology_proc = "IPCC2006_PRIMAP"

 # 2019 inventory
 inv_conf = {
-    'year': 2019,
-    'entity_row': 0,
-    'unit_row': 1,
-    'index_cols': "Greenhouse gas source and sink categories",
+    "year": 2019,
+    "entity_row": 0,
+    "unit_row": 1,
+    "index_cols": "Greenhouse gas source and sink categories",
     # special header as category code and name in one column
-    'header_long': ["orig_cat_name", "entity", "unit", "time", "data"],
+    "header_long": ["orig_cat_name", "entity", "unit", "time", "data"],
     # manual category codes (manual mapping to primap1, will be mapped to primap2
     # automatically with the other codes)
-    'cat_codes_manual': {
-        'Total national emissions and removals': '0',
-        'Memo Items (not accounted in total Emissions)': 'MEMO',
-        'International Bunkers': 'MBK',
-        'Aviation International Bunkers': 'MBKA',
-        'Marine-International Bunkers': 'MBKM',
-        'CO2 from biomass': 'MBIO',
+    "cat_codes_manual": {
+        "Total national emissions and removals": "0",
+        "Memo Items (not accounted in total Emissions)": "MEMO",
+        "International Bunkers": "MBK",
+        "Aviation International Bunkers": "MBKA",
+        "Marine-International Bunkers": "MBKM",
+        "CO2 from biomass": "MBIO",
     },
-    'cat_code_regexp': r'^(?P<code>[a-zA-Z0-9]{1,4})[\s\.].*',
+    "cat_code_regexp": r"^(?P<code>[a-zA-Z0-9]{1,4})[\s\.].*",
 }

 # primap2 format conversion
@@ -52,16 +56,16 @@ coords_value_mapping = {
     "unit": "PRIMAP1",
     "unit": "PRIMAP1",
     "category": "PRIMAP1",
     "category": "PRIMAP1",
     "entity": {
     "entity": {
-        'HFCs': f"HFCS ({gwp_to_use})",
-        'PFCs': f"PFCS ({gwp_to_use})",
-        'SF6': f'SF6 ({gwp_to_use})',
-        'NMVOCs': 'NMVOC',
-        'Nox': 'NOx',
+        "HFCs": f"HFCS ({gwp_to_use})",
+        "PFCs": f"PFCS ({gwp_to_use})",
+        "SF6": f"SF6 ({gwp_to_use})",
+        "NMVOCs": "NMVOC",
+        "Nox": "NOx",
     },
 }

 filter_remove = {
-    'f_memo': {"category": "MEMO"},
+    "f_memo": {"category": "MEMO"},
 }
 filter_keep = {}

@@ -78,13 +82,13 @@ meta_data = {
 # manual category codes (manual mapping to primap1, will be mapped to primap2
 # automatically with the other codes)
 cat_codes_manual_main_sector_ts = {
-    'Energy': "1",
-    'Industrial Processes and Product Use': "2",
-    'Agriculture': "3",
-    'LULUCF': "4",
-    'Waste': "5",
-    'Net emissions (Include LULUCF)': "0",
-    'Total emissions (Exclude LULUCF)': "M0EL",
+    "Energy": "1",
+    "Industrial Processes and Product Use": "2",
+    "Agriculture": "3",
+    "LULUCF": "4",
+    "Waste": "5",
+    "Net emissions (Include LULUCF)": "0",
+    "Total emissions (Exclude LULUCF)": "M0EL",
 }
 }

 coords_cols_main_sector_ts = {
 # ###
 # ###
 # aggregate categories
 country_processing_step1 = {
-        '2.A.4': {'sources': ['2.A.4.b', '2.A.4.d'],
-                  'name': 'Other Process uses of Carbonates'},
-        '2.B.8': {'sources': ['2.B.8.b', '2.B.8.c', '2.B.8.e', '2.B.8.f'],
-                  'name': 'Petrochemical and Carbon Black production'},
+    "aggregate_cats": {
+        "2.A.4": {
+            "sources": ["2.A.4.b", "2.A.4.d"],
+            "name": "Other Process uses of Carbonates",
+        },
+        "2.B.8": {
+            "sources": ["2.B.8.b", "2.B.8.c", "2.B.8.e", "2.B.8.f"],
+            "name": "Petrochemical and Carbon Black production",
+        },
     },
     },
-        'KYOTOGHG': {
-            'basket': 'KYOTOGHG (AR4GWP100)',
-            'basket_contents': ['CO2', 'CH4', 'N2O', 'SF6',
-                                'HFCS (AR4GWP100)', 'PFCS (AR4GWP100)'],
-            'skipna': True,
-            'min_count': 1,
-            'sel': {f'category ({coords_terminologies["category"]})':
-                [
-                    '0', '1', '1.A', '1.A.1', '1.A.2', '1.A.3',
-                    '1.A.4', '1.A.5', '1.B', '1.B.1', '1.B.2', '1.B.3',
-                    '1.C',
-                    '2', '2.A', '2.A.1', '2.A.2', '2.A.3', '2.A.4',
-                    '2.B', '2.C', '2.D', '2.F', '2.G', '2.H',
-                    '3', '3.A', '3.B', '3.C', '3.D', '3.E', '3.F', '3.G',
-                    '3.H', '3.I',
-                    '4', '4.A', '4.B', '4.C', '4.D',
-                    '4.E', '4.E.1', '4.E.2', '4.E.3',
-                    '5', '5.A', '5.B', '5.C', '5.D'
+    "aggregate_gases": {
+        "KYOTOGHG": {
+            "basket": "KYOTOGHG (AR4GWP100)",
+            "basket_contents": [
+                "CO2",
+                "CH4",
+                "N2O",
+                "SF6",
+                "HFCS (AR4GWP100)",
+                "PFCS (AR4GWP100)",
+            ],
+            "skipna": True,
+            "min_count": 1,
+            "sel": {
+                f'category ({coords_terminologies["category"]})': [
+                    "0",
+                    "1",
+                    "1.A",
+                    "1.A.1",
+                    "1.A.2",
+                    "1.A.3",
+                    "1.A.4",
+                    "1.A.5",
+                    "1.B",
+                    "1.B.1",
+                    "1.B.2",
+                    "1.B.3",
+                    "1.C",
+                    "2",
+                    "2.A",
+                    "2.A.1",
+                    "2.A.2",
+                    "2.A.3",
+                    "2.A.4",
+                    "2.B",
+                    "2.C",
+                    "2.D",
+                    "2.F",
+                    "2.G",
+                    "2.H",
+                    "3",
+                    "3.A",
+                    "3.B",
+                    "3.C",
+                    "3.D",
+                    "3.E",
+                    "3.F",
+                    "3.G",
+                    "3.H",
+                    "3.I",
+                    "4",
+                    "4.A",
+                    "4.B",
+                    "4.C",
+                    "4.D",
+                    "4.E",
+                    "4.E.1",
+                    "4.E.2",
+                    "4.E.3",
+                    "5",
+                    "5.A",
+                    "5.B",
+                    "5.C",
+                    "5.D",
                 ]
-            }, # not tested
+            },  # not tested
         },
     },
 }

 country_processing_step2 = {
-    'downscale': {
+    "downscale": {
         # main sectors present as KYOTOGHG sum. subsectors need to be downscaled
         # TODO: downscale CO, NOx, NMVOC, SO2 (national total present)
-        'sectors': {
-            '1': {
-                'basket': '1',
-                'basket_contents': ['1.A', '1.B', '1.C'],
-                'entities': ['KYOTOGHG (AR4GWP100)'],
-                'dim': f'category ({coords_terminologies["category"]})',
+        "sectors": {
+            "1": {
+                "basket": "1",
+                "basket_contents": ["1.A", "1.B", "1.C"],
+                "entities": ["KYOTOGHG (AR4GWP100)"],
+                "dim": f'category ({coords_terminologies["category"]})',
             },
-            '1.A': {
-                'basket': '1.A',
-                'basket_contents': ['1.A.1', '1.A.2', '1.A.3', '1.A.4', '1.A.5'],
-                'entities': ['KYOTOGHG (AR4GWP100)'],
-                'dim': f'category ({coords_terminologies["category"]})',
+            "1.A": {
+                "basket": "1.A",
+                "basket_contents": ["1.A.1", "1.A.2", "1.A.3", "1.A.4", "1.A.5"],
+                "entities": ["KYOTOGHG (AR4GWP100)"],
+                "dim": f'category ({coords_terminologies["category"]})',
             },
-            '1.B': {
-                'basket': '1.B',
-                'basket_contents': ['1.B.1', '1.B.2', '1.B.3'],
-                'entities': ['KYOTOGHG (AR4GWP100)'],
-                'dim': f'category ({coords_terminologies["category"]})',
+            "1.B": {
+                "basket": "1.B",
+                "basket_contents": ["1.B.1", "1.B.2", "1.B.3"],
+                "entities": ["KYOTOGHG (AR4GWP100)"],
+                "dim": f'category ({coords_terminologies["category"]})',
             },
-            '2': {
-                'basket': '2',
-                'basket_contents': ['2.A', '2.B', '2.C', '2.D', '2.F', '2.G', '2.H'],
-                'entities': ['KYOTOGHG (AR4GWP100)'],
-                'dim': f'category ({coords_terminologies["category"]})',
+            "2": {
+                "basket": "2",
+                "basket_contents": ["2.A", "2.B", "2.C", "2.D", "2.F", "2.G", "2.H"],
+                "entities": ["KYOTOGHG (AR4GWP100)"],
+                "dim": f'category ({coords_terminologies["category"]})',
             },
-            '2.A': {
-                'basket': '2.A',
-                'basket_contents': ['2.A.1', '2.A.2', '2.A.3', '2.A.4'],
-                'entities': ['KYOTOGHG (AR4GWP100)'],
-                'dim': f'category ({coords_terminologies["category"]})',
+            "2.A": {
+                "basket": "2.A",
+                "basket_contents": ["2.A.1", "2.A.2", "2.A.3", "2.A.4"],
+                "entities": ["KYOTOGHG (AR4GWP100)"],
+                "dim": f'category ({coords_terminologies["category"]})',
             },
-            '3': {
-                'basket': '3',
-                'basket_contents': ['3.A', '3.B', '3.C', '3.D', '3.E', '3.F', '3.G',
-                                    '3.H', '3.I'],
-                'entities': ['KYOTOGHG (AR4GWP100)'],
-                'dim': f'category ({coords_terminologies["category"]})',
+            "3": {
+                "basket": "3",
+                "basket_contents": [
+                    "3.A",
+                    "3.B",
+                    "3.C",
+                    "3.D",
+                    "3.E",
+                    "3.F",
+                    "3.G",
+                    "3.H",
+                    "3.I",
+                ],
+                "entities": ["KYOTOGHG (AR4GWP100)"],
+                "dim": f'category ({coords_terminologies["category"]})',
             },
-            '4': {
-                'basket': '4',
-                'basket_contents': ['4.A', '4.B', '4.C', '4.D', '4.E'],
-                'entities': ['KYOTOGHG (AR4GWP100)'],
-                'dim': f'category ({coords_terminologies["category"]})',
+            "4": {
+                "basket": "4",
+                "basket_contents": ["4.A", "4.B", "4.C", "4.D", "4.E"],
+                "entities": ["KYOTOGHG (AR4GWP100)"],
+                "dim": f'category ({coords_terminologies["category"]})',
             },
-            '4.E': {
-                'basket': '4.E',
-                'basket_contents': ['4.E.1', '4.E.2', '4.E.3'],
-                'entities': ['KYOTOGHG (AR4GWP100)'],
-                'dim': f'category ({coords_terminologies["category"]})',
+            "4.E": {
+                "basket": "4.E",
+                "basket_contents": ["4.E.1", "4.E.2", "4.E.3"],
+                "entities": ["KYOTOGHG (AR4GWP100)"],
+                "dim": f'category ({coords_terminologies["category"]})',
             },
-            '5': {
-                'basket': '5',
-                'basket_contents': ['5.A', '5.B', '5.C', '5.D'],
-                'entities': ['KYOTOGHG (AR4GWP100)'],
-                'dim': f'category ({coords_terminologies["category"]})',
+            "5": {
+                "basket": "5",
+                "basket_contents": ["5.A", "5.B", "5.C", "5.D"],
+                "entities": ["KYOTOGHG (AR4GWP100)"],
+                "dim": f'category ({coords_terminologies["category"]})',
             },
         },
-        'entities': {
-            'KYOTO': {
-                'basket': 'KYOTOGHG (AR4GWP100)',
-                'basket_contents': ['CH4', 'CO2', 'N2O', 'HFCS (AR4GWP100)',
-                                    'PFCS (AR4GWP100)', 'SF6'],
-                'sel': {f'category ({coords_terminologies["category"]})':
-                    [
-                        '1', '1.A', '1.A.1', '1.A.2', '1.A.3',
-                        '1.A.4', '1.A.5', '1.B', '1.B.1', '1.B.2', '1.B.3',
-                        '1.C',
-                        '2', '2.A', '2.A.1', '2.A.2', '2.A.3', '2.A.4',
-                        '2.B', '2.C', '2.D', '2.F', '2.G', '2.H',
-                        '3', '3.A', '3.B', '3.C', '3.D', '3.E', '3.F', '3.G',
-                        '3.H', '3.I',
-                        '4', '4.A', '4.B', '4.C', '4.D',
-                        '4.E', '4.E.1', '4.E.2', '4.E.3',
-                        '5', '5.A', '5.B', '5.C', '5.D']},
+        "entities": {
+            "KYOTO": {
+                "basket": "KYOTOGHG (AR4GWP100)",
+                "basket_contents": [
+                    "CH4",
+                    "CO2",
+                    "N2O",
+                    "HFCS (AR4GWP100)",
+                    "PFCS (AR4GWP100)",
+                    "SF6",
+                ],
+                "sel": {
+                    f'category ({coords_terminologies["category"]})': [
+                        "1",
+                        "1.A",
+                        "1.A.1",
+                        "1.A.2",
+                        "1.A.3",
+                        "1.A.4",
+                        "1.A.5",
+                        "1.B",
+                        "1.B.1",
+                        "1.B.2",
+                        "1.B.3",
+                        "1.C",
+                        "2",
+                        "2.A",
+                        "2.A.1",
+                        "2.A.2",
+                        "2.A.3",
+                        "2.A.4",
+                        "2.B",
+                        "2.C",
+                        "2.D",
+                        "2.F",
+                        "2.G",
+                        "2.H",
+                        "3",
+                        "3.A",
+                        "3.B",
+                        "3.C",
+                        "3.D",
+                        "3.E",
+                        "3.F",
+                        "3.G",
+                        "3.H",
+                        "3.I",
+                        "4",
+                        "4.A",
+                        "4.B",
+                        "4.C",
+                        "4.D",
+                        "4.E",
+                        "4.E.1",
+                        "4.E.2",
+                        "4.E.3",
+                        "5",
+                        "5.A",
+                        "5.B",
+                        "5.C",
+                        "5.D",
+                    ]
+                },
             },
         },
     },
-    'basket_copy': {
-        'GWPs_to_add': ["SARGWP100", "AR5GWP100", "AR6GWP100"],
-        'entities': ["HFCS", "PFCS"],
-        'source_GWP': gwp_to_use,
+    "basket_copy": {
+        "GWPs_to_add": ["SARGWP100", "AR5GWP100", "AR6GWP100"],
+        "entities": ["HFCS", "PFCS"],
+        "source_GWP": gwp_to_use,
     },
 }

 cat_conversion = {
-    'mapping': {
-        '0': '0',
-        'M.0.EL': 'M.0.EL',
-        '1': '1',
-        '1.A': '1.A',
-        '1.A.1': '1.A.1',
-        '1.A.1.a': '1.A.1.a',
-        '1.A.1.b': '1.A.1.b',
-        '1.A.2': '1.A.2',
-        '1.A.3': '1.A.3',
-        '1.A.3.a': '1.A.3.a',
-        '1.A.3.b': '1.A.3.b',
-        '1.A.3.c': '1.A.3.c',
-        '1.A.3.d': '1.A.3.d',
-        '1.A.4': '1.A.4',
-        '1.A.5': '1.A.5',
-        '1.B': '1.B',
-        '1.B.1': '1.B.1',
-        '1.B.2': '1.B.2',
-        '1.B.3': '1.B.3',
-        '1.C': '1.C',
-        '1.C.1': '1.C.1',
-        '1.C.2': '1.C.2',
-        '1.C.3': '1.C.3',
-        '2': '2',
-        '2.A': '2.A',
-        '2.A.1': '2.A.1',
-        '2.A.2': '2.A.2',
-        '2.A.3': '2.A.3',
-        '2.A.4': '2.A.4',
-        '2.A.4.b': '2.A.4.b',
-        '2.A.4.d': '2.A.4.d',
-        '2.B': '2.B',
-        '2.B.2': '2.B.2',
-        '2.B.4': '2.B.4',
-        '2.B.8': '2.B.8',
-        '2.B.8.b': '2.B.8.b',
-        '2.B.8.c': '2.B.8.c',
-        '2.B.8.e': '2.B.8.e',
-        '2.B.8.f': '2.B.8.f',
-        '2.C': '2.C',
-        '2.C.1': '2.C.1',
-        '2.D': '2.D',
-        '2.D.1': '2.D.1',
-        '2.F': '2.F',
-        '2.F.1': '2.F.1',
-        '2.G': '2.G',
-        '2.G.1': '2.G.1',
-        '2.H': '2.H',
-        '2.H.1': '2.H.1',
-        '2.H.2': '2.H.2',
-        '3': 'M.AG',
-        '3.A': '3.A.1',
-        '3.B': '3.A.2',
-        '3.C': 'M.3.C.1.b.i',  # field burning of agricultural residues
-        '3.D': '3.C.2',  # Liming
-        '3.E': '3.C.3',  # urea application
-        '3.F': '3.C.4',  # direct N2O from agri soils
-        '3.G': '3.C.5',  # indirect N2O from agri soils
-        '3.H': '3.C.6',  # indirect N2O from manure management
-        '3.I': '3.C.7',  # rice
+    "mapping": {
+        "0": "0",
+        "M.0.EL": "M.0.EL",
+        "1": "1",
+        "1.A": "1.A",
+        "1.A.1": "1.A.1",
+        "1.A.1.a": "1.A.1.a",
+        "1.A.1.b": "1.A.1.b",
+        "1.A.2": "1.A.2",
+        "1.A.3": "1.A.3",
+        "1.A.3.a": "1.A.3.a",
+        "1.A.3.b": "1.A.3.b",
+        "1.A.3.c": "1.A.3.c",
+        "1.A.3.d": "1.A.3.d",
+        "1.A.4": "1.A.4",
+        "1.A.5": "1.A.5",
+        "1.B": "1.B",
+        "1.B.1": "1.B.1",
+        "1.B.2": "1.B.2",
+        "1.B.3": "1.B.3",
+        "1.C": "1.C",
+        "1.C.1": "1.C.1",
+        "1.C.2": "1.C.2",
+        "1.C.3": "1.C.3",
+        "2": "2",
+        "2.A": "2.A",
+        "2.A.1": "2.A.1",
+        "2.A.2": "2.A.2",
+        "2.A.3": "2.A.3",
+        "2.A.4": "2.A.4",
+        "2.A.4.b": "2.A.4.b",
+        "2.A.4.d": "2.A.4.d",
+        "2.B": "2.B",
+        "2.B.2": "2.B.2",
+        "2.B.4": "2.B.4",
+        "2.B.8": "2.B.8",
+        "2.B.8.b": "2.B.8.b",
+        "2.B.8.c": "2.B.8.c",
+        "2.B.8.e": "2.B.8.e",
+        "2.B.8.f": "2.B.8.f",
+        "2.C": "2.C",
+        "2.C.1": "2.C.1",
+        "2.D": "2.D",
+        "2.D.1": "2.D.1",
+        "2.F": "2.F",
+        "2.F.1": "2.F.1",
+        "2.G": "2.G",
+        "2.G.1": "2.G.1",
+        "2.H": "2.H",
+        "2.H.1": "2.H.1",
+        "2.H.2": "2.H.2",
+        "3": "M.AG",
+        "3.A": "3.A.1",
+        "3.B": "3.A.2",
+        "3.C": "M.3.C.1.b.i",  # field burning of agricultural residues
+        "3.D": "3.C.2",  # Liming
+        "3.E": "3.C.3",  # urea application
+        "3.F": "3.C.4",  # direct N2O from agri soils
+        "3.G": "3.C.5",  # indirect N2O from agri soils
+        "3.H": "3.C.6",  # indirect N2O from manure management
+        "3.I": "3.C.7",  # rice
         #'4': 'M.LULUCF',
-        '4.A': '3.B.1.a',  # forest remaining forest
-        '4.B': '3.B.2.a',  # cropland remaining cropland
-        '4.C': '3.B.2.b',  # land converted to cropland
-        '4.D': '3.B.6.b',  # land converted to other land
+        "4.A": "3.B.1.a",  # forest remaining forest
+        "4.B": "3.B.2.a",  # cropland remaining cropland
+        "4.C": "3.B.2.b",  # land converted to cropland
+        "4.D": "3.B.6.b",  # land converted to other land
         #'4.E': 'M.3.C.1.LU',  # biomass burning (LULUCF)
-        '4.E.1': '3.C.1.a', # biomass burning (Forest Land)
-        '4.E.2': 'M.3.C.1.b.ii', # biomass burning (Cropland)
-        '4.E.3': '3.C.1.d', # biomass burning (Other Land)
-        '5': '4',
-        '5.A': '4.A',
-        '5.A.1': '4.A.1',
-        '5.A.2': '4.A.2',
-        '5.B': '4.B',
-        '5.C': '4.C',
-        '5.C.1': '4.C.1',
-        '5.D': '4.D',
-        '5.D.1': '4.D.1',
-        '5.D.2': '4.D.2',
-        'M.BK': 'M.BK',
-        'M.BK.A': 'M.BK.A',
-        'M.BK.M': 'M.BM.M',
-        'M.BIO': 'M.BIO',
+        "4.E.1": "3.C.1.a",  # biomass burning (Forest Land)
+        "4.E.2": "M.3.C.1.b.ii",  # biomass burning (Cropland)
+        "4.E.3": "3.C.1.d",  # biomass burning (Other Land)
+        "5": "4",
+        "5.A": "4.A",
+        "5.A.1": "4.A.1",
+        "5.A.2": "4.A.2",
+        "5.B": "4.B",
+        "5.C": "4.C",
+        "5.C.1": "4.C.1",
+        "5.D": "4.D",
+        "5.D.1": "4.D.1",
+        "5.D.2": "4.D.2",
+        "M.BK": "M.BK",
+        "M.BK.A": "M.BK.A",
+        "M.BK.M": "M.BM.M",
+        "M.BIO": "M.BIO",
     },
-    'aggregate': {
-        '3.A': {'sources': ['3.A.1', '3.A.2'], 'name': 'Livestock'},
-        '3.C.1.b': {'sources': ['M.3.C.1.b.i', 'M.3.C.1.b.ii'],
-                  'name': 'Biomass Burning In Cropland'},
-        'M.3.C.1.AG': {'sources': ['3.C.1.b', '3.C.1.c'],
-                  'name': 'Biomass Burning (Agriculture)'},
-        'M.3.C.1.LU': {'sources': ['3.C.1.a', '3.C.1.d'],
-                  'name': 'Biomass Burning (LULUCF)'},
-        '3.C.1': {'sources': ['M.3.C.1.AG', 'M.3.C.1.LU'],
-                  'name': 'Emissions from Biomass Burning'},
-        '3.C': {'sources': ['3.C.1', '3.C.2', '3.C.3', '3.C.4', '3.C.5', '3.C.6', '3.C.7'],
-                'name': 'Aggregate sources and non-CO2 emissions sources on land'},
-        'M.3.C.AG': {
-            'sources': ['M.3.C.1.AG', '3.C.2', '3.C.3', '3.C.4', '3.C.5', '3.C.6', '3.C.7'],
-            'name': 'Aggregate sources and non-CO2 emissions sources on land (Agriculture)'},
-        'M.AG.ELV': {'sources': ['M.3.C.AG'],
-                     'name': 'Agriculture excluding livestock emissions'},
-        'M.3.C.LU': {'sources': ['M.3.C.1.LU'],
-                     'name': 'Aggregate sources and non-CO2 emissions sources on land (Land use)'},
-        '3.B.1': {'sources': ['3.B.1.a'], 'name': 'Forest Land'},
-        '3.B.2': {'sources': ['3.B.2.a', '3.B.2.b'], 'name': 'Cropland'},
-        '3.B.6': {'sources': ['3.B.6.b'], 'name': 'Other Land'},
-        '3.B': {'sources': ['3.B.1', '3.B.2', '3.B.6'], 'name': 'Land'},
-        'M.LULUCF': {'sources': ['3.B', 'N.3.C.LU'], 'name': 'LULUCF'},
-        '3': {'sources': ['M.AG', 'M.LULUCF'], 'name': 'AFOLU'},
+    "aggregate": {
+        "3.A": {"sources": ["3.A.1", "3.A.2"], "name": "Livestock"},
+        "3.C.1.b": {
+            "sources": ["M.3.C.1.b.i", "M.3.C.1.b.ii"],
+            "name": "Biomass Burning In Cropland",
+        },
+        "M.3.C.1.AG": {
+            "sources": ["3.C.1.b", "3.C.1.c"],
+            "name": "Biomass Burning (Agriculture)",
+        },
+        "M.3.C.1.LU": {
+            "sources": ["3.C.1.a", "3.C.1.d"],
+            "name": "Biomass Burning (LULUCF)",
+        },
+        "3.C.1": {
+            "sources": ["M.3.C.1.AG", "M.3.C.1.LU"],
+            "name": "Emissions from Biomass Burning",
+        },
+        "3.C": {
+            "sources": ["3.C.1", "3.C.2", "3.C.3", "3.C.4", "3.C.5", "3.C.6", "3.C.7"],
+            "name": "Aggregate sources and non-CO2 emissions sources on land",
+        },
+        "M.3.C.AG": {
+            "sources": [
+                "M.3.C.1.AG",
+                "3.C.2",
+                "3.C.3",
+                "3.C.4",
+                "3.C.5",
+                "3.C.6",
+                "3.C.7",
+            ],
+            "name": "Aggregate sources and non-CO2 emissions sources on land (Agriculture)",
+        },
+        "M.AG.ELV": {
+            "sources": ["M.3.C.AG"],
+            "name": "Agriculture excluding livestock emissions",
+        },
+        "M.3.C.LU": {
+            "sources": ["M.3.C.1.LU"],
+            "name": "Aggregate sources and non-CO2 emissions sources on land (Land use)",
+        },
+        "3.B.1": {"sources": ["3.B.1.a"], "name": "Forest Land"},
+        "3.B.2": {"sources": ["3.B.2.a", "3.B.2.b"], "name": "Cropland"},
+        "3.B.6": {"sources": ["3.B.6.b"], "name": "Other Land"},
+        "3.B": {"sources": ["3.B.1", "3.B.2", "3.B.6"], "name": "Land"},
+        "M.LULUCF": {"sources": ["3.B", "N.3.C.LU"], "name": "LULUCF"},
+        "3": {"sources": ["M.AG", "M.LULUCF"], "name": "AFOLU"},
     },
 }

 sectors_to_save = [
-    '1', '1.A', '1.A.1', '1.A.1.a', '1.A.1.b', '1.A.2', '1.A.3', '1.A.3.a', '1.A.3.b',
-    '1.A.3.c', '1.A.3.d', '1.A.4', '1.A.5',
-    '1.B', '1.B.1', '1.B.2', '1.B.3', '1.C', '1.C.1', '1.C.2', '1.C.3',
-    '2', '2.A', '2.A.1', '2.A.2', '2.A.3', '2.A.4', '2.A.4.b', '2.A.4.d',
-    '2.B', '2.B.2', '2.B.4', '2.B.8', '2.B.8.a', '2.B.8.c', '2.B.8.e', '2.B.8.f',
-    '2.C', '2.C.1', '2.F', '2.F.1', '2.G', '2.G.1', '2.H', '2.H.1', '2.H.2',
-    '3', 'M.AG', '3.A', '3.A.1', '3.A.2',
-    '3.C', '3.C.1', '3.C.1.a', '3.C.1.b', '3.C.1.d', '3.C.2', '3.C.3', '3.C.4',
-    '3.C.5', '3.C.6', '3.C.7', 'M.3.C.1.AG', 'M.3.C.AG', 'M.AG.ELV',
-    'M.LULUCF', 'M.3.C.1.LU', 'M.3.C.LU', '3.B', '3.B.1', '3.B.1.a', '3.B.2', '3.B.2.a',
-    '3.B.2.b', '3.B.6', '3.B.6.b',
-    '4', '4.A', '4.A.1', '4.A.2', '4.B', '4.C', '4.C.1', '4.D', '4.D.1', '4.D.2',
-    '0', 'M.0.EL', 'M.BK', 'M.BK.A', 'M.BK.M', 'M.BIO']
+    "1",
+    "1.A",
+    "1.A.1",
+    "1.A.1.a",
+    "1.A.1.b",
+    "1.A.2",
+    "1.A.3",
+    "1.A.3.a",
+    "1.A.3.b",
+    "1.A.3.c",
+    "1.A.3.d",
+    "1.A.4",
+    "1.A.5",
+    "1.B",
+    "1.B.1",
+    "1.B.2",
+    "1.B.3",
+    "1.C",
+    "1.C.1",
+    "1.C.2",
+    "1.C.3",
+    "2",
+    "2.A",
+    "2.A.1",
+    "2.A.2",
+    "2.A.3",
+    "2.A.4",
+    "2.A.4.b",
+    "2.A.4.d",
+    "2.B",
+    "2.B.2",
+    "2.B.4",
+    "2.B.8",
+    "2.B.8.a",
+    "2.B.8.c",
+    "2.B.8.e",
+    "2.B.8.f",
+    "2.C",
+    "2.C.1",
+    "2.F",
+    "2.F.1",
+    "2.G",
+    "2.G.1",
+    "2.H",
+    "2.H.1",
+    "2.H.2",
+    "3",
+    "M.AG",
+    "3.A",
+    "3.A.1",
+    "3.A.2",
+    "3.C",
+    "3.C.1",
+    "3.C.1.a",
+    "3.C.1.b",
+    "3.C.1.d",
+    "3.C.2",
+    "3.C.3",
+    "3.C.4",
+    "3.C.5",
+    "3.C.6",
+    "3.C.7",
+    "M.3.C.1.AG",
+    "M.3.C.AG",
+    "M.AG.ELV",
+    "M.LULUCF",
+    "M.3.C.1.LU",
+    "M.3.C.LU",
+    "3.B",
+    "3.B.1",
+    "3.B.1.a",
+    "3.B.2",
+    "3.B.2.a",
+    "3.B.2.b",
+    "3.B.6",
+    "3.B.6.b",
+    "4",
+    "4.A",
+    "4.A.1",
+    "4.A.2",
+    "4.B",
+    "4.C",
+    "4.C.1",
+    "4.D",
+    "4.D.1",
+    "4.D.2",
+    "0",
+    "M.0.EL",
+    "M.BK",
+    "M.BK.A",
+    "M.BK.M",
+    "M.BIO",
+]


 # gas baskets
 gas_baskets = {
-    'FGASES (SARGWP100)': ['HFCS (SARGWP100)', 'PFCS (SARGWP100)', 'SF6', 'NF3'],
-    'FGASES (AR4GWP100)': ['HFCS (AR4GWP100)', 'PFCS (AR4GWP100)', 'SF6', 'NF3'],
-    'FGASES (AR5GWP100)':['HFCS (AR5GWP100)', 'PFCS (AR5GWP100)', 'SF6', 'NF3'],
-    'FGASES (AR6GWP100)':['HFCS (AR6GWP100)', 'PFCS (AR6GWP100)', 'SF6', 'NF3'],
-    'KYOTOGHG (SARGWP100)': ['CO2', 'CH4', 'N2O', 'FGASES (SARGWP100)'],
-    'KYOTOGHG (AR4GWP100)': ['CO2', 'CH4', 'N2O', 'FGASES (AR4GWP100)'],
-    'KYOTOGHG (AR5GWP100)': ['CO2', 'CH4', 'N2O', 'FGASES (AR5GWP100)'],
-    'KYOTOGHG (AR6GWP100)': ['CO2', 'CH4', 'N2O', 'FGASES (AR6GWP100)'],
+    "FGASES (SARGWP100)": ["HFCS (SARGWP100)", "PFCS (SARGWP100)", "SF6", "NF3"],
+    "FGASES (AR4GWP100)": ["HFCS (AR4GWP100)", "PFCS (AR4GWP100)", "SF6", "NF3"],
+    "FGASES (AR5GWP100)": ["HFCS (AR5GWP100)", "PFCS (AR5GWP100)", "SF6", "NF3"],
+    "FGASES (AR6GWP100)": ["HFCS (AR6GWP100)", "PFCS (AR6GWP100)", "SF6", "NF3"],
+    "KYOTOGHG (SARGWP100)": ["CO2", "CH4", "N2O", "FGASES (SARGWP100)"],
+    "KYOTOGHG (AR4GWP100)": ["CO2", "CH4", "N2O", "FGASES (AR4GWP100)"],
+    "KYOTOGHG (AR5GWP100)": ["CO2", "CH4", "N2O", "FGASES (AR5GWP100)"],
+    "KYOTOGHG (AR6GWP100)": ["CO2", "CH4", "N2O", "FGASES (AR6GWP100)"],
 }
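Read as a recipe, each gas_baskets entry defines a composite entity as the CO2-equivalent sum of its members, and nested baskets (KYOTOGHG contains FGASES) have to be resolved inside-out. A minimal pandas sketch of that arithmetic — the emissions frame and the sum_basket helper below are hypothetical, and the pipeline itself performs this aggregation through process_data_for_country rather than by hand:

import pandas as pd

# hypothetical per-entity CO2eq time series (columns = entities, index = years)
emissions = pd.DataFrame(
    {
        "CO2": [100.0, 110.0],
        "CH4": [20.0, 21.0],
        "N2O": [5.0, 5.5],
        "HFCS (AR4GWP100)": [1.0, 1.2],
        "PFCS (AR4GWP100)": [0.3, 0.3],
        "SF6": [0.1, 0.1],
        "NF3": [0.0, 0.0],
    },
    index=[2018, 2019],
)

def sum_basket(df, basket, contents):
    # sum the member columns; min_count=1 keeps NaN when all members are missing
    df[basket] = df[contents].sum(axis=1, min_count=1)
    return df

# resolve nested baskets inside-out: FGASES first, then KYOTOGHG
emissions = sum_basket(
    emissions, "FGASES (AR4GWP100)", ["HFCS (AR4GWP100)", "PFCS (AR4GWP100)", "SF6", "NF3"]
)
emissions = sum_basket(
    emissions, "KYOTOGHG (AR4GWP100)", ["CO2", "CH4", "N2O", "FGASES (AR4GWP100)"]
)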

+ 129 - 89
src/unfccc_ghg_data/unfccc_reader/Thailand/read_THA_BUR3_from_pdf.py

@@ -1,10 +1,14 @@
-# this script reads data from Thailand's BUR3
-# Data is read from the pdf file
+"""
+Read Thailand's BUR3 from pdf

+This script reads data from Thailand's BUR3.
+Data are read from the pdf using camelot.
+
+"""
 import camelot
 import pandas as pd
 import primap2 as pm2
-from .config_tha_bur3 import (
+from config_tha_bur3 import (
     cat_conversion,
     coords_cols,
     coords_cols_indirect,
@@ -26,53 +30,65 @@ from .config_tha_bur3 import (
     trend_conf,
 )

-from unfccc_ghg_data.helper import downloaded_data_path, extracted_data_path, process_data_for_country
+from unfccc_ghg_data.helper import (
+    downloaded_data_path,
+    extracted_data_path,
+    process_data_for_country,
+)

 if __name__ == "__main__":
     # ###
     # configuration
     # ###
-    input_folder = downloaded_data_path / 'UNFCCC' / 'Thailand' / 'BUR3'
-    output_folder = extracted_data_path / 'UNFCCC' / 'Thailand'
+    input_folder = downloaded_data_path / "UNFCCC" / "Thailand" / "BUR3"
+    output_folder = extracted_data_path / "UNFCCC" / "Thailand"
     if not output_folder.exists():
         output_folder.mkdir()

-    inventory_file = 'BUR3_Thailand_251220_.pdf'
-    output_filename = 'THA_BUR3_2020_'
+    inventory_file = "BUR3_Thailand_251220_.pdf"
+    output_filename = "THA_BUR3_2020_"

     compression = dict(zlib=True, complevel=9)

     # inventory tables
-    pages_inventory = '68,69'
+    pages_inventory = "68,69"

     # main sector time series
-    page_main_sector_ts = '70'
+    page_main_sector_ts = "70"

     # indirect gases time series
-    page_indirect = '72'
-
+    page_indirect = "72"

     # ###
     # read the inventory data and convert to PM2 IF
     # ###
-    tables_inventory = camelot.read_pdf(str(input_folder / inventory_file), pages=pages_inventory,
-                                        split_text=True, flavor="lattice")
+    tables_inventory = camelot.read_pdf(
+        str(input_folder / inventory_file),
+        pages=pages_inventory,
+        split_text=True,
+        flavor="lattice",
+    )

     df_inventory = tables_inventory[0].df[1:]
     df_header = pd.DataFrame([inv_conf["header"], inv_conf["unit"]])

-    df_inventory = pd.concat([df_header, df_inventory, tables_inventory[1].df.iloc[1:]],
-                             axis=0, join='outer')
+    df_inventory = pd.concat(
+        [df_header, df_inventory, tables_inventory[1].df.iloc[1:]], axis=0, join="outer"
+    )

-    df_inventory = pm2.pm2io.nir_add_unit_information(df_inventory,
-                                                      unit_row=inv_conf["unit_row"],
-                                                      entity_row=inv_conf["entity_row"],
-                                                      regexp_entity=".*", regexp_unit=".*",
-                                                      default_unit="Gg")
+    df_inventory = pm2.pm2io.nir_add_unit_information(
+        df_inventory,
+        unit_row=inv_conf["unit_row"],
+        entity_row=inv_conf["entity_row"],
+        regexp_entity=".*",
+        regexp_unit=".*",
+        default_unit="Gg",
+    )
     # set index and convert to long format
     df_inventory = df_inventory.set_index(inv_conf["index_cols"])
-    df_inventory_long = pm2.pm2io.nir_convert_df_to_long(df_inventory, inv_conf["year"],
-                                                         inv_conf["header_long"])
+    df_inventory_long = pm2.pm2io.nir_convert_df_to_long(
+        df_inventory, inv_conf["year"], inv_conf["header_long"]
+    )
     df_inventory_long["orig_cat_name"] = df_inventory_long["orig_cat_name"].str[0]

     # prep for conversion to PM2 IF and native format
@@ -81,24 +97,29 @@ if __name__ == "__main__":

     # replace cat names by codes in col "category"
     # first the manual replacements
-    df_inventory_long["category"] = \
-        df_inventory_long["category"].replace(inv_conf["cat_codes_manual"])
+    df_inventory_long["category"] = df_inventory_long["category"].replace(
+        inv_conf["cat_codes_manual"]
+    )
+
     # then the regex replacements
-    def repl(m):
-        return m.group('code')
-    df_inventory_long["category"] = \
-        df_inventory_long["category"].str.replace(inv_conf["cat_code_regexp"], repl,
-                                                  regex=True)
+    def repl(m):  # noqa: D103
+        return m.group("code")
+
+    df_inventory_long["category"] = df_inventory_long["category"].str.replace(
+        inv_conf["cat_code_regexp"], repl, regex=True
+    )
     df_inventory_long = df_inventory_long.reset_index(drop=True)

     # replace "," with "" in data
-    def repl(m):
-        return m.group('part1') + m.group('part2')
-    df_inventory_long.loc[:, "data"] = \
-        df_inventory_long.loc[:, "data"].str.replace(
-            '(?P<part1>[0-9]+),(?P<part2>[0-9\\.]+)$', repl, regex=True)
-    df_inventory_long.loc[:, "data"] = df_inventory_long.loc[:, "data"].str.\
-        replace(' ','', regex=False)
+    def repl(m):  # noqa: D103
+        return m.group("part1") + m.group("part2")
+
+    df_inventory_long.loc[:, "data"] = df_inventory_long.loc[:, "data"].str.replace(
+        "(?P<part1>[0-9]+),(?P<part2>[0-9\\.]+)$", repl, regex=True
+    )
+    df_inventory_long.loc[:, "data"] = df_inventory_long.loc[:, "data"].str.replace(
+        " ", "", regex=False
+    )

     # make sure all col headers are str
     df_inventory_long.columns = df_inventory_long.columns.map(str)
@@ -108,27 +129,31 @@ if __name__ == "__main__":
     data_inventory_IF = pm2.pm2io.convert_long_dataframe_if(
         df_inventory_long,
         coords_cols=coords_cols,
-        #add_coords_cols=add_coords_cols,
+        # add_coords_cols=add_coords_cols,
         coords_defaults=coords_defaults,
         coords_terminologies=coords_terminologies,
         coords_value_mapping=coords_value_mapping,
-        #coords_value_filling=coords_value_filling,
+        # coords_value_filling=coords_value_filling,
         filter_remove=filter_remove,
-        #filter_keep=filter_keep,
+        # filter_keep=filter_keep,
         meta_data=meta_data,
         convert_str=True,
         time_format="%Y",
-        )
+    )

     # ###
     # read the main sector time series and convert to PM2 IF
     # ###
-    tables_main_sector_ts = camelot.read_pdf(str(input_folder / inventory_file), pages=page_main_sector_ts,
-                                        split_text=True, flavor="lattice")
+    tables_main_sector_ts = camelot.read_pdf(
+        str(input_folder / inventory_file),
+        pages=page_main_sector_ts,
+        split_text=True,
+        flavor="lattice",
+    )

     df_main_sector_ts = tables_main_sector_ts[0].df.iloc[2:]
-    #df_header = pd.DataFrame([header_main_sector_ts, unit_main_sector_ts])
-    #df_main_sector_ts = pd.concat([df_header, df_main_sector_ts], axis=0, join='outer')
+    # df_header = pd.DataFrame([header_main_sector_ts, unit_main_sector_ts])
+    # df_main_sector_ts = pd.concat([df_header, df_main_sector_ts], axis=0, join='outer')
     df_main_sector_ts.columns = [trend_conf["header"], trend_conf["unit"]]

     df_main_sector_ts = df_main_sector_ts.transpose()
@@ -141,42 +166,49 @@ if __name__ == "__main__":

     # replace cat names by codes in col "category"
     df_main_sector_ts["category"] = df_main_sector_ts["category"].replace(
-        trend_conf["cat_codes_manual"])
+        trend_conf["cat_codes_manual"]
+    )
 
-    def repl(m):
-        return m.group('part1') + m.group('part2')
-    year_cols = list(set(df_main_sector_ts.columns) - set(['category', 'unit']))
+    def repl(m):  # noqa: D103
+        return m.group("part1") + m.group("part2")
+
+    year_cols = list(set(df_main_sector_ts.columns) - set(["category", "unit"]))
     for col in year_cols:
     for col in year_cols:
-        df_main_sector_ts.loc[:, col] = df_main_sector_ts.loc[:, col].str.\
-            replace('(?P<part1>[0-9]+),(?P<part2>[0-9\\.]+)$', repl, regex=True)
-        df_main_sector_ts.loc[:, col] = df_main_sector_ts.loc[:, col].str.\
-            replace(' ','', regex=False)
+        df_main_sector_ts.loc[:, col] = df_main_sector_ts.loc[:, col].str.replace(
+            "(?P<part1>[0-9]+),(?P<part2>[0-9\\.]+)$", repl, regex=True
+        )
+        df_main_sector_ts.loc[:, col] = df_main_sector_ts.loc[:, col].str.replace(
+            " ", "", regex=False
+        )

     data_main_sector_ts_IF = pm2.pm2io.convert_wide_dataframe_if(
         df_main_sector_ts,
         coords_cols=coords_cols_main_sector_ts,
-        #add_coords_cols=add_coords_cols,
+        # add_coords_cols=add_coords_cols,
         coords_defaults=coords_defaults_main_sector_ts,
         coords_terminologies=coords_terminologies,
         coords_value_mapping=coords_value_mapping,
-        #coords_value_filling=coords_value_filling,
+        # coords_value_filling=coords_value_filling,
         filter_remove=filter_remove,
-        #filter_keep=filter_keep,
+        # filter_keep=filter_keep,
         meta_data=meta_data,
         convert_str=True,
         time_format="%Y",
-        )
-
+    )

     # ###
     # read the indirect gases time series and convert to PM2 IF
     # ###
-    tables_indirect = camelot.read_pdf(str(input_folder / inventory_file), pages=page_indirect,
-                                        split_text=True, flavor="lattice")
+    tables_indirect = camelot.read_pdf(
+        str(input_folder / inventory_file),
+        pages=page_indirect,
+        split_text=True,
+        flavor="lattice",
+    )

     df_indirect = tables_indirect[0].df.iloc[2:]
-    #df_header = pd.DataFrame([header_main_sector_ts, unit_main_sector_ts])
-    #df_main_sector_ts = pd.concat([df_header, df_main_sector_ts], axis=0, join='outer')
+    # df_header = pd.DataFrame([header_main_sector_ts, unit_main_sector_ts])
+    # df_main_sector_ts = pd.concat([df_header, df_main_sector_ts], axis=0, join='outer')
     df_indirect.columns = [ind_conf["header"], ind_conf["unit"]]

     df_indirect = df_indirect.transpose()
@@ -188,29 +220,32 @@ if __name__ == "__main__":
     df_indirect = df_indirect.drop(0)
     df_indirect = df_indirect.drop(columns=ind_conf["cols_to_remove"])
 
-    def repl(m):
-        return m.group('part1') + m.group('part2')
-    year_cols = list(set(df_indirect.columns) - set(['entity', 'unit']))
+    def repl(m):  # noqa: D103
+        return m.group("part1") + m.group("part2")
+
+    year_cols = list(set(df_indirect.columns) - set(["entity", "unit"]))
     for col in year_cols:
     for col in year_cols:
-        df_indirect.loc[:, col] = df_indirect.loc[:, col].str.\
-            replace('(?P<part1>[0-9]+),(?P<part2>[0-9\\.]+)$', repl, regex=True)
-        df_indirect.loc[:, col] = df_indirect.loc[:, col].str.\
-            replace(' ','', regex=False)
+        df_indirect.loc[:, col] = df_indirect.loc[:, col].str.replace(
+            "(?P<part1>[0-9]+),(?P<part2>[0-9\\.]+)$", repl, regex=True
+        )
+        df_indirect.loc[:, col] = df_indirect.loc[:, col].str.replace(
+            " ", "", regex=False
+        )

     data_indirect_IF = pm2.pm2io.convert_wide_dataframe_if(
         df_indirect,
         coords_cols=coords_cols_indirect,
-        #add_coords_cols=add_coords_cols,
+        # add_coords_cols=add_coords_cols,
         coords_defaults=coords_defaults_indirect,
         coords_terminologies=coords_terminologies,
         coords_value_mapping=coords_value_mapping,
-        #coords_value_filling=coords_value_filling,
-        #filter_remove=filter_remove,
-        #filter_keep=filter_keep,
+        # coords_value_filling=coords_value_filling,
+        # filter_remove=filter_remove,
+        # filter_keep=filter_keep,
         meta_data=meta_data,
         convert_str=True,
         time_format="%Y",
-        )
+    )

     # ###
     # merge the three datasets
@@ -231,12 +266,15 @@ if __name__ == "__main__":
         output_folder.mkdir()
     pm2.pm2io.write_interchange_format(
         output_folder / (output_filename + coords_terminologies["category"] + "_raw"),
-        data_all_if)
+        data_all_if,
+    )

     encoding = {var: compression for var in data_all_pm2.data_vars}
     data_all_pm2.pr.to_netcdf(
-        output_folder / (output_filename + coords_terminologies["category"] + "_raw.nc"),
-        encoding=encoding)
+        output_folder
+        / (output_filename + coords_terminologies["category"] + "_raw.nc"),
+        encoding=encoding,
+    )

     # ###
     # ## process the data
@@ -244,14 +282,15 @@ if __name__ == "__main__":
     data_proc_pm2 = data_all_pm2

     # combine CO2 emissions and removals
-    data_proc_pm2["CO2"] = data_proc_pm2[["CO2 emissions", "CO2 removals"]].pr.sum\
-        (dim="entity", skipna=True, min_count=1)
-    data_proc_pm2["CO2"].attrs['entity'] = 'CO2'
+    data_proc_pm2["CO2"] = data_proc_pm2[["CO2 emissions", "CO2 removals"]].pr.sum(
+        dim="entity", skipna=True, min_count=1
+    )
+    data_proc_pm2["CO2"].attrs["entity"] = "CO2"
 
 
     # actual processing
     # actual processing
     data_proc_pm2 = process_data_for_country(
     data_proc_pm2 = process_data_for_country(
         data_proc_pm2,
         data_proc_pm2,
-        entities_to_ignore=['CO2 emissions', 'CO2 removals'],
+        entities_to_ignore=["CO2 emissions", "CO2 removals"],
         gas_baskets={},
         processing_info_country=country_processing_step1,
     )
@@ -261,16 +300,16 @@ if __name__ == "__main__":
         entities_to_ignore=[],
         gas_baskets=gas_baskets,
         processing_info_country=country_processing_step2,
-        cat_terminology_out = terminology_proc,
-        category_conversion = cat_conversion,
-        sectors_out = sectors_to_save,
+        cat_terminology_out=terminology_proc,
+        category_conversion=cat_conversion,
+        sectors_out=sectors_to_save,
     )

     # adapt source and metadata
     # TODO: processing info is present twice
-    current_source = data_proc_pm2.coords["source"].values[0]
+    current_source = data_proc_pm2.coords["source"].to_numpy()[0]
     data_temp = data_proc_pm2.pr.loc[{"source": current_source}]
     data_temp = data_proc_pm2.pr.loc[{"source": current_source}]
-    data_proc_pm2 = data_proc_pm2.pr.set("source", 'BUR_NIR', data_temp)
+    data_proc_pm2 = data_proc_pm2.pr.set("source", "BUR_NIR", data_temp)

     # ###
     # save data to IF and native format
@@ -279,9 +318,10 @@ if __name__ == "__main__":
     if not output_folder.exists():
         output_folder.mkdir()
     pm2.pm2io.write_interchange_format(
-        output_folder / (output_filename + terminology_proc), data_proc_if)
+        output_folder / (output_filename + terminology_proc), data_proc_if
+    )

     encoding = {var: compression for var in data_proc_pm2.data_vars}
     data_proc_pm2.pr.to_netcdf(
-        output_folder / (output_filename + terminology_proc + ".nc"),
-        encoding=encoding)
+        output_folder / (output_filename + terminology_proc + ".nc"), encoding=encoding
+    )

+ 90 - 64
src/unfccc_ghg_data/unfccc_reader/Thailand/read_THA_BUR4_from_pdf.py

@@ -1,18 +1,27 @@
-# this script reads data from Thailand's BUR3
-# Data is read from two csv files which have been created manually from ocr processed
-# pdf files
-# pdftk Thailand_BUR4_final_28122022.pdf cat 65-67east output inventory_2019.pdf
-# ocrmypdf --force-ocr inventory_2019.pdf inventory_2019_ocr.pdf
-# pdftk Thailand_BUR4_final_28122022.pdf cat 69 output trends.pdf
-# ocrmypdf --force-ocr trends.pdf trends_ocr.pdf
-
-# values for HFCs and SF6 have been taken from Table2-9 where they are present in
-# CO2eq and thus HFC data can be used and SF6 data is not 0 as in the mein inventory
-# tables
+"""
+Read Thailand's BUR4 from pdf
+
+This script reads data from Thailand's BUR4.
+Data are read from two csv files which have been created manually from OCR-processed
+pdf files:
+
+.. code-block:: bash
+
+    pdftk Thailand_BUR4_final_28122022.pdf cat 65-67east output inventory_2019.pdf
+    ocrmypdf --force-ocr inventory_2019.pdf inventory_2019_ocr.pdf
+    pdftk Thailand_BUR4_final_28122022.pdf cat 69 output trends.pdf
+    ocrmypdf --force-ocr trends.pdf trends_ocr.pdf
+
+Values for HFCs and SF6 have been taken from Table 2-9 where they are present in
+CO2eq and thus HFC data can be used and SF6 data is not 0 as in the main inventory
+tables.
+
+"""
+
 
 
 import pandas as pd
 import primap2 as pm2
+from config_tha_bur4 import (
     cat_codes_manual_main_sector_ts,
     cat_codes_manual_main_sector_ts,
     cat_conversion,
     coords_cols,
     terminology_proc,
     terminology_proc,
 )

-from unfccc_ghg_data.helper import downloaded_data_path, extracted_data_path, process_data_for_country
+from unfccc_ghg_data.helper import (
+    downloaded_data_path,
+    extracted_data_path,
+    process_data_for_country,
+)

 if __name__ == "__main__":
     # ###
     # configuration
     # ###
-    output_folder = extracted_data_path / 'UNFCCC' / 'Thailand'
+    input_folder = downloaded_data_path / "UNFCCC" / "Thailand" / "BUR4"
+    output_folder = extracted_data_path / "UNFCCC" / "Thailand"
     if not output_folder.exists():
     if not output_folder.exists():
         output_folder.mkdir()

-    inventory_file = 'THA_inventory_2019.csv'
-    trends_file = 'THA_trends_2000-2019.csv'
-    indirect_file = 'THA_indirect_2000-2019.csv'
-    output_filename = 'THA_BUR4_2022_'
+    inventory_file = "THA_inventory_2019.csv"
+    trends_file = "THA_trends_2000-2019.csv"
+    indirect_file = "THA_indirect_2000-2019.csv"
+    output_filename = "THA_BUR4_2022_"

     compression = dict(zlib=True, complevel=9)

-
     # ###
     # read the inventory data and convert to PM2 IF
     # ###
+    df_inventory = pd.read_csv(input_folder / inventory_file, header=None)
     df_inventory = pm2.pm2io.nir_add_unit_information(
     df_inventory = pm2.pm2io.nir_add_unit_information(
-        regexp_entity=".*", regexp_unit=".*", default_unit="Gg")
+        df_inventory,
+        unit_row=inv_conf["unit_row"],
+        entity_row=inv_conf["entity_row"],
+        regexp_entity=".*",
+        regexp_unit=".*",
+        default_unit="Gg",
+    )
     # set index and convert to long format
     df_inventory = df_inventory.set_index(inv_conf["index_cols"])
-    df_inventory_long = pm2.pm2io.nir_convert_df_to_long(df_inventory, inv_conf["year"],
-                                                         inv_conf["header_long"])
+    df_inventory_long = pm2.pm2io.nir_convert_df_to_long(
+        df_inventory, inv_conf["year"], inv_conf["header_long"]
+    )
     df_inventory_long["orig_cat_name"] = df_inventory_long["orig_cat_name"].str[0]
     df_inventory_long["orig_cat_name"] = df_inventory_long["orig_cat_name"].str[0]
 
 
     # prep for conversion to PM2 IF and native format
     # prep for conversion to PM2 IF and native format
@@ -71,14 +89,17 @@ if __name__ == "__main__":

     # replace cat names by codes in col "category"
     # first the manual replacements
-    df_inventory_long["category"] = \
-        df_inventory_long["category"].replace(inv_conf["cat_codes_manual"])
+    df_inventory_long["category"] = df_inventory_long["category"].replace(
+        inv_conf["cat_codes_manual"]
+    )
+
     # then the regex replacements
-    def repl(m):
-        return m.group('code')
-    df_inventory_long["category"] = \
-        df_inventory_long["category"].str.replace(inv_conf["cat_code_regexp"], repl,
-                                                  regex=True)
+    def repl(m):  # noqa: D103
+        return m.group("code")
+
+    df_inventory_long["category"] = df_inventory_long["category"].str.replace(
+        inv_conf["cat_code_regexp"], repl, regex=True
+    )
     df_inventory_long = df_inventory_long.reset_index(drop=True)

     # make sure all col headers are str
@@ -89,17 +110,17 @@ if __name__ == "__main__":
     data_inventory_IF = pm2.pm2io.convert_long_dataframe_if(
         df_inventory_long,
         coords_cols=coords_cols,
-        #add_coords_cols=add_coords_cols,
+        # add_coords_cols=add_coords_cols,
         coords_defaults=coords_defaults,
         coords_terminologies=coords_terminologies,
         coords_value_mapping=coords_value_mapping,
-        #coords_value_filling=coords_value_filling,
+        # coords_value_filling=coords_value_filling,
         filter_remove=filter_remove,
-        #filter_keep=filter_keep,
+        # filter_keep=filter_keep,
         meta_data=meta_data,
         convert_str=True,
         time_format="%Y",
-        )
+    )

     # ###
     # read the main sector time series and convert to PM2 IF
@@ -115,24 +136,24 @@ if __name__ == "__main__":
     df_main_sector_ts = df_main_sector_ts.drop(0)

     # replace cat names by codes in col "category"
-    df_main_sector_ts["category"] = \
-        df_main_sector_ts["category"].replace(cat_codes_manual_main_sector_ts)
+    df_main_sector_ts["category"] = df_main_sector_ts["category"].replace(
+        cat_codes_manual_main_sector_ts
+    )

     data_main_sector_ts_IF = pm2.pm2io.convert_wide_dataframe_if(
         df_main_sector_ts,
         coords_cols=coords_cols_main_sector_ts,
-        #add_coords_cols=add_coords_cols,
+        # add_coords_cols=add_coords_cols,
         coords_defaults=coords_defaults_main_sector_ts,
         coords_terminologies=coords_terminologies,
         coords_value_mapping=coords_value_mapping,
-        #coords_value_filling=coords_value_filling,
+        # coords_value_filling=coords_value_filling,
         filter_remove=filter_remove,
-        #filter_keep=filter_keep,
+        # filter_keep=filter_keep,
         meta_data=meta_data,
         convert_str=True,
-        time_format='%Y',
-        )
-
+        time_format="%Y",
+    )

     # ###
     # read the indirect gases time series and convert to PM2 IF
@@ -150,17 +171,17 @@ if __name__ == "__main__":
     data_indirect_IF = pm2.pm2io.convert_wide_dataframe_if(
         df_indirect,
         coords_cols=coords_cols_indirect,
-        #add_coords_cols=add_coords_cols,
+        # add_coords_cols=add_coords_cols,
         coords_defaults=coords_defaults_indirect,
         coords_terminologies=coords_terminologies,
         coords_value_mapping=coords_value_mapping,
-        #coords_value_filling=coords_value_filling,
-        #filter_remove=filter_remove,
-        #filter_keep=filter_keep,
+        # coords_value_filling=coords_value_filling,
+        # filter_remove=filter_remove,
+        # filter_keep=filter_keep,
         meta_data=meta_data,
         convert_str=True,
         time_format="%Y",
-        )
+    )

     # ###
     # merge the three datasets
@@ -181,12 +202,15 @@ if __name__ == "__main__":
         output_folder.mkdir()
     pm2.pm2io.write_interchange_format(
         output_folder / (output_filename + coords_terminologies["category"] + "_raw"),
-        data_all_if)
+        data_all_if,
+    )

     encoding = {var: compression for var in data_all_pm2.data_vars}
     data_all_pm2.pr.to_netcdf(
-        output_folder / (output_filename + coords_terminologies["category"] + "_raw.nc"),
-        encoding=encoding)
+        output_folder
+        / (output_filename + coords_terminologies["category"] + "_raw.nc"),
+        encoding=encoding,
+    )

     # ###
     # ## process the data
@@ -194,14 +218,15 @@ if __name__ == "__main__":
     data_proc_pm2 = data_all_pm2

     # combine CO2 emissions and removals
-    data_proc_pm2["CO2"] = data_proc_pm2[["CO2 emissions", "CO2 removals"]].pr.sum\
-        (dim="entity", skipna=True, min_count=1)
-    data_proc_pm2["CO2"].attrs['entity'] = 'CO2'
+    data_proc_pm2["CO2"] = data_proc_pm2[["CO2 emissions", "CO2 removals"]].pr.sum(
+        dim="entity", skipna=True, min_count=1
+    )
+    data_proc_pm2["CO2"].attrs["entity"] = "CO2"
 
 
     # actual processing
     # actual processing
     data_proc_pm2 = process_data_for_country(
     data_proc_pm2 = process_data_for_country(
         data_proc_pm2,
         data_proc_pm2,
-        entities_to_ignore=['CO2 emissions', 'CO2 removals'],
+        entities_to_ignore=["CO2 emissions", "CO2 removals"],
         gas_baskets={},
         processing_info_country=country_processing_step1,
     )
@@ -211,16 +236,16 @@ if __name__ == "__main__":
         entities_to_ignore=[],
         gas_baskets=gas_baskets,
         processing_info_country=country_processing_step2,
-        cat_terminology_out = terminology_proc,
-        category_conversion = cat_conversion,
-        sectors_out = sectors_to_save,
+        cat_terminology_out=terminology_proc,
+        category_conversion=cat_conversion,
+        sectors_out=sectors_to_save,
     )

     # adapt source and metadata
     # TODO: processing info is present twice
-    current_source = data_proc_pm2.coords["source"].values[0]
+    current_source = data_proc_pm2.coords["source"].to_numpy()[0]
     data_temp = data_proc_pm2.pr.loc[{"source": current_source}]
     data_temp = data_proc_pm2.pr.loc[{"source": current_source}]
-    data_proc_pm2 = data_proc_pm2.pr.set("source", 'BUR_NIR', data_temp)
+    data_proc_pm2 = data_proc_pm2.pr.set("source", "BUR_NIR", data_temp)

     # ###
     # save data to IF and native format
@@ -229,9 +254,10 @@ if __name__ == "__main__":
     if not output_folder.exists():
         output_folder.mkdir()
     pm2.pm2io.write_interchange_format(
-        output_folder / (output_filename + terminology_proc), data_proc_if)
+        output_folder / (output_filename + terminology_proc), data_proc_if
+    )

     encoding = {var: compression for var in data_proc_pm2.data_vars}
     data_proc_pm2.pr.to_netcdf(
-        output_folder / (output_filename + terminology_proc + ".nc"),
-        encoding=encoding)
+        output_folder / (output_filename + terminology_proc + ".nc"), encoding=encoding
+    )

+ 16 - 1
src/unfccc_ghg_data/unfccc_reader/__init__.py

@@ -1 +1,16 @@
-#
+"""Read individual country submissions
+
+The UNFCCC reader contains code to read individual country inventories,
+mostly submitted by non-AnnexI countries to the UNFCCC as Biennial Update Reports (
+BUR), National Communications (NC), and National Inventory Reports (NIR). Code to
+read other official country repositories is also included here as it uses the same
+setup.
+
+The code is organized in country folders which contain scripts for each submission
+and configuration files which can also be used for several submissions if the
+configuration is sufficiently similar.
+
+Data are mostly read from pdf files using camelot, but in some cases machine-readable
+files like xlsx are available which we prefer over pdfs.
+
+"""

+ 26 - 19
src/unfccc_ghg_data/unfccc_reader/get_submissions_info.py

@@ -1,19 +1,28 @@
-# helper functions to get information on available submissions
-# and data reading functions for a given country
+"""
+Helper functions for the unfccc_reader
+
+helper functions to get information on available submissions
+and data reading functions for a given country
+"""
 
 import json
 from pathlib import Path
 
-from unfccc_ghg_data.helper import (downloaded_data_path, extracted_data_path,
-                                    get_country_code, root_path)
+from unfccc_ghg_data.helper import (
+    downloaded_data_path,
+    extracted_data_path,
+    get_country_code,
+    root_path,
+)
 
 code_path = root_path / "src" / "unfccc_ghg_data" / "unfccc_reader"
 # TODO: change this to use the code path stored in the helper module
 
+
 def get_possible_inputs(
-        country_name: str,
-        submission: str,
-        print_info: bool = False,
+    country_name: str,
+    submission: str,
+    print_info: bool = False,
 ) -> list[Path]:
     """
     For given country name and submission find the possible input files
@@ -71,10 +80,10 @@ def get_possible_inputs(
 
 
 def get_possible_outputs(
-        country_name: str,
-        submission: str,
-        print_info: bool = False,
-)-> list[Path]:
+    country_name: str,
+    submission: str,
+    print_info: bool = False,
+) -> list[Path]:
     """
     """
     For given country name and submission find the possible output files
     For given country name and submission find the possible output files
 
 
@@ -109,11 +118,15 @@ def get_possible_outputs(
             if country_code in folder_mapping:
                 country_folder = folder_mapping[country_code]
                 if not isinstance(country_folder, str):
-                    raise ValueError("Wrong data type in folder mapping json file. Should be str.")
+                    raise ValueError(
+                        "Wrong data type in folder mapping " "json file. Should be str."
+                    )
 
                 output_folder = item / country_folder
                 if output_folder.exists():
-                    for filepath in output_folder.glob(country_code + "_" + submission + "*"):
+                    for filepath in output_folder.glob(
+                        country_code + "_" + submission + "*"
+                    ):
                         output_files.append(filepath.relative_to(root_path))
 
     if print_info:
@@ -125,9 +138,3 @@ def get_possible_outputs(
             print("No output files found")
 
     return output_files
-
-
-
-
-
-
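A short, hedged usage sketch for the two helpers above (the country and submission values are illustrative, not guaranteed to exist in the repository):

    from unfccc_ghg_data.unfccc_reader.get_submissions_info import (
        get_possible_inputs,
        get_possible_outputs,
    )

    # list candidate input and output files for one submission
    input_files = get_possible_inputs("Colombia", "BUR3", print_info=True)
    output_files = get_possible_outputs("Colombia", "BUR3", print_info=True)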

+ 25 - 14
src/unfccc_ghg_data/unfccc_reader/read_UNFCCC_submission.py

@@ -1,28 +1,34 @@
-# this script takes submission and country as input (from doit) and
-# runs the appropriate script to extract the submission data
+"""
+wrapper to read UNFCCC submission
+
+Take submission and country as input (from doit) and
+run the appropriate script to extract the submission data
+
+"""
+
 
 import argparse
 
 import datalad.api
-from .get_submissions_info import (get_possible_inputs, get_possible_outputs)
 
 from unfccc_ghg_data.helper import get_code_file, root_path
 
+from .get_submissions_info import get_possible_inputs, get_possible_outputs
+
 if __name__ == "__main__":
     # Find the right function and possible input and output files and
     # read the data using datalad run.
     parser = argparse.ArgumentParser()
-    parser.add_argument('--country', help='Country name or code')
-    parser.add_argument('--submission', help='Submission to read')
+    parser.add_argument("--country", help="Country name or code")
+    parser.add_argument("--submission", help="Submission to read")
 
     args = parser.parse_args()
 
     country = args.country
     submission = args.submission
 
-
     print(f"Attempting to extract data for {submission} from {country}.")
-    print("#"*80)
+    print("#" * 80)
     print("")
 
     # get the correct script
@@ -35,8 +41,10 @@ if __name__ == "__main__":
         # get possible input files
         input_files = get_possible_inputs(country, submission)
         if not input_files:
-            print(f"No possible input files found for {country}, {submission}. "
-                  f"Something might be wrong here.")
+            print(
+                f"No possible input files found for {country}, {submission}. "
+                "Something might be wrong here."
+            )
         else:
             print("Found the following input_files:")
             for file in input_files:
@@ -51,8 +59,10 @@ if __name__ == "__main__":
         # get possible output files
         output_files = get_possible_outputs(country, submission)
         if not output_files:
-            print(f"No possible output files found for {country}, {submission}. "
-                  f"This is either the first run or something is wrong.")
+            print(
+                f"No possible output files found for {country}, {submission}. "
+                "This is either the first run or something is wrong."
+            )
         else:
             print("Found the following output_files:")
             for file in output_files:
@@ -74,6 +84,7 @@ if __name__ == "__main__":
     else:
         # no code found.
         print(f"No code found to read {submission} from {country}")
-        print(f"Use 'doit country_info country={country} to get "
-              f"a list of available submissions and datasets.")
-
+        print(
+            f"Use 'doit country_info country={country}' to get "
+            "a list of available submissions and datasets."
+        )
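For reference, a hedged sketch of how this wrapper can be dispatched (doit normally does this; the argument values are illustrative). Running it as a module keeps the relative import of get_submissions_info working:

    import subprocess

    subprocess.run(
        [
            "python",
            "-m",
            "unfccc_ghg_data.unfccc_reader.read_UNFCCC_submission",
            "--country",
            "Colombia",
            "--submission",
            "BUR3",
        ],
        check=True,
    )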