Jelajahi Sumber

Docstrings and dealing with ruff messages for unfccc_reader (not final as black is fighting ruff)

Johannes Gütschow 1 tahun lalu
induk
melakukan
06a9aceb8e
65 mengubah file dengan 7831 tambahan dan 4903 penghapusan
  1. 4 0
      Makefile
  2. 7 8
      docs/source/conf.py
  3. 15 1
      poetry.lock
  4. 1 0
      pyproject.toml
  5. 8 3
      src/unfccc_ghg_data/__init__.py
  6. 1 1
      src/unfccc_ghg_data/helper/__init__.py
  7. 234 109
      src/unfccc_ghg_data/helper/definitions.py
  8. 6 5
      src/unfccc_ghg_data/helper/folder_mapping.py
  9. 4 6
      src/unfccc_ghg_data/helper/functions.py
  10. 286 176
      src/unfccc_ghg_data/unfccc_crf_reader/unfccc_crf_reader_core.py
  11. 8 7
      src/unfccc_ghg_data/unfccc_di_reader/read_unfccc_di_for_country.py
  12. 5 8
      src/unfccc_ghg_data/unfccc_downloader/fetch_submissions_bur.py
  13. 3 7
      src/unfccc_ghg_data/unfccc_downloader/fetch_submissions_nc.py
  14. 27 4
      src/unfccc_ghg_data/unfccc_reader/Argentina/__init__.py
  15. 115 96
      src/unfccc_ghg_data/unfccc_reader/Argentina/read_ARG_BUR4_from_pdf.py
  16. 28 5
      src/unfccc_ghg_data/unfccc_reader/Chile/__init__.py
  17. 288 141
      src/unfccc_ghg_data/unfccc_reader/Chile/config_chl_bur4.py
  18. 90 52
      src/unfccc_ghg_data/unfccc_reader/Chile/read_CHL_BUR4_from_xlsx.py
  19. 100 55
      src/unfccc_ghg_data/unfccc_reader/Chile/read_CHL_BUR5_from_xlsx.py
  20. 30 1
      src/unfccc_ghg_data/unfccc_reader/Colombia/__init__.py
  21. 104 84
      src/unfccc_ghg_data/unfccc_reader/Colombia/read_COL_BUR3_from_xlsx.py
  22. 30 0
      src/unfccc_ghg_data/unfccc_reader/Indonesia/__init__.py
  23. 167 100
      src/unfccc_ghg_data/unfccc_reader/Indonesia/read_IDN_BUR3_from_pdf.py
  24. 30 0
      src/unfccc_ghg_data/unfccc_reader/Israel/__init__.py
  25. 409 314
      src/unfccc_ghg_data/unfccc_reader/Israel/config_isr_bur2.py
  26. 121 77
      src/unfccc_ghg_data/unfccc_reader/Israel/read_ISR_BUR2_from_pdf.py
  27. 30 0
      src/unfccc_ghg_data/unfccc_reader/Malaysia/__init__.py
  28. 922 602
      src/unfccc_ghg_data/unfccc_reader/Malaysia/config_mys_bur3.py
  29. 258 253
      src/unfccc_ghg_data/unfccc_reader/Malaysia/config_mys_bur4.py
  30. 82 52
      src/unfccc_ghg_data/unfccc_reader/Malaysia/read_MYS_BUR3_from_pdf.py
  31. 84 55
      src/unfccc_ghg_data/unfccc_reader/Malaysia/read_MYS_BUR4_from_pdf.py
  32. 30 0
      src/unfccc_ghg_data/unfccc_reader/Mexico/__init__.py
  33. 81 35
      src/unfccc_ghg_data/unfccc_reader/Mexico/config_mex_bur3.py
  34. 63 66
      src/unfccc_ghg_data/unfccc_reader/Mexico/read_MEX_BUR3_from_pdf.py
  35. 30 0
      src/unfccc_ghg_data/unfccc_reader/Montenegro/__init__.py
  36. 103 47
      src/unfccc_ghg_data/unfccc_reader/Montenegro/config_mne_bur3.py
  37. 88 56
      src/unfccc_ghg_data/unfccc_reader/Montenegro/read_MNE_BUR3_from_pdf.py
  38. 30 0
      src/unfccc_ghg_data/unfccc_reader/Morocco/__init__.py
  39. 187 108
      src/unfccc_ghg_data/unfccc_reader/Morocco/config_mar_bur3.py
  40. 122 88
      src/unfccc_ghg_data/unfccc_reader/Morocco/read_MAR_BUR3_from_pdf.py
  41. 30 0
      src/unfccc_ghg_data/unfccc_reader/Nigeria/__init__.py
  42. 294 272
      src/unfccc_ghg_data/unfccc_reader/Nigeria/config_nga_bur2.py
  43. 137 103
      src/unfccc_ghg_data/unfccc_reader/Nigeria/read_NGA_BUR2_from_pdf.py
  44. 30 0
      src/unfccc_ghg_data/unfccc_reader/Peru/__init__.py
  45. 77 66
      src/unfccc_ghg_data/unfccc_reader/Peru/config_per_bur3.py
  46. 33 20
      src/unfccc_ghg_data/unfccc_reader/Peru/read_PER_BUR3_from_pdf.py
  47. 30 0
      src/unfccc_ghg_data/unfccc_reader/Republic_of_Korea/__init__.py
  48. 511 403
      src/unfccc_ghg_data/unfccc_reader/Republic_of_Korea/config_kor_bur4.py
  49. 125 76
      src/unfccc_ghg_data/unfccc_reader/Republic_of_Korea/read_KOR_2021_Inventory_from_xlsx.py
  50. 140 82
      src/unfccc_ghg_data/unfccc_reader/Republic_of_Korea/read_KOR_2022_Inventory_from_xlsx.py
  51. 75 47
      src/unfccc_ghg_data/unfccc_reader/Republic_of_Korea/read_KOR_BUR4_from_xlsx.py
  52. 30 0
      src/unfccc_ghg_data/unfccc_reader/Singapore/__init__.py
  53. 408 256
      src/unfccc_ghg_data/unfccc_reader/Singapore/config_sgp_bur5.py
  54. 110 72
      src/unfccc_ghg_data/unfccc_reader/Singapore/read_SGP_BUR5_from_pdf.py
  55. 30 0
      src/unfccc_ghg_data/unfccc_reader/Taiwan/__init__.py
  56. 194 120
      src/unfccc_ghg_data/unfccc_reader/Taiwan/config_twn_nir2022.py
  57. 164 104
      src/unfccc_ghg_data/unfccc_reader/Taiwan/read_TWN_2022_Inventory_from_pdf.py
  58. 30 0
      src/unfccc_ghg_data/unfccc_reader/Thailand/__init__.py
  59. 405 223
      src/unfccc_ghg_data/unfccc_reader/Thailand/config_tha_bur3.py
  60. 461 250
      src/unfccc_ghg_data/unfccc_reader/Thailand/config_tha_bur4.py
  61. 129 89
      src/unfccc_ghg_data/unfccc_reader/Thailand/read_THA_BUR3_from_pdf.py
  62. 90 64
      src/unfccc_ghg_data/unfccc_reader/Thailand/read_THA_BUR4_from_pdf.py
  63. 16 1
      src/unfccc_ghg_data/unfccc_reader/__init__.py
  64. 26 19
      src/unfccc_ghg_data/unfccc_reader/get_submissions_info.py
  65. 25 14
      src/unfccc_ghg_data/unfccc_reader/read_UNFCCC_submission.py

+ 4 - 0
Makefile

@@ -40,6 +40,10 @@ black:  ## format the code using black
 ruff-fixes:  ## fix the code using ruff
 	poetry run ruff src tests scripts docs/source/conf.py docs/source/notebooks/*.py --fix
 
+.PHONY: ruff-fixes-current
+ruff-fixes-current:  ## fix the code using ruff
+	poetry run ruff src/unfccc_ghg_data/unfccc_reader --fix
+
 
 .PHONY: test
 test:  ## run the tests

+ 7 - 8
docs/source/conf.py

@@ -4,17 +4,16 @@ Configuration file for the Sphinx documentation builder.
 For the full list of built-in configuration values, see the documentation:
 https://www.sphinx-doc.org/en/master/usage/configuration.html
 """
+import os
 from functools import wraps
+from pathlib import Path
 
 from sphinxcontrib_autodocgen import AutoDocGen
 
-import os
-from pathlib import Path
 os.environ["UNFCCC_GHG_ROOT_PATH"] = str(Path("..") / "..")
 
 import unfccc_ghg_data
 
-
 # -- Project information -----------------------------------------------------
 # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
 
@@ -58,7 +57,7 @@ extensions = [
     # math support
     "sphinx.ext.mathjax",
     # execute code
-    # "sphinx_exec_code",
+    "sphinx_exec_code",
 ]
 
 # general sphinx settings
@@ -144,10 +143,10 @@ nb_execution_show_tb = True
 nb_execution_timeout = 120
 nb_custom_formats = {".py": ["jupytext.reads", {"fmt": "py:percent"}]}
 
-# # exec-code config
-# exec_code_working_dir = Path('..') / '..'
-# exec_code_source_folders = [Path('..') / '..' / 'src' / 'unfccc_ghg_data']
-# exec_code_example_dir = '.'
+# exec-code config
+exec_code_working_dir = "."  # Path('..') / '..'
+exec_code_source_folders = [Path("..") / ".." / "src" / "unfccc_ghg_data"]
+exec_code_example_dir = "."
 
 # -- Options for HTML output -------------------------------------------------
 # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output

+ 15 - 1
poetry.lock

@@ -1068,6 +1068,20 @@ smb = ["smbprotocol"]
 ssh = ["paramiko"]
 tqdm = ["tqdm"]
 
+[[package]]
+name = "ghostscript"
+version = "0.7"
+description = "Interface to the Ghostscript C-API, both high- and low-level, based on ctypes"
+optional = false
+python-versions = "*"
+files = [
+    {file = "ghostscript-0.7-py2.py3-none-any.whl", hash = "sha256:97c70e27ba6b1cab4ab1d9b4cc82d89b8b53e57971f608ded4950b8aa20c78a7"},
+    {file = "ghostscript-0.7.tar.gz", hash = "sha256:b7875a87098740eb0be3de2d9662d15db727305ca9a6d4b7534a3cc33a4b965a"},
+]
+
+[package.dependencies]
+setuptools = ">=38.6.0"
+
 [[package]]
 name = "globalwarmingpotentials"
 version = "0.9.3"
@@ -4375,4 +4389,4 @@ plots = ["matplotlib"]
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.9"
-content-hash = "db0b517e6af6c99b04624df636fc38cdf49b3ec8dd6dce24596da1cf5796c0ac"
+content-hash = "3591f5e1b1134c148b9f68e3861beb4961659d1af5cb4dd7360ef5396a682f2e"

+ 1 - 0
pyproject.toml

@@ -22,6 +22,7 @@ opencv-python = "^4.8.1.78"
 unfccc-di-api = "^4.0.0"
 dask = "^2023.12.0"
 sphinx-exec-code = "^0.10"
+ghostscript = "^0.7"
 
 [tool.poetry.extras]
 plots = ["matplotlib"]

+ 8 - 3
src/unfccc_ghg_data/__init__.py

@@ -6,15 +6,20 @@ different methods from APIs, xlsx and csv files as well as pdf files.
 """
 import importlib.metadata
 
-from . import (helper, unfccc_reader, unfccc_downloader, unfccc_crf_reader,
-               unfccc_di_reader)
+from . import (
+    helper,
+    unfccc_crf_reader,
+    unfccc_di_reader,
+    unfccc_downloader,
+    unfccc_reader,
+)
 
 __all__ = [
     "helper",
     "unfccc_reader",
     "unfccc_crf_reader",
     "unfccc_di_reader",
-    "unfccc_downloader"
+    "unfccc_downloader",
 ]
 
 __version__ = importlib.metadata.version("unfccc_ghg_data")

+ 1 - 1
src/unfccc_ghg_data/helper/__init__.py

@@ -25,10 +25,10 @@ from .functions import (
     convert_categories,
     create_folder_mapping,
     fix_rows,
+    get_code_file,
     get_country_code,
     get_country_name,
     process_data_for_country,
-    get_code_file,
 )
 
 __all__ = [

+ 234 - 109
src/unfccc_ghg_data/helper/definitions.py

@@ -6,14 +6,14 @@ from pathlib import Path
 
 def get_root_path() -> Path:
     """Get the root_path from an environment variable"""
-    root_path_env = os.getenv('UNFCCC_GHG_ROOT_PATH', None)
+    root_path_env = os.getenv("UNFCCC_GHG_ROOT_PATH", None)
     if root_path_env is None:
-        raise ValueError('UNFCCC_GHG_ROOT_PATH environment '
-                         'variable needs to be set') # noqa: TRY003
+        raise ValueError("UNFCCC_GHG_ROOT_PATH environment " "variable needs to be set")
     else:
         root_path = Path(root_path_env).resolve()
     return root_path
 
+
 root_path = get_root_path()
 code_path = root_path / "src" / "unfccc_ghg_data"
 log_path = root_path / "log"
@@ -36,125 +36,250 @@ custom_country_mapping = {
 }
 
 custom_folders = {
-    'Venezeula_(Bolivarian_Republic_of)': 'VEN',
-    'Venezuela_(Bolivarian_Republic_of)': 'VEN',
-    'Micronesia_(Federated_State_of)': 'FSM',
-    'Micronesia_(Federated_States_of)': 'FSM',
-    'The_Republic_of_North_Macedonia': 'MKD',
-    'Republic_of_Korea': 'KOR',
-    'Bolivia_(Plurinational_State_of)': 'BOL',
-    'Türkiye': 'TUR',
-    'Iran_(Islamic_Republic_of)': 'IRN',
-    'Côte_d`Ivoire': 'CIV',
-    'Democratic_Republic_of_the_Congo': "COD",
-    'European_Union': 'EUA',
-    'Taiwan': 'TWN',
+    "Venezeula_(Bolivarian_Republic_of)": "VEN",
+    "Venezuela_(Bolivarian_Republic_of)": "VEN",
+    "Micronesia_(Federated_State_of)": "FSM",
+    "Micronesia_(Federated_States_of)": "FSM",
+    "The_Republic_of_North_Macedonia": "MKD",
+    "Republic_of_Korea": "KOR",
+    "Bolivia_(Plurinational_State_of)": "BOL",
+    "Türkiye": "TUR",
+    "Iran_(Islamic_Republic_of)": "IRN",
+    "Côte_d`Ivoire": "CIV",
+    "Democratic_Republic_of_the_Congo": "COD",
+    "European_Union": "EUA",
+    "Taiwan": "TWN",
 }
 
 GWP_factors = {
-    'SARGWP100_to_AR4GWP100': {
-        'HFCS': 1.1,
-        'PFCS': 1.1,
-        'UnspMixOfHFCs': 1.1,
-        'UnspMixOfPFCs': 1.1,
-        'FGASES': 1.1,
+    "SARGWP100_to_AR4GWP100": {
+        "HFCS": 1.1,
+        "PFCS": 1.1,
+        "UnspMixOfHFCs": 1.1,
+        "UnspMixOfPFCs": 1.1,
+        "FGASES": 1.1,
     },
-    'SARGWP100_to_AR5GWP100': {
-        'HFCS': 1.2,
-        'PFCS': 1.2,
-        'UnspMixOfHFCs': 1.2,
-        'UnspMixOfPFCs': 1.2,
-        'FGASES': 1.2,
+    "SARGWP100_to_AR5GWP100": {
+        "HFCS": 1.2,
+        "PFCS": 1.2,
+        "UnspMixOfHFCs": 1.2,
+        "UnspMixOfPFCs": 1.2,
+        "FGASES": 1.2,
     },
-    'SARGWP100_to_AR6GWP100': {
-        'HFCS': 1.4,
-        'PFCS': 1.3,
-        'UnspMixOfHFCs': 1.4,
-        'UnspMixOfPFCs': 1.3,
-        'FGASES': 1.35,
+    "SARGWP100_to_AR6GWP100": {
+        "HFCS": 1.4,
+        "PFCS": 1.3,
+        "UnspMixOfHFCs": 1.4,
+        "UnspMixOfPFCs": 1.3,
+        "FGASES": 1.35,
     },
-    'AR4GWP100_to_SARGWP100': {
-        'HFCS': 0.91,
-        'PFCS': 0.91,
-        'UnspMixOfHFCs': 0.91,
-        'UnspMixOfPFCs': 0.91,
-        'FGASES': 0.91,
+    "AR4GWP100_to_SARGWP100": {
+        "HFCS": 0.91,
+        "PFCS": 0.91,
+        "UnspMixOfHFCs": 0.91,
+        "UnspMixOfPFCs": 0.91,
+        "FGASES": 0.91,
     },
-    'AR4GWP100_to_AR5GWP100': {
-        'HFCS': 1.1,
-        'PFCS': 1.1,
-        'UnspMixOfHFCs': 1.1,
-        'UnspMixOfPFCs': 1.1,
-        'FGASES': 1.1,
+    "AR4GWP100_to_AR5GWP100": {
+        "HFCS": 1.1,
+        "PFCS": 1.1,
+        "UnspMixOfHFCs": 1.1,
+        "UnspMixOfPFCs": 1.1,
+        "FGASES": 1.1,
     },
-    'AR4GWP100_to_AR6GWP100': {
-        'HFCS': 1.27,
-        'PFCS': 1.18,
-        'UnspMixOfHFCs': 1.27,
-        'UnspMixOfPFCs': 1.18,
-        'FGASES': 1.23,
+    "AR4GWP100_to_AR6GWP100": {
+        "HFCS": 1.27,
+        "PFCS": 1.18,
+        "UnspMixOfHFCs": 1.27,
+        "UnspMixOfPFCs": 1.18,
+        "FGASES": 1.23,
     },
-    'AR5GWP100_to_SARGWP100': {
-        'HFCS': 0.83,
-        'PFCS': 0.83,
-        'UnspMixOfHFCs': 0.83,
-        'UnspMixOfPFCs': 0.83,
-        'FGASES': 0.83,
+    "AR5GWP100_to_SARGWP100": {
+        "HFCS": 0.83,
+        "PFCS": 0.83,
+        "UnspMixOfHFCs": 0.83,
+        "UnspMixOfPFCs": 0.83,
+        "FGASES": 0.83,
     },
-    'AR5GWP100_to_AR4GWP100': {
-        'HFCS': 0.91,
-        'PFCS': 0.91,
-        'UnspMixOfHFCs': 0.91,
-        'UnspMixOfPFCs': 0.91,
-        'FGASES': 0.91,
+    "AR5GWP100_to_AR4GWP100": {
+        "HFCS": 0.91,
+        "PFCS": 0.91,
+        "UnspMixOfHFCs": 0.91,
+        "UnspMixOfPFCs": 0.91,
+        "FGASES": 0.91,
     },
-    'AR5GWP100_to_AR6GWP100': {
-        'HFCS': 1.17,
-        'PFCS': 1.08,
-        'UnspMixOfHFCs': 1.17,
-        'UnspMixOfPFCs': 1.08,
-        'FGASES': 1.125,
+    "AR5GWP100_to_AR6GWP100": {
+        "HFCS": 1.17,
+        "PFCS": 1.08,
+        "UnspMixOfHFCs": 1.17,
+        "UnspMixOfPFCs": 1.08,
+        "FGASES": 1.125,
     },
 }
 
 gas_baskets = {
-    'HFCS (SARGWP100)': ['HFC23', 'HFC32', 'HFC41', 'HFC125', 'HFC134',
-                     'HFC134a', 'HFC143',  'HFC143a', 'HFC152a', 'HFC227ea',
-                     'HFC236fa', 'HFC245ca', 'HFC245fa', 'HFC365mfc',  'HFC404a',
-                     'HFC407c', 'HFC410a', 'HFC4310mee', #'OTHERHFCS (SARGWP100)',
-                         'UnspMixOfHFCs (SARGWP100)'],
-    'HFCS (AR4GWP100)': ['HFC23', 'HFC32', 'HFC41', 'HFC125', 'HFC134',
-                     'HFC134a', 'HFC143',  'HFC143a', 'HFC152a', 'HFC227ea',
-                     'HFC236fa', 'HFC245ca', 'HFC245fa', 'HFC365mfc',  'HFC404a',
-                     'HFC407c', 'HFC410a', 'HFC4310mee', 'UnspMixOfHFCs (AR4GWP100)'],
-    'HFCS (AR5GWP100)': ['HFC23', 'HFC32', 'HFC41', 'HFC125', 'HFC134',
-                      'HFC134a', 'HFC143',  'HFC143a', 'HFC152a', 'HFC227ea',
-                      'HFC236fa', 'HFC245ca', 'HFC245fa', 'HFC365mfc',  'HFC404a',
-                      'HFC407c', 'HFC410a', 'HFC4310mee',
-                         'UnspMixOfHFCs (AR5GWP100)'],
-    'HFCS (AR6GWP100)': ['HFC23', 'HFC32', 'HFC41', 'HFC125', 'HFC134',
-                      'HFC134a', 'HFC143',  'HFC143a', 'HFC152a', 'HFC227ea',
-                      'HFC236fa', 'HFC245ca', 'HFC245fa', 'HFC365mfc',  'HFC404a',
-                      'HFC407c', 'HFC410a', 'HFC4310mee',
-                         'UnspMixOfHFCs (AR6GWP100)'],
-    'PFCS (SARGWP100)': ['C3F8', 'C4F10', 'CF4', 'C2F6', 'C6F14', 'C5F12', 'cC4F8',
-                      'UnspMixOfPFCs (SARGWP100)'],
-    'PFCS (AR4GWP100)': ['C3F8', 'C4F10', 'CF4', 'C2F6', 'C6F14', 'C5F12', 'cC4F8',
-                      'UnspMixOfPFCs (AR4GWP100)'],
-    'PFCS (AR5GWP100)': ['C3F8', 'C4F10', 'CF4', 'C2F6', 'C6F14', 'C5F12', 'cC4F8',
-                      'UnspMixOfPFCs (AR5GWP100)'],
-    'PFCS (AR6GWP100)': ['C3F8', 'C4F10', 'CF4', 'C2F6', 'C6F14', 'C5F12', 'cC4F8',
-                      'UnspMixOfPFCs (AR6GWP100)'],
-    'FGASES (SARGWP100)': ['HFCS (SARGWP100)', 'PFCS (SARGWP100)', 'SF6', 'NF3'],
-    'FGASES (AR4GWP100)': ['HFCS (AR4GWP100)', 'PFCS (AR4GWP100)', 'SF6', 'NF3'],
-    'FGASES (AR5GWP100)':['HFCS (AR5GWP100)', 'PFCS (AR5GWP100)', 'SF6', 'NF3'],
-    'FGASES (AR6GWP100)':['HFCS (AR6GWP100)', 'PFCS (AR6GWP100)', 'SF6', 'NF3'],
-    'KYOTOGHG (SARGWP100)': ['CO2', 'CH4', 'N2O', 'SF6', 'NF3', 'HFCS (SARGWP100)',
-                          'PFCS (SARGWP100)'],
-    'KYOTOGHG (AR4GWP100)': ['CO2', 'CH4', 'N2O', 'SF6', 'NF3', 'HFCS (AR4GWP100)',
-                          'PFCS (AR4GWP100)'],
-    'KYOTOGHG (AR5GWP100)': ['CO2', 'CH4', 'N2O', 'SF6', 'NF3', 'HFCS (AR5GWP100)',
-                            'PFCS (AR5GWP100)'],
-    'KYOTOGHG (AR6GWP100)': ['CO2', 'CH4', 'N2O', 'SF6', 'NF3', 'HFCS (AR6GWP100)',
-                            'PFCS (AR6GWP100)'],
+    "HFCS (SARGWP100)": [
+        "HFC23",
+        "HFC32",
+        "HFC41",
+        "HFC125",
+        "HFC134",
+        "HFC134a",
+        "HFC143",
+        "HFC143a",
+        "HFC152a",
+        "HFC227ea",
+        "HFC236fa",
+        "HFC245ca",
+        "HFC245fa",
+        "HFC365mfc",
+        "HFC404a",
+        "HFC407c",
+        "HFC410a",
+        "HFC4310mee",  #'OTHERHFCS (SARGWP100)',
+        "UnspMixOfHFCs (SARGWP100)",
+    ],
+    "HFCS (AR4GWP100)": [
+        "HFC23",
+        "HFC32",
+        "HFC41",
+        "HFC125",
+        "HFC134",
+        "HFC134a",
+        "HFC143",
+        "HFC143a",
+        "HFC152a",
+        "HFC227ea",
+        "HFC236fa",
+        "HFC245ca",
+        "HFC245fa",
+        "HFC365mfc",
+        "HFC404a",
+        "HFC407c",
+        "HFC410a",
+        "HFC4310mee",
+        "UnspMixOfHFCs (AR4GWP100)",
+    ],
+    "HFCS (AR5GWP100)": [
+        "HFC23",
+        "HFC32",
+        "HFC41",
+        "HFC125",
+        "HFC134",
+        "HFC134a",
+        "HFC143",
+        "HFC143a",
+        "HFC152a",
+        "HFC227ea",
+        "HFC236fa",
+        "HFC245ca",
+        "HFC245fa",
+        "HFC365mfc",
+        "HFC404a",
+        "HFC407c",
+        "HFC410a",
+        "HFC4310mee",
+        "UnspMixOfHFCs (AR5GWP100)",
+    ],
+    "HFCS (AR6GWP100)": [
+        "HFC23",
+        "HFC32",
+        "HFC41",
+        "HFC125",
+        "HFC134",
+        "HFC134a",
+        "HFC143",
+        "HFC143a",
+        "HFC152a",
+        "HFC227ea",
+        "HFC236fa",
+        "HFC245ca",
+        "HFC245fa",
+        "HFC365mfc",
+        "HFC404a",
+        "HFC407c",
+        "HFC410a",
+        "HFC4310mee",
+        "UnspMixOfHFCs (AR6GWP100)",
+    ],
+    "PFCS (SARGWP100)": [
+        "C3F8",
+        "C4F10",
+        "CF4",
+        "C2F6",
+        "C6F14",
+        "C5F12",
+        "cC4F8",
+        "UnspMixOfPFCs (SARGWP100)",
+    ],
+    "PFCS (AR4GWP100)": [
+        "C3F8",
+        "C4F10",
+        "CF4",
+        "C2F6",
+        "C6F14",
+        "C5F12",
+        "cC4F8",
+        "UnspMixOfPFCs (AR4GWP100)",
+    ],
+    "PFCS (AR5GWP100)": [
+        "C3F8",
+        "C4F10",
+        "CF4",
+        "C2F6",
+        "C6F14",
+        "C5F12",
+        "cC4F8",
+        "UnspMixOfPFCs (AR5GWP100)",
+    ],
+    "PFCS (AR6GWP100)": [
+        "C3F8",
+        "C4F10",
+        "CF4",
+        "C2F6",
+        "C6F14",
+        "C5F12",
+        "cC4F8",
+        "UnspMixOfPFCs (AR6GWP100)",
+    ],
+    "FGASES (SARGWP100)": ["HFCS (SARGWP100)", "PFCS (SARGWP100)", "SF6", "NF3"],
+    "FGASES (AR4GWP100)": ["HFCS (AR4GWP100)", "PFCS (AR4GWP100)", "SF6", "NF3"],
+    "FGASES (AR5GWP100)": ["HFCS (AR5GWP100)", "PFCS (AR5GWP100)", "SF6", "NF3"],
+    "FGASES (AR6GWP100)": ["HFCS (AR6GWP100)", "PFCS (AR6GWP100)", "SF6", "NF3"],
+    "KYOTOGHG (SARGWP100)": [
+        "CO2",
+        "CH4",
+        "N2O",
+        "SF6",
+        "NF3",
+        "HFCS (SARGWP100)",
+        "PFCS (SARGWP100)",
+    ],
+    "KYOTOGHG (AR4GWP100)": [
+        "CO2",
+        "CH4",
+        "N2O",
+        "SF6",
+        "NF3",
+        "HFCS (AR4GWP100)",
+        "PFCS (AR4GWP100)",
+    ],
+    "KYOTOGHG (AR5GWP100)": [
+        "CO2",
+        "CH4",
+        "N2O",
+        "SF6",
+        "NF3",
+        "HFCS (AR5GWP100)",
+        "PFCS (AR5GWP100)",
+    ],
+    "KYOTOGHG (AR6GWP100)": [
+        "CO2",
+        "CH4",
+        "N2O",
+        "SF6",
+        "NF3",
+        "HFCS (AR6GWP100)",
+        "PFCS (AR6GWP100)",
+    ],
 }

+ 6 - 5
src/unfccc_ghg_data/helper/folder_mapping.py

@@ -1,4 +1,4 @@
-""" create mapping of folder to countries
+"""create mapping of folder to countries
 
 this script takes a folder as input (from doit) and
 runs creates the mapping of subfolders to country codes
@@ -13,16 +13,17 @@ if __name__ == "__main__":
     # Find the right function and possible input and output files and
     # read the data using datalad run.
     parser = argparse.ArgumentParser()
-    parser.add_argument('--folder', help='folder name, relative to '
-                                         'repository root folder')
+    parser.add_argument(
+        "--folder", help="folder name, relative to " "repository root folder"
+    )
     args = parser.parse_args()
     folder = args.folder
 
-    if 'extracted_data' in folder:
+    if "extracted_data" in folder:
         extracted = True
     else:
         extracted = False
 
     # print available submissions
-    print("="*10 + f" Creating folder mapping for  {folder} " + "="*10)
+    print("=" * 10 + f" Creating folder mapping for  {folder} " + "=" * 10)
     create_folder_mapping(folder, extracted)

+ 4 - 6
src/unfccc_ghg_data/helper/functions.py

@@ -1,4 +1,4 @@
-""" common functions for unfccc_ghg_data
+"""common functions for unfccc_ghg_data
 
 Functions used by the different readers and downloaders in the unfccc_ghg_data package
 """
@@ -74,8 +74,6 @@ def process_data_for_country(
     xr.Dataset: processed dataset
 
     """
-
-
     # 0: gather information
     countries = list(data_country.coords[data_country.attrs["area"]].values)
     if len(countries) > 1:
@@ -956,9 +954,7 @@ def get_code_file(
                     )
                 else:
                     if print_info:
-                        print(
-                            f"Found code file {file.relative_to(root_path)}"
-                        )
+                        print(f"Found code file {file.relative_to(root_path)}")
                 code_file_path = file
 
     if code_file_path is not None:
@@ -1011,8 +1007,10 @@ def fix_rows(
         new_row = new_row.str.replace("- ", "-")
         # replace spaces in numbers
         pat = r"^(?P<first>[0-9\.,]*)\s(?P<last>[0-9\.,]*)$"
+
         def repl(m):
             return f"{m.group('first')}{m.group('last')}"
+
         new_row = new_row.str.replace(pat, repl, regex=True)
         data.loc[indices_to_merge[0]] = new_row
         data = data.drop(indices_to_merge[1:])

+ 286 - 176
src/unfccc_ghg_data/unfccc_crf_reader/unfccc_crf_reader_core.py

@@ -27,13 +27,13 @@ from .util import NoCRFFilesError
 
 ### reading functions
 def convert_crf_table_to_pm2if(
-        df_table: pd.DataFrame,
-        submission_year: int,
-        entity_mapping: Optional[dict[str,str]]=None,
-        coords_defaults_input: Optional[dict[str,str]]=None,
-        filter_remove_input: Optional[dict[str,dict[str,Union[str,list]]]]=None,
-        filter_keep_input: Optional[dict[str,dict[str,Union[str,list]]]]=None,
-        meta_data_input: Optional[dict[str,str]]=None,
+    df_table: pd.DataFrame,
+    submission_year: int,
+    entity_mapping: Optional[dict[str, str]] = None,
+    coords_defaults_input: Optional[dict[str, str]] = None,
+    filter_remove_input: Optional[dict[str, dict[str, Union[str, list]]]] = None,
+    filter_keep_input: Optional[dict[str, dict[str, Union[str, list]]]] = None,
+    meta_data_input: Optional[dict[str, str]] = None,
 ) -> pd.DataFrame:
     """
     Converts a given pandas long format crf table to PRIMAP2 interchange format
@@ -82,7 +82,7 @@ def convert_crf_table_to_pm2if(
     }
 
     add_coords_cols = {
-    #    "orig_cat_name": ["orig_cat_name", "category"],
+        #    "orig_cat_name": ["orig_cat_name", "category"],
     }
 
     coords_terminologies = {
@@ -108,8 +108,8 @@ def convert_crf_table_to_pm2if(
     if entity_mapping is not None:
         coords_value_mapping["entity"] = entity_mapping
 
-    #coords_value_filling_template = {
-    #}
+    # coords_value_filling_template = {
+    # }
 
     filter_remove = {
         "f1": {
@@ -120,13 +120,11 @@ def convert_crf_table_to_pm2if(
         for key in filter_remove_input.keys():
             filter_remove[key] = filter_remove_input[key]
 
-    filter_keep = {
-    }
+    filter_keep = {}
     if filter_keep_input is not None:
         for key in filter_keep_input.keys():
             filter_keep[key] = filter_keep_input[key]
 
-
     meta_data = {
         "references": f"https://unfccc.int/ghg-inventories-annex-i-parties/{submission_year}",
         "rights": "",
@@ -146,7 +144,7 @@ def convert_crf_table_to_pm2if(
         coords_defaults=coords_defaults,
         coords_terminologies=coords_terminologies,
         coords_value_mapping=coords_value_mapping,
-        #coords_value_filling=coords_value_filling,
+        # coords_value_filling=coords_value_filling,
         filter_remove=filter_remove,
         filter_keep=filter_keep,
         meta_data=meta_data,
@@ -156,13 +154,13 @@ def convert_crf_table_to_pm2if(
 
 
 def read_crf_table(
-        country_codes: Union[str, list[str]],
-        table: str,
-        submission_year: int,
-        data_year: Optional[Union[int, list[int]]]=None,
-        date: Optional[str]=None,
-        folder: Optional[str]=None,
-        debug: Optional[bool]=False,
+    country_codes: Union[str, list[str]],
+    table: str,
+    submission_year: int,
+    data_year: Optional[Union[int, list[int]]] = None,
+    date: Optional[str] = None,
+    folder: Optional[str] = None,
+    debug: Optional[bool] = False,
 ) -> tuple[pd.DataFrame, list[list], list[list]]:
     """
     Read CRF table for given submission year and country / or countries
@@ -216,13 +214,15 @@ def read_crf_table(
         country_codes = [country_codes]
 
     # get file names and locations
-    input_files = get_crf_files(country_codes=country_codes,
-                                submission_year=submission_year,
-                                data_year=data_year,
-                                date=date,
-                                folder=folder)
+    input_files = get_crf_files(
+        country_codes=country_codes,
+        submission_year=submission_year,
+        data_year=data_year,
+        date=date,
+        folder=folder,
+    )
     # nasty fix for cases where exporting ran overnight and not all files have the same date
-    if (date is not None) and (len(country_codes)==1):
+    if (date is not None) and (len(country_codes) == 1):
         if isinstance(data_year, list):
             expected_files = len(data_year)
         elif isinstance(data_year, int):
@@ -230,17 +230,23 @@ def read_crf_table(
         else:
             expected_files = submission_year - 1990 - 1
         if len(input_files) < expected_files:
-            print(f"Found only {len(input_files)} input files for {country_codes}. "
-                  f"Expected {expected_files}.")
-            print("Possibly exporting run overnight and some files have the previous day as date.")
+            print(
+                f"Found only {len(input_files)} input files for {country_codes}. "
+                f"Expected {expected_files}."
+            )
+            print(
+                "Possibly exporting run overnight and some files have the previous day as date."
+            )
             date_datetime = datetime.strptime(date, "%d%m%Y")
             date_datetime = date_datetime - timedelta(days=1)
             prv_date = date_datetime.strftime("%d%m%Y")
-            more_input_files = get_crf_files(country_codes=country_codes,
-                                             submission_year=submission_year,
-                                             data_year=data_year,
-                                             date=prv_date,
-                                             folder=folder)
+            more_input_files = get_crf_files(
+                country_codes=country_codes,
+                submission_year=submission_year,
+                data_year=data_year,
+                date=prv_date,
+                folder=folder,
+            )
             if len(more_input_files) > 0:
                 print(f"Found {len(more_input_files)} additional input files.")
                 input_files = input_files + more_input_files
@@ -248,11 +254,13 @@ def read_crf_table(
                 print("Found no additional input files")
 
     if input_files == []:
-        raise NoCRFFilesError(f"No files found for {country_codes}, "
-                              f"submission_year={submission_year}, "
-                              f"data_year={data_year}, "
-                              f"date={date}, "
-                              f"folder={folder}.")
+        raise NoCRFFilesError(
+            f"No files found for {country_codes}, "
+            f"submission_year={submission_year}, "
+            f"data_year={data_year}, "
+            f"date={date}, "
+            f"folder={folder}."
+        )
 
     # get specification
     # if we only have a single country check if we might have a country specific
@@ -260,21 +268,25 @@ def read_crf_table(
     if len(country_codes) == 1:
         try:
             crf_spec = getattr(crf, f"CRF{submission_year}_{country_codes[0]}")
-            print(f"Using country specific specification: "
-                  f"CRF{submission_year}_{country_codes[0]}")
+            print(
+                f"Using country specific specification: "
+                f"CRF{submission_year}_{country_codes[0]}"
+            )
         except:
             # no country specific specification, check for general specification
             try:
                 crf_spec = getattr(crf, f"CRF{submission_year}")
             except:
-                raise ValueError(f"No terminology exists for submission year "
-                                 f"{submission_year}")
+                raise ValueError(
+                    f"No terminology exists for submission year " f"{submission_year}"
+                )
     else:
         try:
             crf_spec = getattr(crf, f"CRF{submission_year}")
         except:
-            raise ValueError(f"No terminology exists for submission year "
-                             f"{submission_year}")
+            raise ValueError(
+                f"No terminology exists for submission year " f"{submission_year}"
+            )
 
     # now loop over files and read them
     df_all = None
@@ -284,8 +296,11 @@ def read_crf_table(
         file_info = get_info_from_crf_filename(file.name)
         try:
             int(file_info["data_year"])
-            df_this_file, unknown_rows_this_file, last_row_info_this_file = \
-                read_crf_table_from_file(file, table, crf_spec[table], debug=debug)
+            (
+                df_this_file,
+                unknown_rows_this_file,
+                last_row_info_this_file,
+            ) = read_crf_table_from_file(file, table, crf_spec[table], debug=debug)
             if df_all is None:
                 df_all = df_this_file.copy(deep=True)
                 unknown_rows = unknown_rows_this_file
@@ -301,10 +316,10 @@ def read_crf_table(
 
 
 def read_crf_table_from_file(
-        file: Path,
-        table: str,
-        table_spec: dict[str, dict],
-        debug: Optional[bool]=False,
+    file: Path,
+    table: str,
+    table_spec: dict[str, dict],
+    debug: Optional[bool] = False,
 ) -> tuple[pd.DataFrame, list[list], list[list]]:
     """
     Read a single CRF table from a given file. This is the core function of the CRF
@@ -344,7 +359,6 @@ def read_crf_table_from_file(
             dlds = dl.api.Dataset(root_path)
             dlds.get(file.relative_to(root_path))
 
-
     table_properties = table_spec["table"]
     file_info = get_info_from_crf_filename(file.name)
 
@@ -353,16 +367,23 @@ def read_crf_table_from_file(
     all_cats = [cat[0] for cat in all_cats_mapping]
 
     unique_cats = [cat for (cat, count) in Counter(all_cats).items() if count == 1]
-    unique_cat_tuples = [mapping for mapping in all_cats_mapping if mapping[0] in unique_cats]
-    unique_mapping = dict(zip([tup[0] for tup in unique_cat_tuples],
-                              [tup[1] for tup in unique_cat_tuples]))
+    unique_cat_tuples = [
+        mapping for mapping in all_cats_mapping if mapping[0] in unique_cats
+    ]
+    unique_mapping = dict(
+        zip(
+            [tup[0] for tup in unique_cat_tuples], [tup[1] for tup in unique_cat_tuples]
+        )
+    )
     non_unique_cats = [cat for (cat, count) in Counter(all_cats).items() if count > 1]
 
     # prepare the sector hierarchy
     if non_unique_cats:
         # if we have non-unique categories present we need the information on
         # levels within the category hierarchy
-        category_tree = create_category_tree(all_cats_mapping, table, file_info["party"])
+        category_tree = create_category_tree(
+            all_cats_mapping, table, file_info["party"]
+        )
 
     # prepare index column information
     cat_col = table_properties["col_for_categories"]
@@ -372,20 +393,37 @@ def read_crf_table_from_file(
     # read the data
     print(f"Reading table {table} for year {file_info['data_year']} from {file.name}.")
     skiprows = table_properties["firstrow"] - 1
-    nrows = table_properties["lastrow"] - skiprows + 1 # read one row more to check if we reached the end
+    nrows = (
+        table_properties["lastrow"] - skiprows + 1
+    )  # read one row more to check if we reached the end
     # we read with user specific NaN treatment as the NaN treatment is part of the conversion to
     # PRIMAP2 format.
-    df_raw = pd.read_excel(file, sheet_name=table, skiprows=skiprows , nrows=nrows, engine="openpyxl",
-                               na_values=['-1.#IND', '-1.#QNAN', '-NaN', '-nan', '1.#IND', '1.#QNAN',
-                                          'NULL', 'NaN', ''], keep_default_na=False)
+    df_raw = pd.read_excel(
+        file,
+        sheet_name=table,
+        skiprows=skiprows,
+        nrows=nrows,
+        engine="openpyxl",
+        na_values=[
+            "-1.#IND",
+            "-1.#QNAN",
+            "-NaN",
+            "-nan",
+            "1.#IND",
+            "1.#QNAN",
+            "NULL",
+            "NaN",
+            "",
+        ],
+        keep_default_na=False,
+    )
 
     if len(df_raw) < nrows:
-        #print(f"read data truncated because of all-nan rows")
+        # print(f"read data truncated because of all-nan rows")
         last_row_nan = True
     else:
         last_row_nan = False
 
-
     cols_to_drop = []
     # remove empty first column (for Australia tables start with an empty column)
     # df_raw = df_raw.dropna(how="all", axis=1)
@@ -394,13 +432,14 @@ def read_crf_table_from_file(
     # select only first table by cutting everything after a all-nan column (unless
     # it's the first column)
     if debug:
-        print(f'Header before table end detection: {df_raw.columns.values}')
+        print(f"Header before table end detection: {df_raw.columns.values}")
     for colIdx in range(1, len(df_raw.columns.values)):
-        if ((df_raw.iloc[:, colIdx].isna().all()) &
-                (df_raw.columns[colIdx].startswith('Unnamed'))):
+        if (df_raw.iloc[:, colIdx].isna().all()) & (
+            df_raw.columns[colIdx].startswith("Unnamed")
+        ):
             cols_to_drop = cols_to_drop + list(df_raw.columns.values[colIdx:])
             if debug:
-                print(f'cols_to_drop: {cols_to_drop}')
+                print(f"cols_to_drop: {cols_to_drop}")
             break
 
     if cols_to_drop is not None:
@@ -414,7 +453,7 @@ def read_crf_table_from_file(
     # the filling leads to long and a bit confusing headers, but as long
     # as pandas can not fill values of merged cells in all individual cells
     # we have to use some filling algorithm.
-    df_header = df_raw.iloc[0:len(table_properties["header"])-1].copy(deep=True)
+    df_header = df_raw.iloc[0 : len(table_properties["header"]) - 1].copy(deep=True)
     df_header.loc[-1] = df_header.columns.values
     df_header.index = df_header.index + 1
     # replace "Unnamed: X" colum names by nan to fill from left in next step
@@ -447,15 +486,17 @@ def read_crf_table_from_file(
                         entities[col] = f"{entities[col]} {value}"
 
     if units is None:
-        raise ValueError(f"Specification for table {table} does not contain unit information.")
+        raise ValueError(
+            f"Specification for table {table} does not contain unit information."
+        )
 
     # remove double spaces
     entities = [entity.strip() for entity in entities]
-    entities = [re.sub('\\s+', ' ', entity) for entity in entities]
+    entities = [re.sub("\\s+", " ", entity) for entity in entities]
 
     # replace the old header
     if len(header) > 2:
-        df_current = df_raw.drop(index=df_raw.iloc[0:len(header)-2].index)
+        df_current = df_raw.drop(index=df_raw.iloc[0 : len(header) - 2].index)
     else:
         df_current = df_raw
 
@@ -469,11 +510,11 @@ def read_crf_table_from_file(
     # remove double spaces
     for col in cols_for_space_stripping:
         df_current[col] = df_current[col].str.strip()
-        df_current[col] = df_current[col].replace('\\s+', ' ', regex=True)
+        df_current[col] = df_current[col].replace("\\s+", " ", regex=True)
 
     # prepare for sector mapping by initializing result lists and
     # variables
-    new_cats = [[''] * len(table_properties["categories"])] * len(df_current)
+    new_cats = [[""] * len(table_properties["categories"])] * len(df_current)
 
     # copy the header rows which are not part of the index (unit)
     new_cats[0] = [df_current.iloc[0][cat_col]] * len(table_properties["categories"])
@@ -485,7 +526,9 @@ def read_crf_table_from_file(
     if non_unique_cats:
         # need to initialize the tree parsing.
         last_parent = category_tree.get_node("root")
-        all_nodes = set([category_tree.get_node(node).tag for node in category_tree.nodes])
+        all_nodes = set(
+            [category_tree.get_node(node).tag for node in category_tree.nodes]
+        )
 
         for idx in range(1, len(df_current)):
             current_cat = df_current.iloc[idx][cat_col]
@@ -497,8 +540,12 @@ def read_crf_table_from_file(
                 break
 
             # check if current category is a child of the last node
-            children = dict([[child.tag, child.identifier]
-                        for child in category_tree.children(last_parent.identifier)])
+            children = dict(
+                [
+                    [child.tag, child.identifier]
+                    for child in category_tree.children(last_parent.identifier)
+                ]
+            )
             if current_cat in children.keys():
                 # the current category is a child of the current parent
                 # do the mapping
@@ -517,21 +564,39 @@ def read_crf_table_from_file(
                 if current_cat in all_nodes:
                     old_parent = last_parent
 
-                    while (current_cat not in children.keys()) and \
-                            (last_parent.identifier != "root"):
+                    while (current_cat not in children.keys()) and (
+                        last_parent.identifier != "root"
+                    ):
                         last_parent = category_tree.get_node(
-                            last_parent.predecessor(category_tree.identifier))
-                        children = dict([[child.tag, child.identifier]
-                                    for child in category_tree.children(last_parent.identifier)])
-
-                    if (last_parent.identifier == "root") and \
-                        (current_cat not in children.keys()):
+                            last_parent.predecessor(category_tree.identifier)
+                        )
+                        children = dict(
+                            [
+                                [child.tag, child.identifier]
+                                for child in category_tree.children(
+                                    last_parent.identifier
+                                )
+                            ]
+                        )
+
+                    if (last_parent.identifier == "root") and (
+                        current_cat not in children.keys()
+                    ):
                         # we have not found the category as direct child of any of the
                         # predecessors. Thus it is missing in the specification in
                         # that place
-                        print(f"Unknown category '{current_cat}' found in {table} for {file_info['party']}, "
-                              f"{file_info['data_year']} (last parent: {old_parent.tag}).")
-                        unknown_categories.append([table, file_info["party"], current_cat, file_info['data_year']])
+                        print(
+                            f"Unknown category '{current_cat}' found in {table} for {file_info['party']}, "
+                            f"{file_info['data_year']} (last parent: {old_parent.tag})."
+                        )
+                        unknown_categories.append(
+                            [
+                                table,
+                                file_info["party"],
+                                current_cat,
+                                file_info["data_year"],
+                            ]
+                        )
                         # copy back the parent info to continue with next category
                         last_parent = old_parent
                     else:
@@ -543,8 +608,12 @@ def read_crf_table_from_file(
                         if new_children:
                             last_parent = node
                 else:
-                    print(f"Unknown category '{current_cat}' found in {table} for {file_info['party']}, {file_info['data_year']}.")
-                    unknown_categories.append([table, file_info["party"], current_cat, file_info['data_year']])
+                    print(
+                        f"Unknown category '{current_cat}' found in {table} for {file_info['party']}, {file_info['data_year']}."
+                    )
+                    unknown_categories.append(
+                        [table, file_info["party"], current_cat, file_info["data_year"]]
+                    )
     else:
         for idx in range(1, len(df_current)):
             current_cat = df_current.iloc[idx][cat_col]
@@ -557,30 +626,45 @@ def read_crf_table_from_file(
             if current_cat in all_cats:
                 new_cats[idx] = unique_mapping[current_cat]
                 if (idx == len(df_current) - 1) and not last_row_nan:
-                    print(f"found information in last row: category {current_cat}, row {idx}")
-                    info_last_row.append([table, file_info["party"], current_cat, file_info['data_year']])
+                    print(
+                        f"found information in last row: category {current_cat}, row {idx}"
+                    )
+                    info_last_row.append(
+                        [table, file_info["party"], current_cat, file_info["data_year"]]
+                    )
             else:
-                print(f"Unknown category '{current_cat}' found in {table} for {file_info['party']}, {file_info['data_year']}.")
-                unknown_categories.append([table, file_info["party"], current_cat, file_info['data_year']])
+                print(
+                    f"Unknown category '{current_cat}' found in {table} for {file_info['party']}, {file_info['data_year']}."
+                )
+                unknown_categories.append(
+                    [table, file_info["party"], current_cat, file_info["data_year"]]
+                )
 
     for idx, col in enumerate(table_properties["categories"]):
-        df_current.insert(loc=idx, column=col, value=
-                          [cat[idx] for cat in new_cats])
+        df_current.insert(loc=idx, column=col, value=[cat[idx] for cat in new_cats])
 
     # set index
     df_current = df_current.set_index(index_cols)
     # process the unit information using the primap2 functions
-    df_current = pm2.pm2io.nir_add_unit_information(df_current, **table_properties["unit_info"])
+    df_current = pm2.pm2io.nir_add_unit_information(
+        df_current, **table_properties["unit_info"]
+    )
 
     # convert to long format
-    header_long = table_properties["categories"] + \
-        ["orig_cat_name", "entity", "unit", "time", "data"]
+    header_long = table_properties["categories"] + [
+        "orig_cat_name",
+        "entity",
+        "unit",
+        "time",
+        "data",
+    ]
     df_long = pm2.pm2io.nir_convert_df_to_long(
-        df_current, file_info["data_year"], header_long=header_long)
+        df_current, file_info["data_year"], header_long=header_long
+    )
 
     # add country information
     df_long.insert(0, column="country", value=file_info["party"])
-    #df_long.insert(1, column="submission", value=f"CRF{file_info['submission_year']}")
+    # df_long.insert(1, column="submission", value=f"CRF{file_info['submission_year']}")
     if "coords_defaults" in table_spec.keys():
         for col in table_spec["coords_defaults"]:
             df_long.insert(2, column=col, value=table_spec["coords_defaults"][col])
@@ -589,18 +673,17 @@ def read_crf_table_from_file(
 
 
 def get_crf_files(
-        country_codes: Union[str, list[str]],
-        submission_year: int,
-        data_year: Optional[Union[int, list[int]]] = None,
-        date: Optional[str] = None,
-        folder: Optional[str] = None,
+    country_codes: Union[str, list[str]],
+    submission_year: int,
+    data_year: Optional[Union[int, list[int]]] = None,
+    date: Optional[str] = None,
+    folder: Optional[str] = None,
 ) -> list[Path]:
     """
     Finds all files according to given parameters
 
     Parameters
     ----------
-
     country_codes: str or list[str]
         ISO 3-letter country code or list of country codes
 
@@ -643,14 +726,20 @@ def get_crf_files(
                 new_country_folders = folder_mapping[country_code]
                 if isinstance(new_country_folders, str):
                     # only one folder
-                    country_folders = [*country_folders, data_folder / new_country_folders / submission_folder]
+                    country_folders = [
+                        *country_folders,
+                        data_folder / new_country_folders / submission_folder,
+                    ]
                 else:
-                    country_folders = country_folders + \
-                                      [data_folder / folder / submission_folder
-                                       for folder in new_country_folders]
+                    country_folders = country_folders + [
+                        data_folder / folder / submission_folder
+                        for folder in new_country_folders
+                    ]
             else:
-                raise ValueError(f"No data folder found for country {country_code}. "
-                                 f"Check if folder mapping is up to date.")
+                raise ValueError(
+                    f"No data folder found for country {country_code}. "
+                    f"Check if folder mapping is up to date."
+                )
     else:
         country_folders = [folder]
 
@@ -671,17 +760,17 @@ def get_crf_files(
                     file_filter["party"] = country
                     dates = get_submission_dates(folder, file_filter)
                     file_filter["date"] = find_latest_date(dates)
-                    input_files = input_files + \
-                                  filter_filenames(input_folder.glob("*.xlsx"),
-                                                   **file_filter)
+                    input_files = input_files + filter_filenames(
+                        input_folder.glob("*.xlsx"), **file_filter
+                    )
             else:
                 file_filter = file_filter_template.copy()
                 if date is not None:
                     file_filter["date"] = date
-                input_files = input_files + \
-                              filter_filenames(input_folder.glob("*.xlsx"),
-                                               **file_filter)
-        #else:
+                input_files = input_files + filter_filenames(
+                    input_folder.glob("*.xlsx"), **file_filter
+                )
+        # else:
         #    raise ValueError(f"Folder {input_folder} does not exist")
     if len(input_files) == 0:
         raise ValueError(f"No input files found in {country_folders}")
@@ -699,7 +788,7 @@ def get_crf_files(
 
 
 def get_info_from_crf_filename(
-        filename: str,
+    filename: str,
 ) -> dict[str, Union[int, str]]:
     """
     Parse given file name and return a dict with information
@@ -707,7 +796,6 @@ def get_info_from_crf_filename(
 
     Parameters
     ----------
-
     filename: str
         The file to analyze (without path)
 
@@ -729,8 +817,7 @@ def get_info_from_crf_filename(
     try:
         file_info["data_year"] = int(name_parts[2])
     except:
-        print(f"Data year string {name_parts[2]} "
-              "could not be converted to int.")
+        print(f"Data year string {name_parts[2]} " "could not be converted to int.")
         file_info["data_year"] = name_parts[2]
     file_info["date"] = name_parts[3]
     # the last part (time code) is missing for Australia since 2023
@@ -742,11 +829,11 @@ def get_info_from_crf_filename(
 
 
 def filter_filenames(
-        files_to_filter: list[Path],
-        party: Optional[Union[str, list[str]]] = None,
-        data_year: Optional[Union[int, list[int]]] = None,
-        submission_year: Optional[str] = None,
-        date: Optional[str] = None,
+    files_to_filter: list[Path],
+    party: Optional[Union[str, list[str]]] = None,
+    data_year: Optional[Union[int, list[int]]] = None,
+    submission_year: Optional[str] = None,
+    date: Optional[str] = None,
 ) -> list[Path]:
     """Filter a list of filenames of CRF files
 
@@ -792,8 +879,8 @@ def filter_filenames(
 
 
 def check_crf_file_info(
-        file_info: dict,
-        file_filter: dict,
+    file_info: dict,
+    file_filter: dict,
 ) -> bool:
     """
     Check if a CRF file has given properties
@@ -837,9 +924,9 @@ def check_crf_file_info(
 
 
 def create_category_tree(
-        specification: list[list],
-        table: str,
-        country: Optional[str] = None,
+    specification: list[list],
+    table: str,
+    country: Optional[str] = None,
 ) -> Tree:
     """
     Create a treelib Tree for the categorical hierarchy from a CRF
@@ -850,7 +937,6 @@ def create_category_tree(
 
     Parameters
     ----------
-
     specification: List[List]
         The `sector_mapping` dict of a table specification
 
@@ -866,8 +952,10 @@ def create_category_tree(
     """
     # small sanity check on the specification
     if len(specification[0]) < 3:
-        raise ValueError(f"Error: Specification for Table {table} has non-unique "
-                         "categories and need level specifications")
+        raise ValueError(
+            f"Error: Specification for Table {table} has non-unique "
+            "categories and need level specifications"
+        )
 
     # initialize variables for tree building
     parent_info = [
@@ -888,11 +976,11 @@ def create_category_tree(
     if country is not None:
         # remove country tags from categories and mark categories
         # for other countries for removal
-        specification = [filter_category(mapping, country)
-                         for mapping in specification]
+        specification = [filter_category(mapping, country) for mapping in specification]
         # remove the categories for other countries
-        specification = [mapping for mapping in specification
-                         if mapping[0] != "\\REMOVE"]
+        specification = [
+            mapping for mapping in specification if mapping[0] != "\\REMOVE"
+        ]
 
     # build a tree from specification
     # when looping over the categories present in the table
@@ -903,7 +991,9 @@ def create_category_tree(
         if current_cat_level == last_cat_info["level"]:
             # cat has the same level as preceding one, so no change to
             # parent node
-            category_tree.create_node(current_cat, idx, parent=parent_info[-1]["id"], data=mapping)
+            category_tree.create_node(
+                current_cat, idx, parent=parent_info[-1]["id"], data=mapping
+            )
         elif current_cat_level == last_cat_info["level"] + 1:
             # the current category is one level further away from
             # the trunk of the tree. This means that
@@ -913,23 +1003,29 @@ def create_category_tree(
                 {
                     "id": last_cat_info["id"],
                     "tag": last_cat_info["category"],
-                    "level": last_cat_info["level"]
+                    "level": last_cat_info["level"],
                 }
             )
             # add the category as new node
-            category_tree.create_node(current_cat, idx, parent=parent_info[-1]["id"], data=mapping)
+            category_tree.create_node(
+                current_cat, idx, parent=parent_info[-1]["id"], data=mapping
+            )
 
         elif current_cat_level < last_cat_info["level"]:
             # the new level is smaller (closer to the trunk)
             # than the last one. Thus we remove all parents
             # from this level on
-            parent_info = parent_info[0: current_cat_level + 1]
-            category_tree.create_node(current_cat, idx, parent=parent_info[-1]["id"], data=mapping)
+            parent_info = parent_info[0 : current_cat_level + 1]
+            category_tree.create_node(
+                current_cat, idx, parent=parent_info[-1]["id"], data=mapping
+            )
         else:
             # increase in levels of more than one is not allowed
-            raise ValueError(f"Error in sector hierarchy for table {table}, category {current_cat}: "
-                             f"Category level is {current_cat_level} and parent level is "
-                             f"{parent_info[-1]['level']}")
+            raise ValueError(
+                f"Error in sector hierarchy for table {table}, category {current_cat}: "
+                f"Category level is {current_cat_level} and parent level is "
+                f"{parent_info[-1]['level']}"
+            )
 
         # set last_cat_info
         last_cat_info["category"] = current_cat
@@ -940,8 +1036,8 @@ def create_category_tree(
 
 
 def filter_category(
-        mapping: list,
-        country: str,
+    mapping: list,
+    country: str,
 ) -> list[str]:
     """
     This function checks if a category mapping is suitable for the given country.
@@ -975,9 +1071,9 @@ def filter_category(
             new_mapping[0] = "\\REMOVE"
         else:
             re_result = re.search(regex_exclude_full, mapping[0])
-            new_mapping[0] = mapping[0][len(re_result.group(1)) + 1:]
+            new_mapping[0] = mapping[0][len(re_result.group(1)) + 1 :]
     elif mapping[0].startswith(string_country):
-        new_mapping[0] = mapping[0][len(string_country) + 1:]
+        new_mapping[0] = mapping[0][len(string_country) + 1 :]
     elif re.match(regex_countries, mapping[0]):
         new_mapping[0] = "\\REMOVE"
 
@@ -985,9 +1081,9 @@ def filter_category(
 
 
 def get_latest_date_for_country(
-        country_code: str,
-        submission_year: int,
-)->str:
+    country_code: str,
+    submission_year: int,
+) -> str:
     """
     Find the latest submission date for a country
 
@@ -1013,26 +1109,36 @@ def get_latest_date_for_country(
         country_folders = folder_mapping[country_code]
         if isinstance(country_folders, str):
             # only one folder
-            submission_date = find_latest_date(get_submission_dates(
-                downloaded_data_path_UNFCCC / country_folders / f"CRF{submission_year}", file_filter))
+            submission_date = find_latest_date(
+                get_submission_dates(
+                    downloaded_data_path_UNFCCC
+                    / country_folders
+                    / f"CRF{submission_year}",
+                    file_filter,
+                )
+            )
         else:
             dates = []
             for folder in country_folders:
-                folder_submission = downloaded_data_path_UNFCCC / folder / f"CRF{submission_year}"
+                folder_submission = (
+                    downloaded_data_path_UNFCCC / folder / f"CRF{submission_year}"
+                )
                 if folder_submission.exists():
                     dates = dates + get_submission_dates(folder_submission, file_filter)
             submission_date = find_latest_date(dates)
     else:
-        raise ValueError(f"No data folder found for country {country_code}. "
-                         f"Check if folder mapping is up to date.")
+        raise ValueError(
+            f"No data folder found for country {country_code}. "
+            f"Check if folder mapping is up to date."
+        )
 
     return submission_date
 
 
 def get_submission_dates(
-        folder: Path,
-        file_filter: dict[str, Union[str, int, list]],
-)->list[str]:
+    folder: Path,
+    file_filter: dict[str, Union[str, int, list]],
+) -> list[str]:
     """
     Returns all submission dates available in a folder
 
@@ -1050,8 +1156,10 @@ def get_submission_dates(
             List of dates as str
     """
     if "date" in file_filter:
-        raise ValueError("'date' present in 'file_filter'. This makes no sense as "
-                         "the function's purpose is to return available dates.")
+        raise ValueError(
+            "'date' present in 'file_filter'. This makes no sense as "
+            "the function's purpose is to return available dates."
+        )
 
     if folder.exists():
         files = filter_filenames(folder.glob("*.xlsx"), **file_filter)
@@ -1065,9 +1173,9 @@ def get_submission_dates(
 
 
 def get_submission_parties(
-        folder: Path,
-        file_filter: dict[str, Union[str, int, list]],
-)->list[str]:
+    folder: Path,
+    file_filter: dict[str, Union[str, int, list]],
+) -> list[str]:
     """
     Returns all submission parties available in a folder
 
@@ -1085,8 +1193,10 @@ def get_submission_parties(
             List of parties as str
     """
     if "party" in file_filter:
-        raise ValueError("'party' present in 'file_filter'. This makes no sense as "
-                         "the function's purpose is to return available parties.")
+        raise ValueError(
+            "'party' present in 'file_filter'. This makes no sense as "
+            "the function's purpose is to return available parties."
+        )
 
     if folder.exists():
         files = filter_filenames(list(folder.glob("*.xlsx")), **file_filter)
@@ -1100,9 +1210,9 @@ def get_submission_parties(
 
 
 def find_latest_date(
-        dates: list[str],
-        date_format: str='%d%m%Y',
-)-> str:
+    dates: list[str],
+    date_format: str = "%d%m%Y",
+) -> str:
     """
     Returns the latest date in a list of dates as str in the format
     ddmmyyyy
@@ -1117,11 +1227,11 @@ def find_latest_date(
         str: latest date
     """
     if len(dates) > 0:
-        dates_datetime = [[date, datetime.strptime(date, date_format)] for date in
-                          dates]
+        dates_datetime = [
+            [date, datetime.strptime(date, date_format)] for date in dates
+        ]
         dates_datetime = sorted(dates_datetime, key=itemgetter(1))
     else:
         raise ValueError("Passed list of dates is empty")
 
     return dates_datetime[-1][0]
-

+ 8 - 7
src/unfccc_ghg_data/unfccc_di_reader/read_unfccc_di_for_country.py

@@ -6,12 +6,13 @@ function such that it can be called from datalad
 import argparse
 
 from unfccc_ghg_data.unfccc_di_reader.unfccc_di_reader_core import (
-    read_UNFCCC_DI_for_country)
+    read_UNFCCC_DI_for_country,
+)
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
-    parser.add_argument('--country', help='Country code')
-    parser.add_argument('--date', help='String with current date')
+    parser.add_argument("--country", help="Country code")
+    parser.add_argument("--date", help="String with current date")
     args = parser.parse_args()
 
     country_code = args.country
@@ -19,10 +20,10 @@ if __name__ == "__main__":
 
     read_UNFCCC_DI_for_country(
         country_code=country_code,
-        category_groups=None, # read all categories
-        read_subsectors=False, # not applicable as we read all categories
+        category_groups=None,  # read all categories
+        read_subsectors=False,  # not applicable as we read all categories
         date_str=date_str,
-        pm2if_specifications=None, # automatically use the right specs for AI and NAI
-        default_gwp=None, # automatically uses right default GWP for AI and NAI
+        pm2if_specifications=None,  # automatically use the right specs for AI and NAI
+        default_gwp=None,  # automatically uses right default GWP for AI and NAI
         debug=False,
     )

+ 5 - 8
src/unfccc_ghg_data/unfccc_downloader/fetch_submissions_bur.py

@@ -5,7 +5,7 @@ Based on `process_bur` from national-inventory-submissions
 (https://github.com/openclimatedata/national-inventory-submisions)
 """
 
-#import requests
+# import requests
 import re
 import time
 from pathlib import Path
@@ -15,25 +15,24 @@ import pandas as pd
 from bs4 import BeautifulSoup
 from selenium.webdriver import Firefox
 from selenium.webdriver.firefox.options import Options
-from unfccc_ghg_data.unfccc_downloader import get_unfccc_submission_info
 
 from unfccc_ghg_data.helper import downloaded_data_path_UNFCCC
-
+from unfccc_ghg_data.unfccc_downloader import get_unfccc_submission_info
 
 if __name__ == "__main__":
     print("Fetching BUR submissions ...")
 
     url = "https://unfccc.int/BURs"
 
-    #print(url)
+    # print(url)
 
     # set options for headless mode
     profile_path = ".firefox"
     options = Options()
-    options.add_argument('-headless')
+    options.add_argument("-headless")
 
     # create profile for headless mode and automatic downloading
-    options.set_preference('profile', profile_path)
+    options.set_preference("profile", profile_path)
 
     # set up selenium driver
     driver = Firefox(options=options)
@@ -64,7 +63,6 @@ if __name__ == "__main__":
             if str(Path(href).parent).endswith("documents"):
                 targets.append({"title": title, "url": href})
 
-
     pattern = re.compile(r"BUR ?\d")
 
     # Go through sub-pages.
@@ -79,7 +77,6 @@ if __name__ == "__main__":
         else:
             no_downloads.append({target["title"], url})
 
-
     if len(no_downloads) > 0:
         print("No downloads for ", no_downloads)
 

+ 3 - 7
src/unfccc_ghg_data/unfccc_downloader/fetch_submissions_nc.py

@@ -19,21 +19,20 @@ from selenium.webdriver.firefox.options import Options
 from unfccc_ghg_data.helper import downloaded_data_path_UNFCCC
 from unfccc_ghg_data.unfccc_downloader import get_unfccc_submission_info
 
-
 if __name__ == "__main__":
     print("Fetching NC submissions ...")
 
     url = "https://unfccc.int/non-annex-I-NCs"
 
-    #print(url)
+    # print(url)
 
     # set options for headless mode
     profile_path = ".firefox"
     options = Options()
-    options.add_argument('-headless')
+    options.add_argument("-headless")
 
     # create profile for headless mode and automatic downloading
-    options.set_preference('profile', profile_path)
+    options.set_preference("profile", profile_path)
 
     # set up selenium driver
     driver = Firefox(options=options)
@@ -64,10 +63,8 @@ if __name__ == "__main__":
             if str(Path(href).parent).endswith("documents"):
                 targets.append({"title": title, "url": href})
 
-
     pattern = re.compile(r"NC ?\d")
 
-
     # Go through sub-pages.
     for target in targets:
         time.sleep(randrange(5, 15))
@@ -80,7 +77,6 @@ if __name__ == "__main__":
         else:
             no_downloads.append({target["title"], url})
 
-
     if len(no_downloads) > 0:
         print("No downloads for ", no_downloads)
 

+ 27 - 4
src/unfccc_ghg_data/unfccc_reader/Argentina/__init__.py

@@ -1,7 +1,30 @@
-"""Argentina (BUR4)
+"""Read Argentina's BURs, NIRs, NCs
 
 Scripts and configurations to read Argentina's submissions to the UNFCCC.
-Currently code for the following submissions is available:
+Currently, the following submissions and datasets are available (all datasets
+including DI (read using the DI-reader) and legacy BUR/NIR (no code)):
 
-* BUR4 (from pdf)
-"""
+.. exec_code::
+    :hide_code:
+
+    from unfccc_ghg_data.helper.functions import (get_country_datasets,
+                                                  get_country_submissions)
+    country = 'ARG'
+    # print available submissions
+    print("="*15 + " Available submissions " + "="*15)
+    get_country_submissions(country, True)
+    print("")
+
+    #print available datasets
+    print("="*15 + " Available datasets " + "="*15)
+    get_country_datasets(country, True)
+
+You can also obtain this information running
+
+.. code-block:: bash
+
+    poetry run doit country_info country=ARG
+
+See below for a listing of scripts for BUR/NIR reading including links.
+
+"""

+ 115 - 96
src/unfccc_ghg_data/unfccc_reader/Argentina/read_ARG_BUR4_from_pdf.py

@@ -2,16 +2,22 @@
 Read Argentina's BUR4 from pdf
 
 This script reads data from Argentina's fourth Binnial Update Report (BUR4).
- Data is read from the pdf file using camelot"""
+Data is read from the pdf file using camelot
+"""
 
+import os
 import sys
 
 import camelot
 import primap2 as pm2
 from primap2.pm2io._conversion import convert_ipcc_code_primap_to_primap2
 
-from unfccc_ghg_data.helper import downloaded_data_path, extracted_data_path
-from unfccc_ghg_data.helper import gas_baskets, process_data_for_country
+from unfccc_ghg_data.helper import (
+    downloaded_data_path,
+    extracted_data_path,
+    gas_baskets,
+    process_data_for_country,
+)
 
 # ###
 # configuration
@@ -21,53 +27,49 @@ from unfccc_ghg_data.helper import gas_baskets, process_data_for_country
 #  PRIMAP2 version
 if __name__ == "__main__":
     # folders and files
-    input_folder = downloaded_data_path / 'UNFCCC' / 'Argentina' / \
-                   'BUR4'
-    output_folder = extracted_data_path / 'UNFCCC' / 'Argentina'
+    input_folder = downloaded_data_path / "UNFCCC" / "Argentina" / "BUR4"
+    output_folder = extracted_data_path / "UNFCCC" / "Argentina"
     if not output_folder.exists():
         output_folder.mkdir()
 
-    output_filename = 'ARG_BUR4_2022_'
+    output_filename = "ARG_BUR4_2022_"
+
+    pdf_file = "4to_Informe_Bienal_de_la_Rep%C3%BAblica_Argentina.pdf"
 
-    pdf_file = '4to_Informe_Bienal_de_la_Rep%C3%BAblica_Argentina.pdf'
+    # definitions part 1: reading data from pdf and preprocessing for conversion to
+    # PRIMAP2 format
 
-    # definitions part 1: reading data from pdf and preprocessing for conversion to PRIMAP2 format
     # part 1.1 KyotoGHG, CO2, CH4, N2O tables
     #
     pages_to_read = range(232, 244)
     data_start_keyword = "Id#"
     data_end_keyword = "Fuente: Elaboración propia"
-    index_cols = ['Id#', 'Nombre']
-    col_rename = {
-        index_cols[0]: "category",
-        index_cols[1]: "orig_cat_name"
-    }
-    metadata = {
-        "entity": [0, 1],
-        "unit": [0, 2]
-    }
+    index_cols = ["Id#", "Nombre"]
+    col_rename = {index_cols[0]: "category", index_cols[1]: "orig_cat_name"}
+    metadata = {"entity": [0, 1], "unit": [0, 2]}
 
     rows_to_drop = [0]
 
     metadata_mapping = {
-        'unit': {
-            '(GgCO2e)': 'GgCO2e',
-            '(GgCO2)': 'Gg',
-            '(GgN2O)': 'Gg',
-            '(GgCH4)': 'Gg',
-            '(GgGas)': 'Gg',
+        "unit": {
+            "(GgCO2e)": "GgCO2e",
+            "(GgCO2)": "Gg",
+            "(GgN2O)": "Gg",
+            "(GgCH4)": "Gg",
+            "(GgGas)": "Gg",
         }
     }
 
     # part 1.2: fgases table
-    # the f-gases table is in wide format with no sectoral resolution and gases as row header
+    # the f-gases table is in wide format with no sectoral resolution and gases as row
+    # header
     pages_to_read_fgases = range(244, 247)
     data_start_keyword_fgases = "Gas"
-    index_cols_fgases = ['Gas']
+    index_cols_fgases = ["Gas"]
     cols_to_drop_fgases = ["Nombre"]
     metadata_fgases = {
         "unit": [0, 2],
-        "category": '2',
+        "category": "2",
         "orig_cat_name": "PROCESOS INDUSTRIALES Y USO DE PRODUCTOS",
     }
     col_rename_fgases = {
@@ -79,14 +81,14 @@ if __name__ == "__main__":
     cats_remove = ["Information Items", "Memo Items (3)"]
     # manual category codes
     cat_codes_manual = {  # conversion to PRIMAP1 format
-        '1A6': 'MBIO',
-        '1A3di': 'MBKM',
-        '1A3ai': 'MBKA',
-        '1A3di Navegación marítima y fluvial internacional': 'MBKM',
-        'S/N': 'MMULTIOP',
+        "1A6": "MBIO",
+        "1A3di": "MBKM",
+        "1A3ai": "MBKA",
+        "1A3di Navegación marítima y fluvial internacional": "MBKM",
+        "S/N": "MMULTIOP",
     }
 
-    cat_code_regexp = r'(?P<code>^[A-Z0-9]{1,8}).*'
+    cat_code_regexp = r"(?P<code>^[A-Z0-9]{1,8}).*"
 
     time_format = "%Y"
 
@@ -116,32 +118,32 @@ if __name__ == "__main__":
     coords_value_mapping = {
         #    "category": "PRIMAP1",
         "entity": {
-            'HFC-23': 'HFC23',
-            'HFC-32': 'HFC32',
-            'HFC-41': 'HFC41',
-            'HFC-43-10mee': 'HFC4310mee',
-            'HFC-125': 'HFC125',
-            'HFC-134': 'HFC134',
-            'HFC-134a': 'HFC134a',
-            'HFC-152a': 'HFC152a',
-            'HFC-143': 'HFC143',
-            'HFC-143a': 'HFC143a',
-            'HFC-227ea': 'HFC227ea',
-            'HFC-236fa': 'HFC236fa',
-            'HFC-245ca': 'HFC245ca',
-            'HFC-365mfc': 'HFC365mfc',
-            'HFC-245fa': 'HFC245fa',
-            'PFC-143 (CF4)': 'CF4',
-            'PFC-116 (C2F6)': 'C2F6',
-            'PFC-218 (C3F8)': 'C3F8',
-            'PFC-31-10 (C4F10)': 'C4F10',
-            'c-C4F8': 'cC4F8',
-            'PFC-51-144 (C6F14)': 'C6F14',
+            "HFC-23": "HFC23",
+            "HFC-32": "HFC32",
+            "HFC-41": "HFC41",
+            "HFC-43-10mee": "HFC4310mee",
+            "HFC-125": "HFC125",
+            "HFC-134": "HFC134",
+            "HFC-134a": "HFC134a",
+            "HFC-152a": "HFC152a",
+            "HFC-143": "HFC143",
+            "HFC-143a": "HFC143a",
+            "HFC-227ea": "HFC227ea",
+            "HFC-236fa": "HFC236fa",
+            "HFC-245ca": "HFC245ca",
+            "HFC-365mfc": "HFC365mfc",
+            "HFC-245fa": "HFC245fa",
+            "PFC-143 (CF4)": "CF4",
+            "PFC-116 (C2F6)": "C2F6",
+            "PFC-218 (C3F8)": "C3F8",
+            "PFC-31-10 (C4F10)": "C4F10",
+            "c-C4F8": "cC4F8",
+            "PFC-51-144 (C6F14)": "C6F14",
         },
         "unit": "PRIMAP1",
         "orig_cat_name": {
             "1A3di Navegación marítima y fluvial internacional": "Navegación marítima y fluvial internacional",
-        }
+        },
     }
 
     coords_value_filling = {
@@ -172,7 +174,8 @@ if __name__ == "__main__":
         "references": "https://unfccc.int/documents/419772",
         "rights": "XXXX",
         "contact": "mail@johannes-guetschow.de",
-        "title": "Cuarto Informe Bienal de Actualización de la República Argentina a la Convención Marco delas Naciones Unidas Sobre el Cambio Climático",
+        "title": "Cuarto Informe Bienal de Actualización de la República Argentina a "
+        "la Convención Marco delas Naciones Unidas Sobre el Cambio Climático",
         "comment": "Read fom pdf file by Johannes Gütschow",
         "institution": "United Nations Framework Convention on Climate Change (UNFCCC)",
     }
@@ -192,8 +195,9 @@ if __name__ == "__main__":
     data_all = None
     for page in pages_to_read:
         # read current page
-        tables = camelot.read_pdf(str(input_folder / pdf_file), pages=str(page),
-                                  flavor='stream')
+        tables = camelot.read_pdf(
+            str(input_folder / pdf_file), pages=str(page), flavor="stream"
+        )
         df_current = tables[0].df
         rows_to_drop = []
         for index, data in df_current.iterrows():
@@ -212,16 +216,18 @@ if __name__ == "__main__":
         df_current = df_current.drop(rows_to_drop)
         idx_header = df_current.index[df_current[0] == index_cols[0]].tolist()
         df_current = df_current.rename(
-            dict(zip(df_current.columns, list(df_current.loc[idx_header[0]]))), axis=1)
+            dict(zip(df_current.columns, list(df_current.loc[idx_header[0]]))), axis=1
+        )
         df_current = df_current.drop(idx_header)
 
         # for sheet "Aggregate GHGs" fill entity cell
         if page in range(232, 235):
             df_current.iloc[
-                metadata["entity"][0], metadata["entity"][1]] = "KYOTOGHG (SARGWP100)"
+                metadata["entity"][0], metadata["entity"][1]
+            ] = "KYOTOGHG (SARGWP100)"
         # drop all rows where the index cols (category code and name) are both NaN
         # as without one of them there is no category information
-        df_current.dropna(axis=0, how='all', subset=index_cols, inplace=True)
+        df_current = df_current.dropna(axis=0, how="all", subset=index_cols)
         # set index. necessary for the stack operation in the conversion to long format
         # df_current = df_current.set_index(index_cols)
         # add columns
@@ -242,21 +248,27 @@ if __name__ == "__main__":
         df_current = df_current.drop(df_current.index[0])
 
         # fix number format
-        df_current = df_current.apply(lambda x: x.str.replace('.', '', regex=False), axis=1)
-        df_current = df_current.apply(lambda x: x.str.replace(',', '.', regex=False),
-                                      axis=1)
+        df_current = df_current.apply(
+            lambda x: x.str.replace(".", "", regex=False), axis=1
+        )
+        df_current = df_current.apply(
+            lambda x: x.str.replace(",", ".", regex=False), axis=1
+        )
 
-        df_current.rename(columns=col_rename, inplace=True)
+        df_current = df_current.rename(columns=col_rename)
 
         # reindex
-        df_current.reset_index(inplace=True, drop=True)
+        df_current = df_current.reset_index(drop=True)
 
         df_current["category"] = df_current["category"].replace(cat_codes_manual)
+
         # then the regex replacements
-        def repl(m):
-            return convert_ipcc_code_primap_to_primap2('IPC' + m.group('code'))
-        df_current["category"] = df_current["category"].str.replace(cat_code_regexp, repl,
-                                                                    regex=True)
+        def repl(m):  # noqa: D103
+            return convert_ipcc_code_primap_to_primap2("IPC" + m.group("code"))
+
+        df_current["category"] = df_current["category"].str.replace(
+            cat_code_regexp, repl, regex=True
+        )
 
         df_current = df_current.reset_index(drop=True)
 
@@ -274,7 +286,7 @@ if __name__ == "__main__":
             coords_value_filling=coords_value_filling,
             filter_remove=filter_remove,
             filter_keep=filter_keep,
-            meta_data=meta_data
+            meta_data=meta_data,
         )
 
         # convert to PRIMAP2 native format
@@ -289,8 +301,9 @@ if __name__ == "__main__":
     # read fgases
     for page in pages_to_read_fgases:
         # read current page
-        tables = camelot.read_pdf(str(input_folder / pdf_file), pages=str(page),
-                                  flavor='stream')
+        tables = camelot.read_pdf(
+            str(input_folder / pdf_file), pages=str(page), flavor="stream"
+        )
         df_current = tables[0].df
         rows_to_drop = []
         for index, data in df_current.iterrows():
@@ -309,11 +322,12 @@ if __name__ == "__main__":
         df_current = df_current.drop(rows_to_drop)
         idx_header = df_current.index[df_current[0] == index_cols_fgases[0]].tolist()
         df_current = df_current.rename(
-            dict(zip(df_current.columns, list(df_current.loc[idx_header[0]]))), axis=1)
+            dict(zip(df_current.columns, list(df_current.loc[idx_header[0]]))), axis=1
+        )
         df_current = df_current.drop(idx_header)
 
         # drop all rows where the index cols (category code
-        df_current.dropna(axis=0, how='all', subset=index_cols_fgases, inplace=True)
+        df_current = df_current.dropna(axis=0, how="all", subset=index_cols_fgases)
         # set index. necessary for the stack operation in the conversion to long format
         # df_current = df_current.set_index(index_cols)
         # add columns
@@ -324,7 +338,8 @@ if __name__ == "__main__":
                 value = metadata_fgases[col]
             else:
                 value = df_current.iloc[
-                    metadata_fgases[col][0], metadata_fgases[col][1] + inserted]
+                    metadata_fgases[col][0], metadata_fgases[col][1] + inserted
+                ]
                 if col in metadata_mapping.keys():
                     if value in metadata_mapping[col].keys():
                         value = metadata_mapping[col][value]
@@ -339,21 +354,27 @@ if __name__ == "__main__":
         df_current = df_current.drop(df_current.index[0])
 
         # fix number format
-        df_current = df_current.apply(lambda x: x.str.replace('.', '', regex=False), axis=1)
-        df_current = df_current.apply(lambda x: x.str.replace(',', '.', regex=False),
-                                      axis=1)
+        df_current = df_current.apply(
+            lambda x: x.str.replace(".", "", regex=False), axis=1
+        )
+        df_current = df_current.apply(
+            lambda x: x.str.replace(",", ".", regex=False), axis=1
+        )
 
-        df_current.rename(columns=col_rename_fgases, inplace=True)
+        df_current = df_current.rename(columns=col_rename_fgases)
 
         # reindex
-        df_current.reset_index(inplace=True, drop=True)
+        df_current = df_current.reset_index(drop=True)
 
         df_current["category"] = df_current["category"].replace(cat_codes_manual)
-        # then the regex repalcements
-        def repl(m):
-            return convert_ipcc_code_primap_to_primap2('IPC' + m.group('code'))
-        df_current["category"] = df_current["category"].str.replace(cat_code_regexp, repl,
-                                                                    regex=True)
+
+        # then the regex replacements
+        def repl(m):  # noqa: D103
+            return convert_ipcc_code_primap_to_primap2("IPC" + m.group("code"))
+
+        df_current["category"] = df_current["category"].str.replace(
+            cat_code_regexp, repl, regex=True
+        )
 
         df_current = df_current.reset_index(drop=True)
 
@@ -371,7 +392,7 @@ if __name__ == "__main__":
             coords_value_filling=coords_value_filling,
             filter_remove=filter_remove,
             filter_keep=filter_keep,
-            meta_data=meta_data
+            meta_data=meta_data,
         )
 
         # convert to PRIMAP2 native format
@@ -390,19 +411,17 @@ if __name__ == "__main__":
         processing_info_country=None,
     )
 
-
     # ###
     # save data to IF and native format
     # ###
 
     encoding = {var: compression for var in data_all.data_vars}
-    data_all.pr.to_netcdf(output_folder / (output_filename + coords_terminologies[
-        "category"] + ".nc"), encoding=encoding)
+    data_all.pr.to_netcdf(
+        output_folder / (output_filename + coords_terminologies["category"] + ".nc"),
+        encoding=encoding,
+    )
 
     data_if = data_all.pr.to_interchange_format()
-    pm2.pm2io.write_interchange_format(output_folder / (output_filename + coords_terminologies["category"]), data_if)
-
-
-
-
-
+    pm2.pm2io.write_interchange_format(
+        output_folder / (output_filename + coords_terminologies["category"]), data_if
+    )

+ 28 - 5
src/unfccc_ghg_data/unfccc_reader/Chile/__init__.py

@@ -1,7 +1,30 @@
-"""Chile (BUR4, BUR5)
+"""Read Chile's BURs, NIRs, NCs
 
-Scripts and configurations to read Chile's is available:
- * BUR4 (from xlsx)
- * BUR5 (from xlsx)
+Scripts and configurations to read Chile's submissions to the UNFCCC.
+Currently, the following submissions and datasets are available (all datasets
+including DI (read using the DI-reader) and legacy BUR/NIR (no code)):
 
-"""
+.. exec_code::
+    :hide_code:
+
+    from unfccc_ghg_data.helper.functions import (get_country_datasets,
+                                                  get_country_submissions)
+    country = 'CHL'
+    # print available submissions
+    print("="*15 + " Available submissions " + "="*15)
+    get_country_submissions(country, True)
+    print("")
+
+    #print available datasets
+    print("="*15 + " Available datasets " + "="*15)
+    get_country_datasets(country, True)
+
+You can also obtain this information running
+
+.. code-block:: bash
+
+    poetry run doit country_info country=CHL
+
+See below for a listing of scripts for BUR/NIR reading including links.
+
+"""

+ 288 - 141
src/unfccc_ghg_data/unfccc_reader/Chile/config_chl_bur4.py

@@ -1,166 +1,295 @@
+"""Config for Chile BUR4,5
+
+General configuration for reading the inventory files underlying Chile's BURs 4 and 5.
+PRIMAP2 data format specific configuration is BUR specific and not contained here
+but in the reading scripts.
+
+"""
+
 ## parameters for conversion to IPCC2006 categories
 filter_remove_IPCC2006 = {
-    "filter_cats": { # filter cats that have no 1:1 match for IPCC2006 or are additional subsectors
+    "filter_cats": {  # filter cats that have no 1:1 match for IPCC2006 or are additional subsectors
         "category (IPCC2006_PRIMAP)": [
             # refrigeration and air conditioning subsectors don't match IPCC2006
-            '2.F.1.a', '2.F.1.b', '2.F.1.c', '2.F.1.d', '2.F.1.e', '2.F.1.f',
+            "2.F.1.a",
+            "2.F.1.b",
+            "2.F.1.c",
+            "2.F.1.d",
+            "2.F.1.e",
+            "2.F.1.f",
             # additional subsectors for other cattle in enteric fermentation
-            '3.A.1.b.i', '3.A.1.b.ii', '3.A.1.b.iii', '3.A.1.b.iv', '3.A.1.b.v',
+            "3.A.1.b.i",
+            "3.A.1.b.ii",
+            "3.A.1.b.iii",
+            "3.A.1.b.iv",
+            "3.A.1.b.v",
             # additional subcategories for swine in enteric fermentation
-            '3.A.3.a', '3.A.3.b', '3.A.3.c',
+            "3.A.3.a",
+            "3.A.3.b",
+            "3.A.3.c",
             # other animals in enteric fermentation not fitting the IPCC2006 other animals
-            '3.A.4',
+            "3.A.4",
             # need to be summed to '3.A.4.j'
-            '3.A.4.f', '3.A.4.g', '3.A.4.g.i', '3.A.4.g.ii',
+            "3.A.4.f",
+            "3.A.4.g",
+            "3.A.4.g.i",
+            "3.A.4.g.ii",
             # additional subsectors for other cattle in enteric fermentation
-            '3.B.1.b.i', '3.B.1.b.ii', '3.B.1.b.iii', '3.B.1.b.iv', '3.B.1.b.v',
+            "3.B.1.b.i",
+            "3.B.1.b.ii",
+            "3.B.1.b.iii",
+            "3.B.1.b.iv",
+            "3.B.1.b.v",
             # additional subcategories for swine in enteric fermentation
-            '3.B.3.a', '3.B.3.b', '3.B.3.c',
+            "3.B.3.a",
+            "3.B.3.b",
+            "3.B.3.c",
             # other animals in enteric fermentation not fitting the IPCC2006 other animals
-            '3.B.4',
+            "3.B.4",
             # need to be summed to '3.A.4.j'
-            '3.B.4.f', '3.B.4.g', '3.B.4.g.i', '3.B.4.g.ii',
+            "3.B.4.f",
+            "3.B.4.g",
+            "3.B.4.g.i",
+            "3.B.4.g.ii",
             # subsectors of indirect N2O from manure management
-            '3.B.5.a', '3.B.5.b', '3.B.5.c', '3.B.5.d', '3.B.5.d.i', '3.B.5.d.ii',
-            '3.B.5.d.iii', '3.B.5.d.iv', '3.B.5.d.v', '3.B.5.d.vi', '3.B.5.d.vii',
+            "3.B.5.a",
+            "3.B.5.b",
+            "3.B.5.c",
+            "3.B.5.d",
+            "3.B.5.d.i",
+            "3.B.5.d.ii",
+            "3.B.5.d.iii",
+            "3.B.5.d.iv",
+            "3.B.5.d.v",
+            "3.B.5.d.vi",
+            "3.B.5.d.vii",
             # subsectors of rice cultivation
-            '3.C.1', '3.C.2', '3.C.3', '3.C.4',
+            "3.C.1",
+            "3.C.2",
+            "3.C.3",
+            "3.C.4",
             # no direct represenation of "agricultural soils" in IPCC 2006
-            '3.D',
+            "3.D",
             # subsectors of 3.D.1. not matching subsectors of 3.C.4 (direct emissions from managed soils)
             # '3.D.1.a.': '3.C.1.a', '3.D.1.b.': '3.C.1.b', '3.D.1.c.': '3.A.4.c', '3.D.1.d.': '3.C.4.d',
-            '3.D.1.a', '3.D.1.b', '3.D.1.b.i', '3.D.1.b.ii', '3.D.1.b.iii', '3.D.1.c',
-            '3.D.1.d', '3.D.1.e', '3.D.1.f', '3.D.1.g',
+            "3.D.1.a",
+            "3.D.1.b",
+            "3.D.1.b.i",
+            "3.D.1.b.ii",
+            "3.D.1.b.iii",
+            "3.D.1.c",
+            "3.D.1.d",
+            "3.D.1.e",
+            "3.D.1.f",
+            "3.D.1.g",
             # additional subsector level of 3.D.2.a (3.C.5.a Atmospheric deposition)
-            '3.D.2.a.i', '3.D.2.a.ii', '3.D.2.a.ii.1', '3.D.2.a.ii.2', '3.D.2.a.ii.3', '3.D.2.a.iii',
+            "3.D.2.a.i",
+            "3.D.2.a.ii",
+            "3.D.2.a.ii.1",
+            "3.D.2.a.ii.2",
+            "3.D.2.a.ii.3",
+            "3.D.2.a.iii",
             # additional subsector level of 3.D.2.b (3.C.5.b Nitrongen leaching and runoff)
-            '3.D.2.b.i', '3.D.2.b.ii', '3.D.2.b.ii.1', '3.D.2.b.ii.2', '3.D.2.b.ii.3', '3.D.2.b.iii',
-            '3.D.2.b.iv', '3.D.2.b.v',
+            "3.D.2.b.i",
+            "3.D.2.b.ii",
+            "3.D.2.b.ii.1",
+            "3.D.2.b.ii.2",
+            "3.D.2.b.ii.3",
+            "3.D.2.b.iii",
+            "3.D.2.b.iv",
+            "3.D.2.b.v",
             # additional subsector level of 3.F (3.C.1.b Biomass burning in cropland)
-            '3.F.1', '3.F.2', '3.F.3',
+            "3.F.1",
+            "3.F.2",
+            "3.F.3",
             # additional subsector level of 3.G (3.C.2 Liming)
-            '3.G.1', '3.G.2',
+            "3.G.1",
+            "3.G.2",
             # additional subsector levels of 4.A.1 (3.A.1.a Forest land remaining forest land)
-            '4.A.1.a', '4.A.1.a.i', '4.A.1.a.i.1', '4.A.1.a.i.1.a', '4.A.1.a.i.1.b', '4.A.1.a.i.1.c',
-            '4.A.1.a.i.1.d', '4.A.1.a.i.1.e', '4.A.1.a.i.1.f', '4.A.1.a.i.1.g', '4.A.1.a.i.1.h',
-            '4.A.1.a.i.1.i', '4.A.1.a.i.1.j', '4.A.1.a.i.1.k', '4.A.1.a.i.1.l', '4.A.1.a.i.2',
-            '4.A.1.a.i.2.a', '4.A.1.a.i.2.b', '4.A.1.a.i.2.c', '4.A.1.a.i.2.d', '4.A.1.a.i.2.e',
-            '4.A.1.a.i.2.f', '4.A.1.a.i.2.g', '4.A.1.a.i.2.h', '4.A.1.a.i.2.i', '4.A.1.a.i.2.j',
-            '4.A.1.a.i.2.k', '4.A.1.a.i.2.l', '4.A.1.a.i.3', '4.A.1.a.i.3.a', '4.A.1.a.i.3.b',
-            '4.A.1.a.i.3.c', '4.A.1.a.i.3.d', '4.A.1.a.i.3.e', '4.A.1.a.i.3.f', '4.A.1.a.i.3.g',
-            '4.A.1.a.i.3.h', '4.A.1.a.i.3.i', '4.A.1.a.i.3.j', '4.A.1.a.i.3.k', '4.A.1.a.i.3.l',
-            '4.A.1.a.ii', '4.A.1.a.ii.1', '4.A.1.a.ii.2', '4.A.1.a.ii.3', '4.A.1.a.ii.4',
-            '4.A.1.a.ii.5', '4.A.1.a.ii.6', '4.A.1.a.ii.7', '4.A.1.b', '4.A.1.b.i', '4.A.1.b.i.1',
-            '4.A.1.b.i.2', '4.A.1.b.i.3', '4.A.1.b.i.4', '4.A.1.b.ii', '4.A.1.b.ii.1', '4.A.1.b.ii.2',
-            '4.A.1.b.iii', '4.A.1.b.iii.1', '4.A.1.b.iii.1.a', '4.A.1.b.iii.1.b', '4.A.1.b.iii.2',
-            '4.A.1.b.iv', '4.A.1.c', '4.A.1.c.i', '4.A.1.c.ii',
+            "4.A.1.a",
+            "4.A.1.a.i",
+            "4.A.1.a.i.1",
+            "4.A.1.a.i.1.a",
+            "4.A.1.a.i.1.b",
+            "4.A.1.a.i.1.c",
+            "4.A.1.a.i.1.d",
+            "4.A.1.a.i.1.e",
+            "4.A.1.a.i.1.f",
+            "4.A.1.a.i.1.g",
+            "4.A.1.a.i.1.h",
+            "4.A.1.a.i.1.i",
+            "4.A.1.a.i.1.j",
+            "4.A.1.a.i.1.k",
+            "4.A.1.a.i.1.l",
+            "4.A.1.a.i.2",
+            "4.A.1.a.i.2.a",
+            "4.A.1.a.i.2.b",
+            "4.A.1.a.i.2.c",
+            "4.A.1.a.i.2.d",
+            "4.A.1.a.i.2.e",
+            "4.A.1.a.i.2.f",
+            "4.A.1.a.i.2.g",
+            "4.A.1.a.i.2.h",
+            "4.A.1.a.i.2.i",
+            "4.A.1.a.i.2.j",
+            "4.A.1.a.i.2.k",
+            "4.A.1.a.i.2.l",
+            "4.A.1.a.i.3",
+            "4.A.1.a.i.3.a",
+            "4.A.1.a.i.3.b",
+            "4.A.1.a.i.3.c",
+            "4.A.1.a.i.3.d",
+            "4.A.1.a.i.3.e",
+            "4.A.1.a.i.3.f",
+            "4.A.1.a.i.3.g",
+            "4.A.1.a.i.3.h",
+            "4.A.1.a.i.3.i",
+            "4.A.1.a.i.3.j",
+            "4.A.1.a.i.3.k",
+            "4.A.1.a.i.3.l",
+            "4.A.1.a.ii",
+            "4.A.1.a.ii.1",
+            "4.A.1.a.ii.2",
+            "4.A.1.a.ii.3",
+            "4.A.1.a.ii.4",
+            "4.A.1.a.ii.5",
+            "4.A.1.a.ii.6",
+            "4.A.1.a.ii.7",
+            "4.A.1.b",
+            "4.A.1.b.i",
+            "4.A.1.b.i.1",
+            "4.A.1.b.i.2",
+            "4.A.1.b.i.3",
+            "4.A.1.b.i.4",
+            "4.A.1.b.ii",
+            "4.A.1.b.ii.1",
+            "4.A.1.b.ii.2",
+            "4.A.1.b.iii",
+            "4.A.1.b.iii.1",
+            "4.A.1.b.iii.1.a",
+            "4.A.1.b.iii.1.b",
+            "4.A.1.b.iii.2",
+            "4.A.1.b.iv",
+            "4.A.1.c",
+            "4.A.1.c.i",
+            "4.A.1.c.ii",
             # additional subsector level in land converted to forest land
-            '4.A.2.a.i', '4.A.2.a.ii', '4.A.2.b.i', '4.A.2.b.ii', '4.A.2.c.i', '4.A.2.c.ii',
-            '4.A.2.d.i', '4.A.2.d.ii', '4.A.2.e.i', '4.A.2.e.ii',
+            "4.A.2.a.i",
+            "4.A.2.a.ii",
+            "4.A.2.b.i",
+            "4.A.2.b.ii",
+            "4.A.2.c.i",
+            "4.A.2.c.ii",
+            "4.A.2.d.i",
+            "4.A.2.d.ii",
+            "4.A.2.e.i",
+            "4.A.2.e.ii",
             # subsectors of solid waste disposal might not match
-            '5.A.1', '5.A.2', '5.A.3',
+            "5.A.1",
+            "5.A.2",
+            "5.A.3",
         ],
     },
 }
 
 
-cat_mapping = { # categories not listed here have the same UNFCCC_GHG_data as in IPCC 2006 specifications
-    '3': 'M.AG',
-    '3.A': '3.A.1',
-    '3.A.1': '3.A.1.a',
-    '3.A.1.a': '3.A.1.a.i',
-    '3.A.1.b': '3.A.1.a.ii',
-    '3.A.2': '3.A.1.c',
-    '3.A.3': '3.A.1.h',
-    '3.A.4.a': '3.A.1.b',
-    '3.A.4.b': '3.A.1.d',
-    '3.A.4.c': '3.A.1.f',
-    '3.A.4.d': '3.A.1.g',
-    '3.A.4.e': '3.A.1.i',
-    '3.B': '3.A.2',
-    '3.B.1': '3.A.2.a',
-    '3.B.1.a': '3.A.2.a.i',
-    '3.B.1.b': '3.A.2.a.ii',
-    '3.B.2': '3.A.2.c',
-    '3.B.3': '3.A.2.h',
-    '3.B.4.a': '3.A.2.b',
-    '3.B.4.b': '3.A.2.d',
-    '3.B.4.c': '3.A.2.f',
-    '3.B.4.d': '3.A.2.g',
-    '3.B.4.e': '3.A.2.i',
-    '3.B.5': '3.C.6',
-    '3.C': '3.C.7',
-    '3.D.1': '3.C.4',
-    '3.D.2': '3.C.5',
-    '3.D.2.a': '3.C.5.a', # not in climate_categories
-    '3.D.2.b': '3.C.5.b', # not in climate_categories
-    '3.E': '3.C.1.c',
-    '3.F': '3.C.1.b',
-    '3.G': '3.C.2',
-    '3.H': '3.C.3',
-    '3.I': '3.C.8.a', # merge this with cat below
-    '3.J': '3.C.8.b', # merge with cat above
-    '4': 'M.LULUCF',
-    '4.A': '3.B.1',
-    '4.A.1': '3.B.1.a',
-    '4.A.2': '3.B.1.b',
-    '4.A.2.a': '3.B.1.b.i',
-    '4.A.2.b': '3.B.1.b.ii',
-    '4.A.2.c': '3.B.1.b.iii',
-    '4.A.2.d': '3.B.1.b.iv',
-    '4.A.2.e': '3.B.1.b.v',
-    '4.B': '3.B.2',
-    '4.B.1': '3.B.2.a',
-    '4.B.2': '3.B.2.b',
-    '4.B.2.a': '3.B.2.b.i',
-    '4.B.2.b': '3.B.2.b.ii',
-    '4.B.2.c': '3.B.2.b.iii',
-    '4.B.2.d': '3.B.2.b.iv',
-    '4.B.2.e': '3.B.2.b.v',
-    '4.C': '3.B.3',
-    '4.C.1': '3.B.3.a',
-    '4.C.2': '3.B.3.b',
-    '4.C.2.a': '3.B.3.b.i',
-    '4.C.2.b': '3.B.3.b.ii',
-    '4.C.2.c': '3.B.3.b.iii',
-    '4.C.2.d': '3.B.3.b.iv',
-    '4.C.2.e': '3.B.3.b.v',
-    '4.D': '3.B.4',
-    '4.D.1': '3.B.4.a',
-    '4.D.2': '3.B.4.b',
-    '4.D.2.a': '3.B.4.b.i',
-    '4.D.2.b': '3.B.4.b.ii',
-    '4.D.2.c': '3.B.4.b.iii',
-    '4.D.2.d': '3.B.4.b.iv',
-    '4.D.2.e': '3.B.4.b.v',
-    '4.E': '3.B.5',
-    '4.E.1': '3.B.5.a',
-    '4.E.2': '3.B.5.b',
-    '4.E.2.a': '3.B.5.b.i',
-    '4.E.2.b': '3.B.5.b.ii',
-    '4.E.2.c': '3.B.5.b.iii',
-    '4.E.2.d': '3.B.5.b.iv',
-    '4.E.2.e': '3.B.5.b.v',
-    '4.F': '3.B.6',
-    '4.F.1': '3.B.6.a',
-    '4.F.2': '3.B.6.b',
-    '4.F.2.a': '3.B.6.b.i',
-    '4.F.2.b': '3.B.6.b.ii',
-    '4.F.2.c': '3.B.6.b.iii',
-    '4.F.2.d': '3.B.6.b.iv',
-    '4.F.2.e': '3.B.6.b.v',
-    '4.G': '3.D.1',
-    '4.H': '3.D.2',
-    '5': '4',
-    '5.A': '4.A',
-    '5.B': '4.B',
-    '5.C': '4.C',
-    '5.C.1': '4.C.1',
-    '5.C.2': '4.C.2',
-    '5.D': '4.D',
-    '5.D.1': '4.D.1',
-    '5.D.2': '4.D.2',
-    '5.E': '4.E',
+cat_mapping = {  # categories not listed here have the same UNFCCC_GHG_data as in IPCC 2006 specifications
+    "3": "M.AG",
+    "3.A": "3.A.1",
+    "3.A.1": "3.A.1.a",
+    "3.A.1.a": "3.A.1.a.i",
+    "3.A.1.b": "3.A.1.a.ii",
+    "3.A.2": "3.A.1.c",
+    "3.A.3": "3.A.1.h",
+    "3.A.4.a": "3.A.1.b",
+    "3.A.4.b": "3.A.1.d",
+    "3.A.4.c": "3.A.1.f",
+    "3.A.4.d": "3.A.1.g",
+    "3.A.4.e": "3.A.1.i",
+    "3.B": "3.A.2",
+    "3.B.1": "3.A.2.a",
+    "3.B.1.a": "3.A.2.a.i",
+    "3.B.1.b": "3.A.2.a.ii",
+    "3.B.2": "3.A.2.c",
+    "3.B.3": "3.A.2.h",
+    "3.B.4.a": "3.A.2.b",
+    "3.B.4.b": "3.A.2.d",
+    "3.B.4.c": "3.A.2.f",
+    "3.B.4.d": "3.A.2.g",
+    "3.B.4.e": "3.A.2.i",
+    "3.B.5": "3.C.6",
+    "3.C": "3.C.7",
+    "3.D.1": "3.C.4",
+    "3.D.2": "3.C.5",
+    "3.D.2.a": "3.C.5.a",  # not in climate_categories
+    "3.D.2.b": "3.C.5.b",  # not in climate_categories
+    "3.E": "3.C.1.c",
+    "3.F": "3.C.1.b",
+    "3.G": "3.C.2",
+    "3.H": "3.C.3",
+    "3.I": "3.C.8.a",  # merge this with cat below
+    "3.J": "3.C.8.b",  # merge with cat above
+    "4": "M.LULUCF",
+    "4.A": "3.B.1",
+    "4.A.1": "3.B.1.a",
+    "4.A.2": "3.B.1.b",
+    "4.A.2.a": "3.B.1.b.i",
+    "4.A.2.b": "3.B.1.b.ii",
+    "4.A.2.c": "3.B.1.b.iii",
+    "4.A.2.d": "3.B.1.b.iv",
+    "4.A.2.e": "3.B.1.b.v",
+    "4.B": "3.B.2",
+    "4.B.1": "3.B.2.a",
+    "4.B.2": "3.B.2.b",
+    "4.B.2.a": "3.B.2.b.i",
+    "4.B.2.b": "3.B.2.b.ii",
+    "4.B.2.c": "3.B.2.b.iii",
+    "4.B.2.d": "3.B.2.b.iv",
+    "4.B.2.e": "3.B.2.b.v",
+    "4.C": "3.B.3",
+    "4.C.1": "3.B.3.a",
+    "4.C.2": "3.B.3.b",
+    "4.C.2.a": "3.B.3.b.i",
+    "4.C.2.b": "3.B.3.b.ii",
+    "4.C.2.c": "3.B.3.b.iii",
+    "4.C.2.d": "3.B.3.b.iv",
+    "4.C.2.e": "3.B.3.b.v",
+    "4.D": "3.B.4",
+    "4.D.1": "3.B.4.a",
+    "4.D.2": "3.B.4.b",
+    "4.D.2.a": "3.B.4.b.i",
+    "4.D.2.b": "3.B.4.b.ii",
+    "4.D.2.c": "3.B.4.b.iii",
+    "4.D.2.d": "3.B.4.b.iv",
+    "4.D.2.e": "3.B.4.b.v",
+    "4.E": "3.B.5",
+    "4.E.1": "3.B.5.a",
+    "4.E.2": "3.B.5.b",
+    "4.E.2.a": "3.B.5.b.i",
+    "4.E.2.b": "3.B.5.b.ii",
+    "4.E.2.c": "3.B.5.b.iii",
+    "4.E.2.d": "3.B.5.b.iv",
+    "4.E.2.e": "3.B.5.b.v",
+    "4.F": "3.B.6",
+    "4.F.1": "3.B.6.a",
+    "4.F.2": "3.B.6.b",
+    "4.F.2.a": "3.B.6.b.i",
+    "4.F.2.b": "3.B.6.b.ii",
+    "4.F.2.c": "3.B.6.b.iii",
+    "4.F.2.d": "3.B.6.b.iv",
+    "4.F.2.e": "3.B.6.b.v",
+    "4.G": "3.D.1",
+    "4.H": "3.D.2",
+    "5": "4",
+    "5.A": "4.A",
+    "5.B": "4.B",
+    "5.C": "4.C",
+    "5.C.1": "4.C.1",
+    "5.C.2": "4.C.2",
+    "5.D": "4.D",
+    "5.D.1": "4.D.1",
+    "5.D.2": "4.D.2",
+    "5.E": "4.E",
 }
 
 # comments
@@ -176,11 +305,29 @@ cat_mapping = { # categories not listed here have the same UNFCCC_GHG_data as in
 # '3.A.4.g.ii.',
 
 aggregate_cats = {
-    '3.A': {'sources': ['3.A.1', '3.A.2'], 'name': 'Livestock'},
-    '3.B': {'sources': ['3.B.1', '3.B.2', '3.B.3', '3.B.4', '3.B.5', '3.B.6'], 'name': 'Land'},
-    '3.C.1': {'sources': ['3.C.1.b','3.C.1.c'], 'name': 'Emissions from Biomass Burning'},
-    '3.C.8': {'sources': ['3.C.8.a', '3.C.8.b'], 'name': 'Other'},
-    '3.C': {'sources': ['3.C.1', '3.C.2', '3.C.3', '3.C.4', '3.C.5', '3.C.6', '3.C.7', '3.C.8'], 'name': 'Aggregate sources and non-CO2 emissions sources on land'},
-    '3.D': {'sources': ['3.D.1', '3.D.2'], 'name': 'Other'},
-    '3': {'sources': ['3.A', '3.B', '3.C', '3.D'], 'name': 'AFOLU'},
+    "3.A": {"sources": ["3.A.1", "3.A.2"], "name": "Livestock"},
+    "3.B": {
+        "sources": ["3.B.1", "3.B.2", "3.B.3", "3.B.4", "3.B.5", "3.B.6"],
+        "name": "Land",
+    },
+    "3.C.1": {
+        "sources": ["3.C.1.b", "3.C.1.c"],
+        "name": "Emissions from Biomass Burning",
+    },
+    "3.C.8": {"sources": ["3.C.8.a", "3.C.8.b"], "name": "Other"},
+    "3.C": {
+        "sources": [
+            "3.C.1",
+            "3.C.2",
+            "3.C.3",
+            "3.C.4",
+            "3.C.5",
+            "3.C.6",
+            "3.C.7",
+            "3.C.8",
+        ],
+        "name": "Aggregate sources and non-CO2 emissions sources on land",
+    },
+    "3.D": {"sources": ["3.D.1", "3.D.2"], "name": "Other"},
+    "3": {"sources": ["3.A", "3.B", "3.C", "3.D"], "name": "AFOLU"},
 }

+ 90 - 52
src/unfccc_ghg_data/unfccc_reader/Chile/read_CHL_BUR4_from_xlsx.py

@@ -2,14 +2,15 @@
 Read Chile's 2020 inventory from xlsx
 
 This script reads data from Chile's 2020 national inventory which is underlying BUR4.
- Data is read from the xlsx file"""
+Data is read from the xlsx file
+"""
 
 import os
 import sys
 
 import pandas as pd
 import primap2 as pm2
-from .config_chl_bur4 import aggregate_cats, cat_mapping, filter_remove_IPCC2006
+from config_chl_bur4 import aggregate_cats, cat_mapping, filter_remove_IPCC2006
 from primap2.pm2io._data_reading import filter_data, matches_time_format
 
 from unfccc_ghg_data.helper import downloaded_data_path, extracted_data_path
@@ -20,42 +21,49 @@ if __name__ == "__main__":
     # ###
 
     # folders and files
-    input_folder = downloaded_data_path / 'UNFCCC' / 'Chile' / 'BUR4'
-    output_folder = extracted_data_path / 'UNFCCC' / 'Chile'
+    input_folder = downloaded_data_path / "UNFCCC" / "Chile" / "BUR4"
+    output_folder = extracted_data_path / "UNFCCC" / "Chile"
     if not output_folder.exists():
         output_folder.mkdir()
 
-    output_filename = 'CHL_BUR4_2021_'
+    output_filename = "CHL_BUR4_2021_"
 
-    inventory_file = 'Inventario_Nacional_de_GEI-1990-2018.xlsx'
+    inventory_file = "Inventario_Nacional_de_GEI-1990-2018.xlsx"
     years_to_read = range(1990, 2018 + 1)
 
     # configuration for conversion to PRIMAP2 data format
     unit_row = "header"
     unit_info = {
-        'regexp_entity': r'(.*)\s\(.*\)$',
-        'regexp_unit': r'.*\s\((.*)\)$',
-        'default_unit': 'kt',
-        'manual_repl_unit': {
-            'kt CO₂ eq': 'ktCO2eq',
-            'HFC (kt CO₂ eq)': 'ktCO2eq',
-            'PFC (kt CO₂ eq)': 'ktCO2eq',
-            'SF₆ (kt CO₂ eq)': 'ktCO2eq',
+        "regexp_entity": r"(.*)\s\(.*\)$",
+        "regexp_unit": r".*\s\((.*)\)$",
+        "default_unit": "kt",
+        "manual_repl_unit": {
+            "kt CO₂ eq": "ktCO2eq",
+            "HFC (kt CO₂ eq)": "ktCO2eq",
+            "PFC (kt CO₂ eq)": "ktCO2eq",
+            "SF₆ (kt CO₂ eq)": "ktCO2eq",
+        },
+        "manual_repl_entity": {
+            "kt CO₂ eq": "KYOTOGHG (AR4GWP100)",
+            "HFC (kt CO₂ eq)": "HFCS (AR4GWP100)",
+            "PFC (kt CO₂ eq)": "PFCS (AR4GWP100)",
+            "SF₆ (kt CO₂ eq)": "SF6 (AR4GWP100)",
         },
-        'manual_repl_entity': {
-            'kt CO₂ eq': 'KYOTOGHG (AR4GWP100)',
-            'HFC (kt CO₂ eq)': 'HFCS (AR4GWP100)',
-            'PFC (kt CO₂ eq)': 'PFCS (AR4GWP100)',
-            'SF₆ (kt CO₂ eq)': 'SF6 (AR4GWP100)',
-        }
     }
-    cols_to_drop = ['Unnamed: 14', 'Unnamed: 16', 'Código IPCC.1',
-                    'Categorías de fuente y sumidero de gases de efecto invernadero.1']
+    cols_to_drop = [
+        "Unnamed: 14",
+        "Unnamed: 16",
+        "Código IPCC.1",
+        "Categorías de fuente y sumidero de gases de efecto invernadero.1",
+    ]
     # columns for category code and original category name
-    index_cols = ['Código IPCC', 'Categorías de fuente y sumidero de gases de efecto invernadero']
+    index_cols = [
+        "Código IPCC",
+        "Categorías de fuente y sumidero de gases de efecto invernadero",
+    ]
 
     # operations on long format DF
-    cols_for_space_stripping = ['category', 'orig_cat_name', 'entity']
+    cols_for_space_stripping = ["category", "orig_cat_name", "entity"]
 
     time_format = "%Y"
 
@@ -85,7 +93,7 @@ if __name__ == "__main__":
         "source": "CHL-GHG-Inventory",
         "provenance": "measured",
         "area": "CHL",
-        "scenario": "BUR4"
+        "scenario": "BUR4",
     }
 
     coords_value_mapping = {
@@ -117,14 +125,14 @@ if __name__ == "__main__":
     }
 
     coords_value_filling = {
-        'category': {  # col to fill
-            'orig_cat_name': {  # col to fill from
-                'Todas las emisiones y las absorciones nacionales': '0',  # from value: to value
-                'Tanque internacional': 'M.BK',
-                'Aviación internacional': 'M.BK.A',
-                'Navegación internacional': 'M.BK.M',
-                'Operaciones multilaterales': 'M.MULTIOP',
-                'Emisiones de CO2 de la biomasa': 'M.BIO',
+        "category": {  # col to fill
+            "orig_cat_name": {  # col to fill from (from value: to value)
+                "Todas las emisiones y las absorciones nacionales": "0",
+                "Tanque internacional": "M.BK",
+                "Aviación internacional": "M.BK.A",
+                "Navegación internacional": "M.BK.M",
+                "Operaciones multilaterales": "M.MULTIOP",
+                "Emisiones de CO2 de la biomasa": "M.BIO",
             }
         }
     }
@@ -141,7 +149,9 @@ if __name__ == "__main__":
     filter_keep = {}
 
     meta_data = {
-        "references": "https://unfccc.int/documents/267936, https://snichile.mma.gob.cl/wp-content/uploads/2021/03/Inventario_Nacional_de_GEI-1990-2018.xlsx",
+        "references": "https://unfccc.int/documents/267936, "
+        "https://snichile.mma.gob.cl/wp-content/uploads/2021/03/"
+        "Inventario_Nacional_de_GEI-1990-2018.xlsx",
         "rights": "",
         "contact": "mail@johannes-guetschow.de.de",
         "title": "Chile: BUR4",
@@ -165,16 +175,24 @@ if __name__ == "__main__":
     for year in years_to_read:
         # read sheet for the year. Each sheet contains several tables,
         # we only read the upper row as the other tables are summary tables
-        df_current = pd.read_excel(input_folder / inventory_file, sheet_name=str(year), skiprows=2, nrows=442, engine="openpyxl")
+        df_current = pd.read_excel(
+            input_folder / inventory_file,
+            sheet_name=str(year),
+            skiprows=2,
+            nrows=442,
+            engine="openpyxl",
+        )
         # drop the columns which are empty and repetition of the metadata for the second block
-        df_current.drop(cols_to_drop, axis=1, inplace=True)
+        df_current = df_current.drop(cols_to_drop, axis=1)
         # drop all rows where the index cols (category code and name) are both NaN
         # as without one of them there is no category information
-        df_current.dropna(axis=0, how='all', subset=index_cols, inplace=True)
+        df_current = df_current.dropna(axis=0, how="all", subset=index_cols)
         # set multi-index. necessary for the stack operation in the conversion to long format
         df_current = df_current.set_index(index_cols)
         # add unit row using information from entity row and add to index
-        df_current = pm2.pm2io.nir_add_unit_information(df_current, unit_row=unit_row, **unit_info)
+        df_current = pm2.pm2io.nir_add_unit_information(
+            df_current, unit_row=unit_row, **unit_info
+        )
         # actual conversion to long format
         df_current = pm2.pm2io.nir_convert_df_to_long(df_current, year)
         # aggregate to one df
@@ -192,7 +210,7 @@ if __name__ == "__main__":
     for col in cols_for_space_stripping:
         df_all[col] = df_all[col].str.strip()
 
-    df_all["category"] = df_all["category"].str.rstrip('.')
+    df_all["category"] = df_all["category"].str.rstrip(".")
 
     data_if = pm2.pm2io.convert_long_dataframe_if(
         df_all,
@@ -204,11 +222,11 @@ if __name__ == "__main__":
         coords_value_filling=coords_value_filling,
         filter_remove=filter_remove,
         filter_keep=filter_keep,
-        meta_data=meta_data
+        meta_data=meta_data,
+        time_format="%Y",
     )
 
-
-    #conversion to PRIMAP2 native format
+    # conversion to PRIMAP2 native format
     data_pm2 = pm2.pm2io.from_interchange_format(data_if)
     # convert back to IF to have units in the fixed format
     data_if = data_pm2.pr.to_interchange_format()
@@ -216,11 +234,16 @@ if __name__ == "__main__":
     # ###
     # save data to IF and native format
     # ###
-    pm2.pm2io.write_interchange_format(output_folder / (output_filename + coords_terminologies["category"]), data_if)
+    pm2.pm2io.write_interchange_format(
+        output_folder / (output_filename + coords_terminologies["category"]), data_if
+    )
 
     data_pm2 = pm2.pm2io.from_interchange_format(data_if)
     encoding = {var: compression for var in data_pm2.data_vars}
-    data_pm2.pr.to_netcdf(output_folder / (output_filename + coords_terminologies["category"] + ".nc"), encoding=encoding)
+    data_pm2.pr.to_netcdf(
+        output_folder / (output_filename + coords_terminologies["category"] + ".nc"),
+        encoding=encoding,
+    )
 
     # ###
     # conversion to ipcc 2006 categories
@@ -236,10 +259,10 @@ if __name__ == "__main__":
         coords_value_filling=coords_value_filling,
         filter_remove=filter_remove,
         filter_keep=filter_keep,
-        meta_data=meta_data
+        meta_data=meta_data,
     )
 
-    cat_label = 'category (' + coords_terminologies_2006["category"] + ')'
+    cat_label = "category (" + coords_terminologies_2006["category"] + ")"
     filter_data(data_if_2006, filter_remove=filter_remove_IPCC2006)
     data_if_2006 = data_if_2006.replace({cat_label: cat_mapping})
 
@@ -252,10 +275,10 @@ if __name__ == "__main__":
             print(f"Aggregating category {cat_to_agg}")
             df_combine = df_test.copy(deep=True)
 
-            time_format = '%Y'
+            time_format = "%Y"
             time_columns = [
                 col
-                for col in df_combine.columns.values
+                for col in df_combine.columns.to_numpy()
                 if matches_time_format(col, time_format)
             ]
 
@@ -263,7 +286,15 @@ if __name__ == "__main__":
                 df_combine[col] = pd.to_numeric(df_combine[col], errors="coerce")
 
             df_combine = df_combine.groupby(
-                by=['source', 'scenario (PRIMAP)', 'provenance', 'area (ISO3)', 'entity', 'unit']).sum()
+                by=[
+                    "source",
+                    "scenario (PRIMAP)",
+                    "provenance",
+                    "area (ISO3)",
+                    "entity",
+                    "unit",
+                ]
+            ).sum()
 
             df_combine.insert(0, cat_label, cat_to_agg)
             df_combine.insert(1, "orig_cat_name", aggregate_cats[cat_to_agg]["name"])
@@ -274,12 +305,19 @@ if __name__ == "__main__":
         else:
             print(f"no data to aggregate category {cat_to_agg}")
 
-    #conversion to PRIMAP2 native format
+    # conversion to PRIMAP2 native format
     data_pm2_2006 = pm2.pm2io.from_interchange_format(data_if_2006)
     # convert back to IF to have units in the fixed format
     data_if_2006 = data_pm2_2006.pr.to_interchange_format()
 
-    pm2.pm2io.write_interchange_format(output_folder / (output_filename + coords_terminologies_2006["category"]), data_if_2006)
+    pm2.pm2io.write_interchange_format(
+        output_folder / (output_filename + coords_terminologies_2006["category"]),
+        data_if_2006,
+    )
 
     encoding = {var: compression for var in data_pm2_2006.data_vars}
-    data_pm2_2006.pr.to_netcdf(output_folder / (output_filename + coords_terminologies_2006["category"] + ".nc"), encoding=encoding)
+    data_pm2_2006.pr.to_netcdf(
+        output_folder
+        / (output_filename + coords_terminologies_2006["category"] + ".nc"),
+        encoding=encoding,
+    )

+ 100 - 55
src/unfccc_ghg_data/unfccc_reader/Chile/read_CHL_BUR5_from_xlsx.py

@@ -1,12 +1,16 @@
-# this script reads data from Chile's 2020 national inventory which is underlying BUR4
-# Data is read from the xlsx file
+"""
+Read Chile's 2022 inventory from xlsx
+
+This script reads data from Chile's 2022 national inventory which is underlying BUR5.
+Data is read from the xlsx file
+"""
 
 import os
 import sys
 
 import pandas as pd
 import primap2 as pm2
-from .config_chl_bur4 import aggregate_cats, cat_mapping, filter_remove_IPCC2006
+from config_chl_bur4 import aggregate_cats, cat_mapping, filter_remove_IPCC2006
 from primap2.pm2io._data_reading import filter_data, matches_time_format
 
 from unfccc_ghg_data.helper import downloaded_data_path, extracted_data_path
@@ -17,43 +21,50 @@ if __name__ == "__main__":
     # ###
 
     # folders and files
-    input_folder = downloaded_data_path / 'UNFCCC' / 'Chile' / 'BUR5'
-    output_folder = extracted_data_path / 'UNFCCC' / 'Chile'
+    input_folder = downloaded_data_path / "UNFCCC" / "Chile" / "BUR5"
+    output_folder = extracted_data_path / "UNFCCC" / "Chile"
     if not output_folder.exists():
         output_folder.mkdir()
 
-    output_filename = 'CHL_BUR5_2022_'
+    output_filename = "CHL_BUR5_2022_"
 
-    inventory_file = '2022_GEI_CL.xlsx'
+    inventory_file = "2022_GEI_CL.xlsx"
     years_to_read = range(1990, 2020 + 1)
-    time_format='%Y'
+    time_format = "%Y"
 
     # configuration for conversion to PRIMAP2 data format
     unit_row = "header"
     unit_info = {
-        'regexp_entity': r'(.*)\s\(.*\)$',
-        'regexp_unit': r'.*\s\((.*)\)$',
-        'default_unit': 'kt',
-        'manual_repl_unit': {
-            'kt CO₂ eq': 'ktCO2eq',
-            'HFC (kt CO₂ eq)': 'ktCO2eq',
-            'PFC (kt CO₂ eq)': 'ktCO2eq',
-            'SF₆ (kt CO₂ eq)': 'ktCO2eq',
+        "regexp_entity": r"(.*)\s\(.*\)$",
+        "regexp_unit": r".*\s\((.*)\)$",
+        "default_unit": "kt",
+        "manual_repl_unit": {
+            "kt CO₂ eq": "ktCO2eq",
+            "HFC (kt CO₂ eq)": "ktCO2eq",
+            "PFC (kt CO₂ eq)": "ktCO2eq",
+            "SF₆ (kt CO₂ eq)": "ktCO2eq",
+        },
+        "manual_repl_entity": {
+            "kt CO₂ eq": "KYOTOGHG (AR4GWP100)",
+            "HFC (kt CO₂ eq)": "HFCS (AR4GWP100)",
+            "PFC (kt CO₂ eq)": "PFCS (AR4GWP100)",
+            "SF₆ (kt CO₂ eq)": "SF6 (AR4GWP100)",
         },
-        'manual_repl_entity': {
-            'kt CO₂ eq': 'KYOTOGHG (AR4GWP100)',
-            'HFC (kt CO₂ eq)': 'HFCS (AR4GWP100)',
-            'PFC (kt CO₂ eq)': 'PFCS (AR4GWP100)',
-            'SF₆ (kt CO₂ eq)': 'SF6 (AR4GWP100)',
-        }
     }
-    cols_to_drop = ['Unnamed: 14', 'Unnamed: 16', 'Código IPCC.1',
-                    'Categorías de fuente y sumidero de gases de efecto invernadero.1']
+    cols_to_drop = [
+        "Unnamed: 14",
+        "Unnamed: 16",
+        "Código IPCC.1",
+        "Categorías de fuente y sumidero de gases de efecto invernadero.1",
+    ]
     # columns for category code and original category name
-    index_cols = ['Código IPCC', 'Categorías de fuente y sumidero de gases de efecto invernadero']
+    index_cols = [
+        "Código IPCC",
+        "Categorías de fuente y sumidero de gases de efecto invernadero",
+    ]
 
     # operations on long format DF
-    cols_for_space_stripping = ['category', 'orig_cat_name', 'entity']
+    cols_for_space_stripping = ["category", "orig_cat_name", "entity"]
 
     time_format = "%Y"
 
@@ -83,7 +94,7 @@ if __name__ == "__main__":
         "source": "CHL-GHG-Inventory",
         "provenance": "measured",
         "area": "CHL",
-        "scenario": "BUR5"
+        "scenario": "BUR5",
     }
 
     coords_value_mapping = {
@@ -115,14 +126,14 @@ if __name__ == "__main__":
     }
 
     coords_value_filling = {
-        'category': {  # col to fill
-            'orig_cat_name': {  # col to fill from
-                'Todas las emisiones y las absorciones nacionales': '0',  # from value: to value
-                'Tanque internacional': 'M.BK',
-                'Aviación internacional': 'M.BK.A',
-                'Navegación internacional': 'M.BK.M',
-                'Operaciones multilaterales': 'M.MULTIOP',
-                'Emisiones de CO2 de la biomasa': 'M.BIO',
+        "category": {  # col to fill
+            "orig_cat_name": {  # col to fill from (from value: to value)
+                "Todas las emisiones y las absorciones nacionales": "0",
+                "Tanque internacional": "M.BK",
+                "Aviación internacional": "M.BK.A",
+                "Navegación internacional": "M.BK.M",
+                "Operaciones multilaterales": "M.MULTIOP",
+                "Emisiones de CO2 de la biomasa": "M.BIO",
             }
         }
     }
@@ -132,14 +143,19 @@ if __name__ == "__main__":
             "entity": ["Absorciones CO₂", "Emisiones CO₂"],
         },
         "f2": {
-            "orig_cat_name": ["Partidas informativas", "Todas las emisiones nacionales"],
+            "orig_cat_name": [
+                "Partidas informativas",
+                "Todas las emisiones nacionales",
+            ],
         },
     }
 
     filter_keep = {}
 
     meta_data = {
-        "references": "https://unfccc.int/documents/624735, https://snichile.mma.gob.cl/wp-content/uploads/2023/04/2022_GEI_CL.xlsx",
+        "references": "https://unfccc.int/documents/624735, "
+        "https://snichile.mma.gob.cl/wp-content/uploads/2023/04/"
+        "2022_GEI_CL.xlsx",
         "rights": "",
         "contact": "mail@johannes-guetschow.de.de",
         "title": "Chile: BUR5",
@@ -163,16 +179,24 @@ if __name__ == "__main__":
     for year in years_to_read:
         # read sheet for the year. Each sheet contains several tables,
         # we only read the upper row as the other tables are summary tables
-        df_current = pd.read_excel(input_folder / inventory_file, sheet_name=str(year), skiprows=2, nrows=442, engine="openpyxl")
+        df_current = pd.read_excel(
+            input_folder / inventory_file,
+            sheet_name=str(year),
+            skiprows=2,
+            nrows=442,
+            engine="openpyxl",
+        )
         # drop the columns which are empty and repetition of the metadata for the second block
-        df_current.drop(cols_to_drop, axis=1, inplace=True)
+        df_current = df_current.drop(cols_to_drop, axis=1)
         # drop all rows where the index cols (category code and name) are both NaN
         # as without one of them there is no category information
-        df_current.dropna(axis=0, how='all', subset=index_cols, inplace=True)
+        df_current = df_current.dropna(axis=0, how="all", subset=index_cols)
         # set multi-index. necessary for the stack operation in the conversion to long format
         df_current = df_current.set_index(index_cols)
         # add unit row using information from entity row and add to index
-        df_current = pm2.pm2io.nir_add_unit_information(df_current, unit_row=unit_row, **unit_info)
+        df_current = pm2.pm2io.nir_add_unit_information(
+            df_current, unit_row=unit_row, **unit_info
+        )
         # actual conversion to long format
         df_current = pm2.pm2io.nir_convert_df_to_long(df_current, year)
         # aggregate to one df
@@ -190,7 +214,7 @@ if __name__ == "__main__":
     for col in cols_for_space_stripping:
         df_all[col] = df_all[col].str.strip()
 
-    df_all["category"] = df_all["category"].str.rstrip('.')
+    df_all["category"] = df_all["category"].str.rstrip(".")
 
     data_if = pm2.pm2io.convert_long_dataframe_if(
         df_all,
@@ -206,8 +230,7 @@ if __name__ == "__main__":
         time_format=time_format,
     )
 
-
-    #conversion to PRIMAP2 native format
+    # conversion to PRIMAP2 native format
     data_pm2 = pm2.pm2io.from_interchange_format(data_if)
     # convert back to IF to have units in the fixed format
     data_if = data_pm2.pr.to_interchange_format()
@@ -215,11 +238,16 @@ if __name__ == "__main__":
     # ###
     # save data to IF and native format
     # ###
-    pm2.pm2io.write_interchange_format(output_folder / (output_filename + coords_terminologies["category"]), data_if)
+    pm2.pm2io.write_interchange_format(
+        output_folder / (output_filename + coords_terminologies["category"]), data_if
+    )
 
     data_pm2 = pm2.pm2io.from_interchange_format(data_if)
     encoding = {var: compression for var in data_pm2.data_vars}
-    data_pm2.pr.to_netcdf(output_folder / (output_filename + coords_terminologies["category"] + ".nc"), encoding=encoding)
+    data_pm2.pr.to_netcdf(
+        output_folder / (output_filename + coords_terminologies["category"] + ".nc"),
+        encoding=encoding,
+    )
 
     # ###
     # conversion to ipcc 2006 categories
@@ -236,10 +264,10 @@ if __name__ == "__main__":
         filter_remove=filter_remove,
         filter_keep=filter_keep,
         meta_data=meta_data,
-        time_format=time_format
+        time_format=time_format,
     )
 
-    cat_label = 'category (' + coords_terminologies_2006["category"] + ')'
+    cat_label = "category (" + coords_terminologies_2006["category"] + ")"
     filter_data(data_if_2006, filter_remove=filter_remove_IPCC2006)
     data_if_2006 = data_if_2006.replace({cat_label: cat_mapping})
 
@@ -252,10 +280,10 @@ if __name__ == "__main__":
             print(f"Aggregating category {cat_to_agg}")
             df_combine = df_test.copy(deep=True)
 
-            time_format = '%Y'
+            time_format = "%Y"
             time_columns = [
                 col
-                for col in df_combine.columns.values
+                for col in df_combine.columns.to_numpy()
                 if matches_time_format(col, time_format)
             ]
 
@@ -263,8 +291,18 @@ if __name__ == "__main__":
                 df_combine[col] = pd.to_numeric(df_combine[col], errors="coerce")
 
             df_combine = df_combine.groupby(
-                by=['source', 'scenario (PRIMAP)', 'provenance', 'area (ISO3)', 'entity', 'unit']).sum()
-            df_combine = df_combine.drop(columns=["category (IPCC2006_PRIMAP)", "orig_cat_name"])
+                by=[
+                    "source",
+                    "scenario (PRIMAP)",
+                    "provenance",
+                    "area (ISO3)",
+                    "entity",
+                    "unit",
+                ]
+            ).sum()
+            df_combine = df_combine.drop(
+                columns=["category (IPCC2006_PRIMAP)", "orig_cat_name"]
+            )
 
             df_combine.insert(0, cat_label, cat_to_agg)
             df_combine.insert(1, "orig_cat_name", aggregate_cats[cat_to_agg]["name"])
@@ -275,12 +313,19 @@ if __name__ == "__main__":
         else:
             print(f"no data to aggregate category {cat_to_agg}")
 
-    #conversion to PRIMAP2 native format
+    # conversion to PRIMAP2 native format
     data_pm2_2006 = pm2.pm2io.from_interchange_format(data_if_2006)
     # convert back to IF to have units in the fixed format
     data_if_2006 = data_pm2_2006.pr.to_interchange_format()
 
-    pm2.pm2io.write_interchange_format(output_folder / (output_filename + coords_terminologies_2006["category"]), data_if_2006)
+    pm2.pm2io.write_interchange_format(
+        output_folder / (output_filename + coords_terminologies_2006["category"]),
+        data_if_2006,
+    )
 
     encoding = {var: compression for var in data_pm2_2006.data_vars}
-    data_pm2_2006.pr.to_netcdf(output_folder / (output_filename + coords_terminologies_2006["category"] + ".nc"), encoding=encoding)
+    data_pm2_2006.pr.to_netcdf(
+        output_folder
+        / (output_filename + coords_terminologies_2006["category"] + ".nc"),
+        encoding=encoding,
+    )

+ 30 - 1
src/unfccc_ghg_data/unfccc_reader/Colombia/__init__.py

@@ -1 +1,30 @@
-"""Code to read Colombia's submissions"""
+"""Read Colombia's BURs, NIRs, NCs
+
+Scripts and configurations to read Colombia's submissions to the UNFCCC.
+Currently, the following submissions and datasets are available (all datasets
+including DI (read using the DI-reader) and legacy BUR/NIR (no code)):
+
+.. exec_code::
+    :hide_code:
+
+    from unfccc_ghg_data.helper.functions import (get_country_datasets,
+                                                  get_country_submissions)
+    country = 'COL'
+    # print available submissions
+    print("="*15 + " Available submissions " + "="*15)
+    get_country_submissions(country, True)
+    print("")
+
+    #print available datasets
+    print("="*15 + " Available datasets " + "="*15)
+    get_country_datasets(country, True)
+
+You can also obtain this information running
+
+.. code-block:: bash
+
+    poetry run doit country_info country=COL
+
+See below for a listing of scripts for BUR/NIR reading including links.
+
+"""

+ 104 - 84
src/unfccc_ghg_data/unfccc_reader/Colombia/read_COL_BUR3_from_xlsx.py

@@ -1,6 +1,10 @@
-# this script reads data from Colombia's BUR3
-# Data is read from the xlsx file which has been exported from the google docs
-# spreadsheet which is linked in the BUR
+"""
+Read Colombia's BUR3 inventory from xlsx
+
+This script reads data from Colombia's BUR3
+Data is read from the xlsx file which has been exported from the google docs
+spreadsheet which is linked in the BUR
+"""
 
 import pandas as pd
 import primap2 as pm2
@@ -14,17 +18,17 @@ if __name__ == "__main__":
     # ###
     # configuration
     # ###
-    input_folder = downloaded_data_path / 'UNFCCC' / 'Colombia' / 'BUR3'
-    output_folder = extracted_data_path / 'UNFCCC' / 'Colombia'
+    input_folder = downloaded_data_path / "UNFCCC" / "Colombia" / "BUR3"
+    output_folder = extracted_data_path / "UNFCCC" / "Colombia"
     if not output_folder.exists():
         output_folder.mkdir()
 
-    output_filename = 'COL_BUR3_2022_'
+    output_filename = "COL_BUR3_2022_"
 
-    inventory_file = 'TR_1990-2018_BUR3-AR5_VF.xlsx'
+    inventory_file = "TR_1990-2018_BUR3-AR5_VF.xlsx"
     years_to_read = range(1990, 2018 + 1)
 
-    sheet_to_read = 'TR 1990-2018'
+    sheet_to_read = "TR 1990-2018"
     cols_to_read = range(0, 47)
 
     compression = dict(zlib=True, complevel=9)
@@ -37,7 +41,6 @@ if __name__ == "__main__":
         "unit": "unit",
     }
 
-
     coords_terminologies = {
         "area": "ISO3",
         "category": "IPCC2006",
@@ -54,53 +57,52 @@ if __name__ == "__main__":
     coords_value_mapping = {
         "unit": "PRIMAP1",
         "entity": {
-            'Absorciones CO2': 'CO2 Absorptions',
-            'Emisiones CO2': 'CO2 Emissions',
-            'Emisiones netas (AR5GWP100)': 'KYOTOGHG (AR5GWP100)',
-            'HFC-23': 'HFC23',
-            'HFC-32': 'HFC32',
+            "Absorciones CO2": "CO2 Absorptions",
+            "Emisiones CO2": "CO2 Emissions",
+            "Emisiones netas (AR5GWP100)": "KYOTOGHG (AR5GWP100)",
+            "HFC-23": "HFC23",
+            "HFC-32": "HFC32",
             #'HFC-41': 'HFC41',
-            'HFC-43-10mee': 'HFC4310mee',
-            'HFC-125': 'HFC125',
+            "HFC-43-10mee": "HFC4310mee",
+            "HFC-125": "HFC125",
             #'HFC-134': 'HFC134',
-            'HFC-134a': 'HFC134a',
-            'HFC-152a': 'HFC152a',
+            "HFC-134a": "HFC134a",
+            "HFC-152a": "HFC152a",
             #'HFC-143': 'HFC143',
-            'HFC-143a': 'HFC143a',
-            'HFC-227ea': 'HFC227ea',
-            'HFC-236fa': 'HFC236fa',
+            "HFC-143a": "HFC143a",
+            "HFC-227ea": "HFC227ea",
+            "HFC-236fa": "HFC236fa",
             #'HFC-245ca': 'HFC245ca',
-            'HFC-245fa': 'HFC245fa',
-            'HFC-365mfc': 'HFC365mfc',
-            'PFC-116': 'C2F6',
-            'PFC-14': 'CF4',
+            "HFC-245fa": "HFC245fa",
+            "HFC-365mfc": "HFC365mfc",
+            "PFC-116": "C2F6",
+            "PFC-14": "CF4",
         },
     }
 
-
     filter_remove = {
         "fGWP": {
             "entity": [
-                'Absorciones CO2 (AR5GWP100)',
-                'Absorciones totales (AR5GWP100)',
-                'CH4 (AR5GWP100)',
-                'Emisiones CO2 (AR5GWP100)',
-                'Total emisiones (AR5GWP100)',
-                'HFC-125 (AR5GWP100)',
-                'HFC-134a (AR5GWP100)',
-                'HFC-143a (AR5GWP100)',
-                'HFC-152a (AR5GWP100)',
-                'HFC-227ea (AR5GWP100)',
-                'HFC-23 (AR5GWP100)',
-                'HFC-236fa (AR5GWP100)',
-                'HFC-245fa (AR5GWP100)',
-                'HFC-32 (AR5GWP100)',
-                'HFC-365mfc (AR5GWP100)',
-                'HFC-43-10mee (AR5GWP100)',
-                'N2O (AR5GWP100)',
-                'PFC-116 (AR5GWP100)',
-                'PFC-14 (AR5GWP100)',
-                'SF6 (AR5GWP100)',
+                "Absorciones CO2 (AR5GWP100)",
+                "Absorciones totales (AR5GWP100)",
+                "CH4 (AR5GWP100)",
+                "Emisiones CO2 (AR5GWP100)",
+                "Total emisiones (AR5GWP100)",
+                "HFC-125 (AR5GWP100)",
+                "HFC-134a (AR5GWP100)",
+                "HFC-143a (AR5GWP100)",
+                "HFC-152a (AR5GWP100)",
+                "HFC-227ea (AR5GWP100)",
+                "HFC-23 (AR5GWP100)",
+                "HFC-236fa (AR5GWP100)",
+                "HFC-245fa (AR5GWP100)",
+                "HFC-32 (AR5GWP100)",
+                "HFC-365mfc (AR5GWP100)",
+                "HFC-43-10mee (AR5GWP100)",
+                "N2O (AR5GWP100)",
+                "PFC-116 (AR5GWP100)",
+                "PFC-14 (AR5GWP100)",
+                "SF6 (AR5GWP100)",
             ],
         },
     }
@@ -116,25 +118,33 @@ if __name__ == "__main__":
         "institution": "UNFCCC",
     }
 
-
     # read the data
-    data_raw = pd.read_excel(input_folder / inventory_file, sheet_name=sheet_to_read,
-                             skiprows=0, nrows=15025, usecols=cols_to_read,
-                             engine="openpyxl", header=None)
+    data_raw = pd.read_excel(
+        input_folder / inventory_file,
+        sheet_name=sheet_to_read,
+        skiprows=0,
+        nrows=15025,
+        usecols=cols_to_read,
+        engine="openpyxl",
+        header=None,
+    )
 
     # fill the units to the right as for merged cells the unit is only in the first cell
-    data_raw.iloc[unit_row] = data_raw.iloc[unit_row].fillna(axis=0, method="ffill")
+    data_raw.iloc[unit_row] = data_raw.iloc[unit_row].ffill(axis=0)
     merge_rows = [1, 2]
     for row in merge_rows:
         data_raw.iloc[row] = data_raw.iloc[row].astype(str).str.replace("nan", "")
     data_raw.iloc[merge_rows[0]] = (
-    data_raw.iloc[merge_rows[0]].astype(str) + " " + data_raw.iloc[
-            merge_rows[1]].astype(str))
+        data_raw.iloc[merge_rows[0]].astype(str)
+        + " "
+        + data_raw.iloc[merge_rows[1]].astype(str)
+    )
     data_raw.iloc[merge_rows[0]] = data_raw.iloc[merge_rows[0]].str.strip()
     data_raw = data_raw.drop(index=data_raw.index[merge_rows[1]])
 
     # merge the category cols
     def join_code_parts(series):
+        """Create a code from the data in the individual columns"""
         code = series.iloc[0]
         for part in series.iloc[1:]:
             if part != "nan":
@@ -143,10 +153,11 @@ if __name__ == "__main__":
             code = "0"
         return code
 
-    cat_columns = [0, 1, 2, 3, 4, 5] # xlsx cols are ["MOD","CAP","CAT","SCAT","NROM",
+    cat_columns = [0, 1, 2, 3, 4, 5]  # xlsx cols are ["MOD","CAP","CAT","SCAT","NROM",
     # "NUM"]
-    data_raw["category"] = data_raw[cat_columns].astype(str).agg(func=join_code_parts,
-                                                                 axis=1)
+    data_raw["category"] = (
+        data_raw[cat_columns].astype(str).agg(func=join_code_parts, axis=1)
+    )
     data_raw = data_raw.drop(columns=cat_columns)
 
     # prepare the dataframe for processig with primap2 functions
@@ -162,27 +173,29 @@ if __name__ == "__main__":
     for year in years:
         data_year = data_raw[data_raw["ANO"] == year]
         data_year = data_year.drop(columns=["ANO", "Categorías de fuente y sumideros"])
-        df_long_new = pm2.pm2io.nir_convert_df_to_long(data_year, year,
-                                                       ["category", "unit", "entity",
-                                                        "time", "data"])
+        df_long_new = pm2.pm2io.nir_convert_df_to_long(
+            data_year, year, ["category", "unit", "entity", "time", "data"]
+        )
         if df_all is None:
             df_all = df_long_new
         else:
-            df_all = pd.concat([df_all, df_long_new], axis=0, join='outer')
+            df_all = pd.concat([df_all, df_long_new], axis=0, join="outer")
 
     df_all["category"] = df_all["category"].str[0]
 
     # map units
-    df_all["unit"] = df_all["unit"].replace({
-        'GEI DIRECTOS - Gg ': 'Gg',
-        'GEI DIRECTOS - Gg CO2 equivalente': 'GgCO2eq',
-    }
+    df_all["unit"] = df_all["unit"].replace(
+        {
+            "GEI DIRECTOS - Gg ": "Gg",
+            "GEI DIRECTOS - Gg CO2 equivalente": "GgCO2eq",
+        }
     )
 
     # add GWP information to entity
     for entity in df_all["entity"].unique():
-        df_all["entity"][(df_all["entity"] == entity) & (
-                    df_all["unit"] == "GgCO2eq")] = f"{entity} (AR5GWP100)"
+        df_all["entity"][
+            (df_all["entity"] == entity) & (df_all["unit"] == "GgCO2eq")
+        ] = f"{entity} (AR5GWP100)"
 
     # reset index before conversion to pm2 IF
     df_all = df_all.reset_index(drop=True)
@@ -196,26 +209,25 @@ if __name__ == "__main__":
     data_if = pm2.pm2io.convert_long_dataframe_if(
         df_all,
         coords_cols=coords_cols,
-        #add_coords_cols=add_coords_cols,
+        # add_coords_cols=add_coords_cols,
         coords_defaults=coords_defaults,
         coords_terminologies=coords_terminologies,
         coords_value_mapping=coords_value_mapping,
-        #coords_value_filling=coords_value_filling,
+        # coords_value_filling=coords_value_filling,
         filter_remove=filter_remove,
-        #filter_keep=filter_keep,
+        # filter_keep=filter_keep,
         meta_data=meta_data,
-        convert_str=True
-        )
-
+        convert_str=True,
+        time_format="%Y",
+    )
 
     # combine CO2 emissions and absorptions
-    data_CO2 = data_if[data_if["entity"].isin([
-        'CO2 Absorptions', 'CO2 Emissions'])]
+    data_CO2 = data_if[data_if["entity"].isin(["CO2 Absorptions", "CO2 Emissions"])]
 
-    time_format = '%Y'
+    time_format = "%Y"
     time_columns = [
         col
-        for col in data_CO2.columns.values
+        for col in data_CO2.columns.to_numpy()
         if matches_time_format(col, time_format)
     ]
 
@@ -223,20 +235,23 @@ if __name__ == "__main__":
         data_CO2[col] = pd.to_numeric(data_CO2[col], errors="coerce")
 
     data_CO2 = data_CO2.groupby(
-        by=['source', 'scenario (PRIMAP)', 'provenance', 'area (ISO3)',
+        by=[
+            "source",
+            "scenario (PRIMAP)",
+            "provenance",
+            "area (ISO3)",
             f"category ({coords_terminologies['category']})",
-            'unit']).sum(min_count = 1)
+            "unit",
+        ]
+    ).sum(min_count=1)
 
-    data_CO2.insert(0, 'entity', 'CO2')
+    data_CO2.insert(0, "entity", "CO2")
     data_CO2 = data_CO2.reset_index()
 
     data_if = pd.concat([data_if, data_CO2])
 
-
-
     data_pm2 = pm2.pm2io.from_interchange_format(data_if)
 
-
     # convert back to IF to have units in the fixed format
     data_if = data_pm2.pr.to_interchange_format()
 
@@ -245,7 +260,12 @@ if __name__ == "__main__":
     # ###
     if not output_folder.exists():
         output_folder.mkdir()
-    pm2.pm2io.write_interchange_format(output_folder / (output_filename + coords_terminologies["category"]), data_if)
+    pm2.pm2io.write_interchange_format(
+        output_folder / (output_filename + coords_terminologies["category"]), data_if
+    )
 
     encoding = {var: compression for var in data_pm2.data_vars}
-    data_pm2.pr.to_netcdf(output_folder / (output_filename + coords_terminologies["category"] + ".nc"), encoding=encoding)
+    data_pm2.pr.to_netcdf(
+        output_folder / (output_filename + coords_terminologies["category"] + ".nc"),
+        encoding=encoding,
+    )

+ 30 - 0
src/unfccc_ghg_data/unfccc_reader/Indonesia/__init__.py

@@ -0,0 +1,30 @@
+"""Read Indonesia's BURs, NIRs, NCs
+
+Scripts and configurations to read Indonesia's submissions to the UNFCCC.
+Currently, the following submissions and datasets are available (all datasets
+including DI (read using the DI-reader) and legacy BUR/NIR (no code)):
+
+.. exec_code::
+    :hide_code:
+
+    from unfccc_ghg_data.helper.functions import (get_country_datasets,
+                                                  get_country_submissions)
+    country = 'IDN'
+    # print available submissions
+    print("="*15 + " Available submissions " + "="*15)
+    get_country_submissions(country, True)
+    print("")
+
+    #print available datasets
+    print("="*15 + " Available datasets " + "="*15)
+    get_country_datasets(country, True)
+
+You can also obtain this information running
+
+.. code-block:: bash
+
+    poetry run doit country_info country=IDN
+
+See below for a listing of scripts for BUR/NIR reading including links.
+
+"""

+ 167 - 100
src/unfccc_ghg_data/unfccc_reader/Indonesia/read_IDN_BUR3_from_pdf.py

@@ -1,6 +1,11 @@
-# this script reads data from Indonesia's BUR3
-# Data is read from pdf
-# only the 2019 inventory is read as the BUR refers to BUR2 for earlier years
+"""
+Read Indonesia's BUR3 from pdf
+
+This script reads data from Indonesia's BUR3
+Data are read from pdf using camelot
+only the 2019 inventory is read as the BUR refers to BUR2 for earlier years
+
+"""
 
 import camelot
 import numpy as np
@@ -14,18 +19,19 @@ if __name__ == "__main__":
     # ###
     # configuration
     # ###
-    input_folder = downloaded_data_path / 'UNFCCC' / 'Indonesia' / 'BUR3'
-    output_folder = extracted_data_path / 'UNFCCC' / 'Indonesia'
+    input_folder = downloaded_data_path / "UNFCCC" / "Indonesia" / "BUR3"
+    output_folder = extracted_data_path / "UNFCCC" / "Indonesia"
     if not output_folder.exists():
         output_folder.mkdir()
 
-    output_filename = 'IDN_BUR3_2021_'
+    output_filename = "IDN_BUR3_2021_"
 
-    inventory_file = 'IndonesiaBUR_3_FINAL_REPORT_2.pdf'
+    inventory_file = "IndonesiaBUR_3_FINAL_REPORT_2.pdf"
 
-    gwp_to_use = 'SARGWP100'
+    gwp_to_use = "SARGWP100"
 
-    pages_to_read = range(61,65) # 65 is not read properly but contains almost no data anyway, so add it by hand '61-65'
+    pages_to_read = range(61, 65)  # 65 is not read properly but contains almost no
+    # data anyway, so add it by hand
 
     compression = dict(zlib=True, complevel=9)
 
@@ -36,17 +42,18 @@ if __name__ == "__main__":
     # special header as category code and name in one column
     header_long = ["orig_cat_name", "entity", "unit", "time", "data"]
 
+    time_format = "%Y"
 
     # manual category codes
     cat_codes_manual = {
-        'Total National Emissions and Removals': '0',
-        'Peat Decomposition': 'M.3.B.4.APD',
-        'Peat Fire': 'M.3.B.4.APF',
-        '4A1.2 Industrial Solid Waste Disposal': 'M.4.A.Ind',
+        "Total National Emissions and Removals": "0",
+        "Peat Decomposition": "M.3.B.4.APD",
+        "Peat Fire": "M.3.B.4.APF",
+        "4A1.2 Industrial Solid Waste Disposal": "M.4.A.Ind",
         #'3A2b Direct N2O Emissions from Manure Management': '3.A.2',
     }
 
-    cat_code_regexp = r'(?P<code>^[a-zA-Z0-9]{1,4})\s.*'
+    cat_code_regexp = r"(?P<code>^[a-zA-Z0-9]{1,4})\s.*"
 
     coords_cols = {
         "category": "category",
@@ -75,24 +82,26 @@ if __name__ == "__main__":
         "unit": "PRIMAP1",
         "category": "PRIMAP1",
         "entity": {
-            'Total 3 Gases': f"CO2CH4N2O ({gwp_to_use})",
-            'Net CO2 (1) (2)': 'CO2',
-            'CH4': f"CH4 ({gwp_to_use})",
-            'N2O': f"N2O ({gwp_to_use})",
-            'HFCs': f"HFCS ({gwp_to_use})",
-            'PFCs': f"PFCS ({gwp_to_use})",
-            'SF6': f"SF6 ({gwp_to_use})",
-            'NOx': 'NOX',
-            'CO': 'CO', # no mapping, just added for completeness here
-            'NMVOCs': 'NMVOC',
-            'SO2': 'SO2', # no mapping, just added for completeness here
-            'Other halogenated gases with CO2 equivalent conversion factors (3)': f"OTHERHFCS ({gwp_to_use})",
+            "Total 3 Gases": f"CO2CH4N2O ({gwp_to_use})",
+            "Net CO2 (1) (2)": "CO2",
+            "CH4": f"CH4 ({gwp_to_use})",
+            "N2O": f"N2O ({gwp_to_use})",
+            "HFCs": f"HFCS ({gwp_to_use})",
+            "PFCs": f"PFCS ({gwp_to_use})",
+            "SF6": f"SF6 ({gwp_to_use})",
+            "NOx": "NOX",
+            "CO": "CO",  # no mapping, just added for completeness here
+            "NMVOCs": "NMVOC",
+            "SO2": "SO2",  # no mapping, just added for completeness here
+            "Other halogenated gases with CO2 equivalent conversion factors (3)": f"OTHERHFCS ({gwp_to_use})",
         },
     }
 
-
     filter_remove = {
-        "fHFC": {"entity": 'Other halogenated gases without CO2 equivalent conversion factors (4)'}
+        "fHFC": {
+            "entity": "Other halogenated gases without CO2 equivalent conversion "
+            "factors (4)"
+        }
     }
 
     filter_keep = {}
@@ -107,84 +116,113 @@ if __name__ == "__main__":
     }
 
     # convert to mass units where possible
-    entities_to_convert_to_mass = [
-        'CH4', 'N2O', 'SF6'
-    ]
+    entities_to_convert_to_mass = ["CH4", "N2O", "SF6"]
 
-    # CO2 equivalents don't make sense for these substances, so unit has to be Gg instead of Gg CO2 equivalents as indicated in the table
-    entities_to_fix_unit = [
-        'NOx', 'CO', 'NMVOCs', 'SO2'
-    ]
+    # CO2 equivalents don't make sense for these substances, so unit has to be Gg
+    # instead of Gg CO2 equivalents as indicated in the table
+    entities_to_fix_unit = ["NOx", "CO", "NMVOCs", "SO2"]
 
     # add the data for the last page by hand as it's only one row
     data_last_page = [
-        ['5B Other (please specify)', 'Total 3 Gases', 'GgCO2eq', '2019', 'NE'],
-        ['5B Other (please specify)', 'Net CO2 (1) (2)', 'GgCO2eq', '2019', np.nan],
-        ['5B Other (please specify)', 'CH4', 'GgCO2eq', '2019', np.nan],
-        ['5B Other (please specify)', 'N2O', 'GgCO2eq', '2019', np.nan],
-        ['5B Other (please specify)', 'HFCs', 'GgCO2eq', '2019', np.nan],
-        ['5B Other (please specify)', 'PFCs', 'GgCO2eq', '2019', np.nan],
-        ['5B Other (please specify)', 'SF6', 'GgCO2eq', '2019', np.nan],
-        ['5B Other (please specify)', 'Other halogenated gases with CO2 equivalent conversion factors (3)', 'GgCO2eq', '2019', np.nan],
-        ['5B Other (please specify)', 'Other halogenated gases without CO2 equivalent conversion factors (4)', 'GgCO2eq', '2019', np.nan],
-        ['5B Other (please specify)', 'NOx', 'GgCO2eq', '2019', np.nan],
-        ['5B Other (please specify)', 'CO', 'GgCO2eq', '2019', np.nan],
-        ['5B Other (please specify)', 'NMVOCs', 'GgCO2eq', '2019', np.nan],
-        ['5B Other (please specify)', 'SO2', 'GgCO2eq', '2019', np.nan],
+        ["5B Other (please specify)", "Total 3 Gases", "GgCO2eq", "2019", "NE"],
+        ["5B Other (please specify)", "Net CO2 (1) (2)", "GgCO2eq", "2019", np.nan],
+        ["5B Other (please specify)", "CH4", "GgCO2eq", "2019", np.nan],
+        ["5B Other (please specify)", "N2O", "GgCO2eq", "2019", np.nan],
+        ["5B Other (please specify)", "HFCs", "GgCO2eq", "2019", np.nan],
+        ["5B Other (please specify)", "PFCs", "GgCO2eq", "2019", np.nan],
+        ["5B Other (please specify)", "SF6", "GgCO2eq", "2019", np.nan],
+        [
+            "5B Other (please specify)",
+            "Other halogenated gases with CO2 equivalent conversion factors (3)",
+            "GgCO2eq",
+            "2019",
+            np.nan,
+        ],
+        [
+            "5B Other (please specify)",
+            "Other halogenated gases without CO2 equivalent conversion factors (4)",
+            "GgCO2eq",
+            "2019",
+            np.nan,
+        ],
+        ["5B Other (please specify)", "NOx", "GgCO2eq", "2019", np.nan],
+        ["5B Other (please specify)", "CO", "GgCO2eq", "2019", np.nan],
+        ["5B Other (please specify)", "NMVOCs", "GgCO2eq", "2019", np.nan],
+        ["5B Other (please specify)", "SO2", "GgCO2eq", "2019", np.nan],
     ]
 
     df_last_page = pd.DataFrame(data_last_page, columns=header_long)
 
     aggregate_cats = {
-        '1.A.4': {'sources': ['1.A.4.a', '1.A.4.b'], 'name': 'Other Sectors (calculated)'},
-        '2.A.4': {'sources': ['2.A.4.a', '2.A.4.b', '2.A.4.d'], 'name': 'Other Process uses of Carbonates (calculated)'},
-        '2.B.8': {'sources': ['2.B.8.a', '2.B.8.b', '2.B.8.c', '2.B.8.f'], 'name': 'Petrochemical and Carbon Black production (calculated)'},
-        '4.A': {'sources': ['4.A.2', 'M.4.A.Ind'], 'name': 'Solid Waste Disposal (calculated)'},
+        "1.A.4": {
+            "sources": ["1.A.4.a", "1.A.4.b"],
+            "name": "Other Sectors (calculated)",
+        },
+        "2.A.4": {
+            "sources": ["2.A.4.a", "2.A.4.b", "2.A.4.d"],
+            "name": "Other Process uses of Carbonates (calculated)",
+        },
+        "2.B.8": {
+            "sources": ["2.B.8.a", "2.B.8.b", "2.B.8.c", "2.B.8.f"],
+            "name": "Petrochemical and Carbon Black production (calculated)",
+        },
+        "4.A": {
+            "sources": ["4.A.2", "M.4.A.Ind"],
+            "name": "Solid Waste Disposal (calculated)",
+        },
     }
 
     aggregate_cats_N2O = {
-        '3.A.2': {'sources': ['3.A.2.b'], 'name': '3A2 Manure Management'},
-        '3.A': {'sources': ['3.A.2'], 'name': '3A Livestock'},
+        "3.A.2": {"sources": ["3.A.2.b"], "name": "3A2 Manure Management"},
+        "3.A": {"sources": ["3.A.2"], "name": "3A Livestock"},
     }
 
     aggregate_cats_CO2CH4N2O = {
-        '3.A.2': {'sources': ['3.A.2', '3.A.2.b'], 'name': '3A2 Manure Management'},
+        "3.A.2": {"sources": ["3.A.2", "3.A.2.b"], "name": "3A2 Manure Management"},
     }
 
     df_all = None
 
     for page in pages_to_read:
-        tables = camelot.read_pdf(str(input_folder / inventory_file), pages=str(page),
-                                  flavor='lattice')
+        tables = camelot.read_pdf(
+            str(input_folder / inventory_file), pages=str(page), flavor="lattice"
+        )
         df_this_table = tables[0].df
         # replace line breaks, double, and triple spaces in category names
         df_this_table.iloc[:, 0] = df_this_table.iloc[:, 0].str.replace("\n", " ")
         df_this_table.iloc[:, 0] = df_this_table.iloc[:, 0].str.replace("   ", " ")
         df_this_table.iloc[:, 0] = df_this_table.iloc[:, 0].str.replace("  ", " ")
         # replace line breaks in units and entities
-        df_this_table.iloc[entity_row] = df_this_table.iloc[entity_row].str.replace('\n',
-                                                                                    '')
-        df_this_table.iloc[unit_row] = df_this_table.iloc[unit_row].str.replace('\n', '')
+        df_this_table.iloc[entity_row] = df_this_table.iloc[entity_row].str.replace(
+            "\n", ""
+        )
+        df_this_table.iloc[unit_row] = df_this_table.iloc[unit_row].str.replace(
+            "\n", ""
+        )
 
-        df_this_table = pm2.pm2io.nir_add_unit_information(df_this_table, unit_row=unit_row,
-                                                           entity_row=entity_row,
-                                                           regexp_entity=".*",
-                                                           default_unit="GgCO2eq")  # , **unit_info)
+        df_this_table = pm2.pm2io.nir_add_unit_information(
+            df_this_table,
+            unit_row=unit_row,
+            entity_row=entity_row,
+            regexp_entity=".*",
+            default_unit="GgCO2eq",
+        )
 
         # set index and convert to long format
         df_this_table = df_this_table.set_index(index_cols)
-        df_this_table_long = pm2.pm2io.nir_convert_df_to_long(df_this_table, year,
-                                                              header_long)
+        df_this_table_long = pm2.pm2io.nir_convert_df_to_long(
+            df_this_table, year, header_long
+        )
         df_this_table_long["orig_cat_name"] = df_this_table_long["orig_cat_name"].str[0]
 
         # combine with tables for other sectors (merge not append)
         if df_all is None:
             df_all = df_this_table_long
         else:
-            df_all = pd.concat([df_all, df_this_table_long], axis=0, join='outer')
+            df_all = pd.concat([df_all, df_this_table_long], axis=0, join="outer")
 
     # add the last page manually
-    df_all = pd.concat([df_all, df_last_page], axis=0, join='outer')
+    df_all = pd.concat([df_all, df_last_page], axis=0, join="outer")
 
     # fix the units of aerosols and precursors
     for entity in entities_to_fix_unit:
@@ -196,22 +234,24 @@ if __name__ == "__main__":
     # replace cat names by codes in col "category"
     # first the manual replacements
     df_all["category"] = df_all["category"].replace(cat_codes_manual)
+
     # then the regex replacements
-    def repl(m):
-        return m.group('code')
-    df_all["category"] = df_all["category"].str.replace(cat_code_regexp, repl, regex=True)
+    def repl(m):  # noqa: D103
+        return m.group("code")
+
+    df_all["category"] = df_all["category"].str.replace(
+        cat_code_regexp, repl, regex=True
+    )
     df_all = df_all.reset_index(drop=True)
 
     ###### convert to primap2 IF
 
     # replace "," with "" in data
-    df_all.loc[:, "data"] = df_all.loc[:, "data"].str.replace(',','', regex=False)
+    df_all.loc[:, "data"] = df_all.loc[:, "data"].str.replace(",", "", regex=False)
 
     # make sure all col headers are str
     df_all.columns = df_all.columns.map(str)
 
-
-
     # ###
     # convert to PRIMAP2 interchange format
     # ###
@@ -222,12 +262,13 @@ if __name__ == "__main__":
         coords_defaults=coords_defaults,
         coords_terminologies=coords_terminologies,
         coords_value_mapping=coords_value_mapping,
-        #coords_value_filling=coords_value_filling,
+        # coords_value_filling=coords_value_filling,
         filter_remove=filter_remove,
-        #filter_keep=filter_keep,
+        # filter_keep=filter_keep,
         meta_data=meta_data,
-        convert_str=True
-        )
+        convert_str=True,
+        time_format=time_format,
+    )
 
     cat_label = "category (IPCC2006)"
 
@@ -244,10 +285,9 @@ if __name__ == "__main__":
             print(f"Aggregating category {cat_to_agg}")
             df_combine = df_test.copy(deep=True)
 
-            time_format = '%Y'
             time_columns = [
                 col
-                for col in df_combine.columns.values
+                for col in df_combine.columns.to_numpy()
                 if matches_time_format(col, time_format)
             ]
 
@@ -255,8 +295,15 @@ if __name__ == "__main__":
                 df_combine[col] = pd.to_numeric(df_combine[col], errors="coerce")
 
             df_combine = df_combine.groupby(
-                by=['source', 'scenario (PRIMAP)', 'provenance', 'area (ISO3)', 'entity',
-                    'unit']).sum(min_count=1)
+                by=[
+                    "source",
+                    "scenario (PRIMAP)",
+                    "provenance",
+                    "area (ISO3)",
+                    "entity",
+                    "unit",
+                ]
+            ).sum(min_count=1)
 
             df_combine.insert(0, cat_label, cat_to_agg)
             df_combine.insert(1, "orig_cat_name", aggregate_cats[cat_to_agg]["name"])
@@ -267,10 +314,10 @@ if __name__ == "__main__":
         else:
             print(f"no data to aggregate category {cat_to_agg}")
 
-
     # delete cat 3 for N2O as it's wrong
-    index_3A_N2O = data_if[(data_if[cat_label] == '3') &
-                           (data_if['entity'] == 'N2O')].index
+    index_3A_N2O = data_if[
+        (data_if[cat_label] == "3") & (data_if["entity"] == "N2O")
+    ].index
     data_if = data_if.drop(index_3A_N2O)
 
     # aggregate cat 3 for N2O
@@ -283,10 +330,10 @@ if __name__ == "__main__":
             print(f"Aggregating category {cat_to_agg}")
             df_combine = df_test.copy(deep=True)
 
-            time_format = '%Y'
+            time_format = "%Y"
             time_columns = [
                 col
-                for col in df_combine.columns.values
+                for col in df_combine.columns.to_numpy()
                 if matches_time_format(col, time_format)
             ]
 
@@ -294,11 +341,20 @@ if __name__ == "__main__":
                 df_combine[col] = pd.to_numeric(df_combine[col], errors="coerce")
 
             df_combine = df_combine.groupby(
-                by=['source', 'scenario (PRIMAP)', 'provenance', 'area (ISO3)', 'entity',
-                    'unit']).sum(min_count=1)
+                by=[
+                    "source",
+                    "scenario (PRIMAP)",
+                    "provenance",
+                    "area (ISO3)",
+                    "entity",
+                    "unit",
+                ]
+            ).sum(min_count=1)
 
             df_combine.insert(0, cat_label, cat_to_agg)
-            df_combine.insert(1, "orig_cat_name", aggregate_cats_N2O[cat_to_agg]["name"])
+            df_combine.insert(
+                1, "orig_cat_name", aggregate_cats_N2O[cat_to_agg]["name"]
+            )
 
             df_combine = df_combine.reset_index()
 
@@ -307,8 +363,9 @@ if __name__ == "__main__":
             print(f"no data to aggregate category {cat_to_agg}")
 
     # delete cat 3.A.2 for CO2CH4N2O as it's wrong
-    index_3A2_CO2CH4N2O = data_if[(data_if[cat_label] == '3.A.2') &
-                           (data_if['entity'] == 'CH4CO2N2O (SARGWP100)')].index
+    index_3A2_CO2CH4N2O = data_if[
+        (data_if[cat_label] == "3.A.2") & (data_if["entity"] == "CH4CO2N2O (SARGWP100)")
+    ].index
     data_if = data_if.drop(index_3A2_CO2CH4N2O)
 
     # aggregate cat 3 for N2O
@@ -321,10 +378,10 @@ if __name__ == "__main__":
             print(f"Aggregating category {cat_to_agg}")
             df_combine = df_test.copy(deep=True)
 
-            time_format = '%Y'
+            time_format = "%Y"
             time_columns = [
                 col
-                for col in df_combine.columns.values
+                for col in df_combine.columns.to_numpy()
                 if matches_time_format(col, time_format)
             ]
 
@@ -332,11 +389,20 @@ if __name__ == "__main__":
                 df_combine[col] = pd.to_numeric(df_combine[col], errors="coerce")
 
             df_combine = df_combine.groupby(
-                by=['source', 'scenario (PRIMAP)', 'provenance', 'area (ISO3)', 'entity',
-                    'unit']).sum(min_count=1)
+                by=[
+                    "source",
+                    "scenario (PRIMAP)",
+                    "provenance",
+                    "area (ISO3)",
+                    "entity",
+                    "unit",
+                ]
+            ).sum(min_count=1)
 
             df_combine.insert(0, cat_label, cat_to_agg)
-            df_combine.insert(1, "orig_cat_name", aggregate_cats_CO2CH4N2O[cat_to_agg]["name"])
+            df_combine.insert(
+                1, "orig_cat_name", aggregate_cats_CO2CH4N2O[cat_to_agg]["name"]
+            )
 
             df_combine = df_combine.reset_index()
 
@@ -344,7 +410,6 @@ if __name__ == "__main__":
         else:
             print(f"no data to aggregate category {cat_to_agg}")
 
-
     data_if.attrs = attrs
 
     data_pm2 = pm2.pm2io.from_interchange_format(data_if)
@@ -372,9 +437,11 @@ if __name__ == "__main__":
     if not output_folder.exists():
         output_folder.mkdir()
     pm2.pm2io.write_interchange_format(
-        output_folder / (output_filename + coords_terminologies["category"]), data_if)
+        output_folder / (output_filename + coords_terminologies["category"]), data_if
+    )
 
     encoding = {var: compression for var in data_pm2.data_vars}
     data_pm2.pr.to_netcdf(
         output_folder / (output_filename + coords_terminologies["category"] + ".nc"),
-        encoding=encoding)
+        encoding=encoding,
+    )

+ 30 - 0
src/unfccc_ghg_data/unfccc_reader/Israel/__init__.py

@@ -0,0 +1,30 @@
+"""Read Israel's BURs, NIRs, NCs
+
+Scripts and configurations to read Israel's submissions to the UNFCCC.
+Currently, the following submissions and datasets are available (all datasets
+including DI (read using the DI-reader) and legacy BUR/NIR (no code)):
+
+.. exec_code::
+    :hide_code:
+
+    from unfccc_ghg_data.helper.functions import (get_country_datasets,
+                                                  get_country_submissions)
+    country = 'ISR'
+    # print available submissions
+    print("="*15 + " Available submissions " + "="*15)
+    get_country_submissions(country, True)
+    print("")
+
+    #print available datasets
+    print("="*15 + " Available datasets " + "="*15)
+    get_country_datasets(country, True)
+
+You can also obtain this information running
+
+.. code-block:: bash
+
+    poetry run doit country_info country=ISR
+
+See below for a listing of scripts for BUR/NIR reading including links.
+
+"""

+ 409 - 314
src/unfccc_ghg_data/unfccc_reader/Israel/config_isr_bur2.py

@@ -1,73 +1,91 @@
+"""Config for Israel's BUR3
+
+Full configuration including PRIMAP2 conversion config and metadata
+
+"""
+
 #### configuration for trend tables
 import locale
 
-gwp_to_use = 'SARGWP100'
-terminology_proc = 'IPCC2006_PRIMAP'
+gwp_to_use = "SARGWP100"
+terminology_proc = "IPCC2006_PRIMAP"
 # bunkers [0,1] need different specs
 trend_table_def = {
     # only GHG read, rest dropped
-    'GHG': {
-        'tables': [2],
-        'cols_add': {
-            'unit': 'ktCO2eq',
-            'category': '0',
+    "GHG": {
+        "tables": [2],
+        "cols_add": {
+            "unit": "ktCO2eq",
+            "category": "0",
         },
-        'given_col': 'entity',
-        'take_only': ['Total GHG'],
+        "given_col": "entity",
+        "take_only": ["Total GHG"],
     },
-    'CO2': {
-        'tables': [3],
-        'cols_add': {
-            'unit': 'kt',
-            'entity': 'CO2',
+    "CO2": {
+        "tables": [3],
+        "cols_add": {
+            "unit": "kt",
+            "entity": "CO2",
         },
-        'given_col': 'category',
+        "given_col": "category",
     },
-    'CH4': {
-        'tables': [5],
-        'cols_add': {
-            'unit': 'kt',
-            'entity': 'CH4',
+    "CH4": {
+        "tables": [5],
+        "cols_add": {
+            "unit": "kt",
+            "entity": "CH4",
         },
-        'given_col': 'category',
-        'take_only': [
-            'Total emissions', 'From fuel combustion',
-            'From Industrial processes', 'From Agriculture'
-        ], # ignore the waste time series as they don't cover the full sector
+        "given_col": "category",
+        "take_only": [
+            "Total emissions",
+            "From fuel combustion",
+            "From Industrial processes",
+            "From Agriculture",
+        ],  # ignore the waste time series as they don't cover the full sector
         # and lead to problems becaus eof the methodology chnage in the inventory
     },
-    'N2O': {
-        'tables': [6],
-        'cols_add': {
-            'unit': 'kt',
-            'entity': 'N2O',
+    "N2O": {
+        "tables": [6],
+        "cols_add": {
+            "unit": "kt",
+            "entity": "N2O",
         },
-        'given_col': 'category',
+        "given_col": "category",
     },
-    'FGases': {
-        'tables': [7],
-        'cols_add': {
-            'unit': 'ktCO2eq',
-            'category': '0',
+    "FGases": {
+        "tables": [7],
+        "cols_add": {
+            "unit": "ktCO2eq",
+            "category": "0",
         },
-        'given_col': 'entity',
+        "given_col": "entity",
     },
 }
 
 #### configuration for inventory tables
 inv_tab_conf = {
-    'unit_row': 0,
-    'entity_row': 0,
-    'regex_unit': r"\((.*)\)",
-    'regex_entity': r"^(.*)\s\(",
-    'index_cols': 'category',
-    'cat_pos': (0, 0),
-    'header_long': ["category", "entity", "unit", "time", "data"],
-    'header_2010': ["2010", "CO2 emissions (Gg)", "CO2 removals (Gg)",
-                  "CH4 (Gg)", "N2O (Gg)", "CO (Gg)", "NOx (Gg)",
-                  "NMVOCs (Gg)", "SOx (Gg)", "SF6 (CO2eq Gg)",
-                  "HFCs (CO2eq Gg)", "PFCs (CO2eq Gg)"],
-    'unit_repl': {
+    "unit_row": 0,
+    "entity_row": 0,
+    "regex_unit": r"\((.*)\)",
+    "regex_entity": r"^(.*)\s\(",
+    "index_cols": "category",
+    "cat_pos": (0, 0),
+    "header_long": ["category", "entity", "unit", "time", "data"],
+    "header_2010": [
+        "2010",
+        "CO2 emissions (Gg)",
+        "CO2 removals (Gg)",
+        "CH4 (Gg)",
+        "N2O (Gg)",
+        "CO (Gg)",
+        "NOx (Gg)",
+        "NMVOCs (Gg)",
+        "SOx (Gg)",
+        "SF6 (CO2eq Gg)",
+        "HFCs (CO2eq Gg)",
+        "PFCs (CO2eq Gg)",
+    ],
+    "unit_repl": {
         "SF6 (CO2e Gg)": "GgCO2eq",
         "HFCs (CO2eGg)": "GgCO2eq",
         "PFCs (CO2e Gg)": "GgCO2eq",
@@ -78,13 +96,13 @@ inv_tab_conf = {
 }
 
 inv_table_def = {
-    '1996': {'tables': [1, 2]},
-    '2000': {'tables': [3, 4]},
-    '2005': {'tables': [5, 6]},
-    '2010': {'tables': [7, 8]},
-    '2015': {'tables': [9, 10, 11]},
-    '2019': {'tables': [12, 13, 14]},
-    '2020': {'tables': [15, 16]},
+    "1996": {"tables": [1, 2]},
+    "2000": {"tables": [3, 4]},
+    "2005": {"tables": [5, 6]},
+    "2010": {"tables": [7, 8]},
+    "2015": {"tables": [9, 10, 11]},
+    "2019": {"tables": [12, 13, 14]},
+    "2020": {"tables": [15, 16]},
 }
 
 #### configuration for PM2 format
@@ -110,114 +128,114 @@ coords_defaults = {
 coords_value_mapping = {
     "unit": "PRIMAP1",
     "category": {
-        'Total national emissions and removals': '24540',
-        '0': '24540', # no mapping, just for completeness
-        'Total emissions and removals': '24540',
-        'Total emissions': '24540',
-        '1. Energy': '1',
-        'A. Fuel combustion (sectoral approach)': '1.A',
-        'A. From fuel combustion': '1.A',
-        'From fuel combustion': '1.A',
-        '1. Energy industries': '1.A.1',
-        '2. Manufacturing industries and construction': '1.A.2',
-        '2. Manufacturing, industries and construction': '1.A.2',
-        '3. Transport': '1.A.3',
-        '4. Other sectors': '1.A.4',
-        '4. Other': '1.A.4',
-        'Commercial, institutional residential sectors': '1.A.4.ab', # not BURDI
-        'Commercial, institutional': '1.A.4.a', #not BURDI
-        'residential sectors': '1.A.4.b', #not BURDI
-        'Agriculture, forestry and fishing': '1.A.4.c', # not BURDI
-        '5. Other (please specify)': '1.A.5',
-        'B. Fugitive emissions from fuels': '1.B',
-        '1. Solid fuels': '1.B.1',
-        '2. Oil and natural gas': '1.B.2',
-        '2. Industrial processes': '2',
-        'B. industrial processes': '2',
-        'From Industrial processes': '2',
-        'A. Mineral products': '2.A',
-        'CEMENT PRODUCTION': '2.A.1',
-        'PRODUCTION OF LIME': '2.A.2',
-        'SODA ASH USE': '2.A.4.b',
-        'ROAD PAVING WITH ASPHALT': '2.A.6',
-        'Container Glass': '2.A.7.a',
-        'B. Chemical industry': '2.B',
-        'NITRIC ACID PRODUCTION': '2.B.2',
-        'Ethylene': '2.B.5.b',
-        'PRODUCTION OF OTHER CHEMICALS': '2.B.5.g', #not BURDI
-        'Sulphuric Acid': '2.B.5.f', #not BURDI
-        'C. Metal production': '2.C',
-        'D. Other production': '2.D',
-        'E. Production of halocarbons and sulphur hexafluoride': '2.E',
-        'F. Consumption of halocarbons and sulphur hexafluoride': '2.F',
-        'G. Other (IPPU)': '2.G',
-        '3. Solvent and other product use': '3',
-        '4. Agriculture': '4',
-        'From Agriculture': '4',
-        'From agriculture': '4',
-        'A. Enteric fermentation': '4.A',
-        'B. Manure management': '4.B',
-        'C. Rice cultivation': '4.C',
-        'D. Agricultural soils': '4.D',
-        'E. Prescribed burning of savannahs': '4.E',
-        'F. Field burning of agricultural residues': '4.F',
-        'G. Other (Agri)': '4.G',
-        '5. Land-use change and forestry': '5',
-        'C. Land-use change and forestry': '5',
-        'A. Changes in forest and other woody biomass stocks': '5.A',
-        '2. Changes in forest and other woody biomass stocks': '5.A',
-        'B. Forest and grassland conversion': '5.B',
-        'C. Abandonment of managed lands': '5.C',
-        'D. CO2 emissions and removals from soil': '5.D',
-        '1. CO2 emissions and removals from soil': '5.D',
-        'E. Other (LULUCF)': '5.E',
+        "Total national emissions and removals": "24540",
+        "0": "24540",  # no mapping, just for completeness
+        "Total emissions and removals": "24540",
+        "Total emissions": "24540",
+        "1. Energy": "1",
+        "A. Fuel combustion (sectoral approach)": "1.A",
+        "A. From fuel combustion": "1.A",
+        "From fuel combustion": "1.A",
+        "1. Energy industries": "1.A.1",
+        "2. Manufacturing industries and construction": "1.A.2",
+        "2. Manufacturing, industries and construction": "1.A.2",
+        "3. Transport": "1.A.3",
+        "4. Other sectors": "1.A.4",
+        "4. Other": "1.A.4",
+        "Commercial, institutional residential sectors": "1.A.4.ab",  # not BURDI
+        "Commercial, institutional": "1.A.4.a",  # not BURDI
+        "residential sectors": "1.A.4.b",  # not BURDI
+        "Agriculture, forestry and fishing": "1.A.4.c",  # not BURDI
+        "5. Other (please specify)": "1.A.5",
+        "B. Fugitive emissions from fuels": "1.B",
+        "1. Solid fuels": "1.B.1",
+        "2. Oil and natural gas": "1.B.2",
+        "2. Industrial processes": "2",
+        "B. industrial processes": "2",
+        "From Industrial processes": "2",
+        "A. Mineral products": "2.A",
+        "CEMENT PRODUCTION": "2.A.1",
+        "PRODUCTION OF LIME": "2.A.2",
+        "SODA ASH USE": "2.A.4.b",
+        "ROAD PAVING WITH ASPHALT": "2.A.6",
+        "Container Glass": "2.A.7.a",
+        "B. Chemical industry": "2.B",
+        "NITRIC ACID PRODUCTION": "2.B.2",
+        "Ethylene": "2.B.5.b",
+        "PRODUCTION OF OTHER CHEMICALS": "2.B.5.g",  # not BURDI
+        "Sulphuric Acid": "2.B.5.f",  # not BURDI
+        "C. Metal production": "2.C",
+        "D. Other production": "2.D",
+        "E. Production of halocarbons and sulphur hexafluoride": "2.E",
+        "F. Consumption of halocarbons and sulphur hexafluoride": "2.F",
+        "G. Other (IPPU)": "2.G",
+        "3. Solvent and other product use": "3",
+        "4. Agriculture": "4",
+        "From Agriculture": "4",
+        "From agriculture": "4",
+        "A. Enteric fermentation": "4.A",
+        "B. Manure management": "4.B",
+        "C. Rice cultivation": "4.C",
+        "D. Agricultural soils": "4.D",
+        "E. Prescribed burning of savannahs": "4.E",
+        "F. Field burning of agricultural residues": "4.F",
+        "G. Other (Agri)": "4.G",
+        "5. Land-use change and forestry": "5",
+        "C. Land-use change and forestry": "5",
+        "A. Changes in forest and other woody biomass stocks": "5.A",
+        "2. Changes in forest and other woody biomass stocks": "5.A",
+        "B. Forest and grassland conversion": "5.B",
+        "C. Abandonment of managed lands": "5.C",
+        "D. CO2 emissions and removals from soil": "5.D",
+        "1. CO2 emissions and removals from soil": "5.D",
+        "E. Other (LULUCF)": "5.E",
         # waste in 2006 categories, not BURDI as we will lose info of we map to BURDI and back
-        '6. Waste': '6',
-        'A. Solid waste disposal on land': '6.A',
-        'From solid waste disposal on land': '6.A',
-        'B. Waste-water handling': '6X.B', # combine with 6.D
-        'From waste-water treatment': '6X.B', # not BURDI
-        'C. Waste incineration': '6.C',
-        'D. Other (please specify)': '6X.D', # combine with 6.E
-        'B. Biological Treatment of Solid Waste': '6.B', # not BURDI
-        'D.Waste-water handling': '6.D', # not BURDI
-        'D. Waste-water handling': '6.D', # not BURDI
-        'E. Other (Waste)': '6.E', # not BURDI
-        '7. Other (please specify)': '7',
-        'International bunkers': '14637',
-        'Aviation': '14424',
-        'Marine': '14423',
-        'CO2 emissions from biomass': '14638',
+        "6. Waste": "6",
+        "A. Solid waste disposal on land": "6.A",
+        "From solid waste disposal on land": "6.A",
+        "B. Waste-water handling": "6X.B",  # combine with 6.D
+        "From waste-water treatment": "6X.B",  # not BURDI
+        "C. Waste incineration": "6.C",
+        "D. Other (please specify)": "6X.D",  # combine with 6.E
+        "B. Biological Treatment of Solid Waste": "6.B",  # not BURDI
+        "D.Waste-water handling": "6.D",  # not BURDI
+        "D. Waste-water handling": "6.D",  # not BURDI
+        "E. Other (Waste)": "6.E",  # not BURDI
+        "7. Other (please specify)": "7",
+        "International bunkers": "14637",
+        "Aviation": "14424",
+        "Marine": "14423",
+        "CO2 emissions from biomass": "14638",
     },
     "entity": {
-        'Total GHG': f'KYOTOGHG ({gwp_to_use})',
-        'Carbon Dioxide (CO2)': 'CO2',
-        'CO2': 'CO2', # no mapping, just added for completeness here
-        'CO2 emissions': 'CO2 emissions', # no mapping, just added for completeness here
-        'CO2 removals': 'CO2 removals', # no mapping, just added for completeness here
-        'CO2 Emissions': 'CO2 emissions',
-        'CO2 Removals': 'CO2 removals',
-        'Methane (CH4)': 'CH4',
-        'CH4': 'CH4', # no mapping, just added for completeness here
-        'Nitrous Oxides (N2O)': 'N2O',
-        'NO2': 'NO2', # no mapping, just added for completeness here
-        'Sulfur hexafluoride (SF6)': f'SF6 ({gwp_to_use})',
-        'SF6': f'SF6 ({gwp_to_use})',
-        "Hydrofluorocarbons (HFC'S)": f'HFCS ({gwp_to_use})',
-        "HFCs": f'HFCS ({gwp_to_use})',
-        "Perfluorocarbons (PFC'S)": f'PFCS ({gwp_to_use})',
-        "PFCs": f'PFCS ({gwp_to_use})',
-        'NOx': 'NOX',
-        'Nox': 'NOX',
-        'Co': 'CO',
-        'CO': 'CO', # no mapping, just added for completeness here
-        'NMVOCs': 'NMVOC',
-        'SOx': 'SOX', # no mapping, just added for completeness here
+        "Total GHG": f"KYOTOGHG ({gwp_to_use})",
+        "Carbon Dioxide (CO2)": "CO2",
+        "CO2": "CO2",  # no mapping, just added for completeness here
+        "CO2 emissions": "CO2 emissions",  # no mapping, just added for completeness here
+        "CO2 removals": "CO2 removals",  # no mapping, just added for completeness here
+        "CO2 Emissions": "CO2 emissions",
+        "CO2 Removals": "CO2 removals",
+        "Methane (CH4)": "CH4",
+        "CH4": "CH4",  # no mapping, just added for completeness here
+        "Nitrous Oxides (N2O)": "N2O",
+        "NO2": "NO2",  # no mapping, just added for completeness here
+        "Sulfur hexafluoride (SF6)": f"SF6 ({gwp_to_use})",
+        "SF6": f"SF6 ({gwp_to_use})",
+        "Hydrofluorocarbons (HFC'S)": f"HFCS ({gwp_to_use})",
+        "HFCs": f"HFCS ({gwp_to_use})",
+        "Perfluorocarbons (PFC'S)": f"PFCS ({gwp_to_use})",
+        "PFCs": f"PFCS ({gwp_to_use})",
+        "NOx": "NOX",
+        "Nox": "NOX",
+        "Co": "CO",
+        "CO": "CO",  # no mapping, just added for completeness here
+        "NMVOCs": "NMVOC",
+        "SOx": "SOX",  # no mapping, just added for completeness here
     },
 }
 
 filter_remove = {
-    'rem_cat': {'category': ['Memo items', 'G. Other (please specify)']},
+    "rem_cat": {"category": ["Memo items", "G. Other (please specify)"]},
     #'rem_ent': {'entity': ['GHG per capita', 'GHG per GDP (2015 prices)']},
 }
 
@@ -235,76 +253,88 @@ meta_data = {
 #### for processing
 # aggregate categories
 cats_to_agg = {
-    '1': {'sources': ['1.A'], 'name': 'Energy'}, # for trends
-    '1.A.4': {'sources': ['1.A.4.a', '1.A.4.b', '1.A.4.c', '1.A.4.ab'],
-              'name': 'Other sectors'},
-    '2.A.4': {'sources': ['2.A.4.b'], 'name': 'Soda Ash'},
-    '2.A.7': {'sources': ['2.A.7.a'], 'name': 'Other'},
-    '2.A': {'sources': ['2.A.1', '2.A.2', '2.A.4', '2.A.6', '2.A.7'], 'name': 'Mineral Products'},
-    '2.B.5': {'sources': ['2.B.5.f', '2.B.5.g'], 'name': 'Other'},
-    '2.B': {'sources': ['2.B.2', '2.B.5'], 'name': 'Chemical Industry'},
-    '6.D': {'sources': ['6.D', '6X.B'], 'name': 'Wastewater Treatment and Discharge'},
+    "1": {"sources": ["1.A"], "name": "Energy"},  # for trends
+    "1.A.4": {
+        "sources": ["1.A.4.a", "1.A.4.b", "1.A.4.c", "1.A.4.ab"],
+        "name": "Other sectors",
+    },
+    "2.A.4": {"sources": ["2.A.4.b"], "name": "Soda Ash"},
+    "2.A.7": {"sources": ["2.A.7.a"], "name": "Other"},
+    "2.A": {
+        "sources": ["2.A.1", "2.A.2", "2.A.4", "2.A.6", "2.A.7"],
+        "name": "Mineral Products",
+    },
+    "2.B.5": {"sources": ["2.B.5.f", "2.B.5.g"], "name": "Other"},
+    "2.B": {"sources": ["2.B.2", "2.B.5"], "name": "Chemical Industry"},
+    "6.D": {"sources": ["6.D", "6X.B"], "name": "Wastewater Treatment and Discharge"},
     #'6.E': {'sources': ['6.E', '6X.D'], 'Other'}, # currently empty
 }
 
 # downscale
 # 1.A.4.ab
 downscaling = {
-    'sectors': {
-        '24540': {
-            'basket': '24540',
-            'basket_contents': ['2'],
-            'entities': ['SF6', 'HFCS (SARGWP100)', 'PFCS (SARGWP100)'],
-            'dim': f"category ({coords_terminologies['category']})",
+    "sectors": {
+        "24540": {
+            "basket": "24540",
+            "basket_contents": ["2"],
+            "entities": ["SF6", "HFCS (SARGWP100)", "PFCS (SARGWP100)"],
+            "dim": f"category ({coords_terminologies['category']})",
         },
-        '1.A': {
-            'basket': '1.A',
-            'basket_contents': ['1.A.1', '1.A.2', '1.A.3', '1.A.4'],
-            'entities': ['CO2', 'CH4', 'N2O'],
-            'dim': f"category ({coords_terminologies['category']})",
-            'tolerance': 0.05, # some inconsistencies (rounding?)
+        "1.A": {
+            "basket": "1.A",
+            "basket_contents": ["1.A.1", "1.A.2", "1.A.3", "1.A.4"],
+            "entities": ["CO2", "CH4", "N2O"],
+            "dim": f"category ({coords_terminologies['category']})",
+            "tolerance": 0.05,  # some inconsistencies (rounding?)
         },
-        '1.A.4.ab': {
-            'basket': '1.A.4.ab',
-            'basket_contents': ['1.A.4.a', '1.A.4.b'],
-            'entities': ['CO2', 'CH4', 'N2O', 'SOX', 'NOX', 'CO'],
-            'dim': f"category ({coords_terminologies['category']})",
+        "1.A.4.ab": {
+            "basket": "1.A.4.ab",
+            "basket_contents": ["1.A.4.a", "1.A.4.b"],
+            "entities": ["CO2", "CH4", "N2O", "SOX", "NOX", "CO"],
+            "dim": f"category ({coords_terminologies['category']})",
         },
-        '1.A.4': {
-            'basket': '1.A.4',
-            'basket_contents': ['1.A.4.a', '1.A.4.b', '1.A.4.c'],
-            'entities': ['CO2', 'CH4', 'N2O'],
-            'dim': f"category ({coords_terminologies['category']})",
+        "1.A.4": {
+            "basket": "1.A.4",
+            "basket_contents": ["1.A.4.a", "1.A.4.b", "1.A.4.c"],
+            "entities": ["CO2", "CH4", "N2O"],
+            "dim": f"category ({coords_terminologies['category']})",
         },
-        '2': {
-            'basket': '2',
-            'basket_contents': ['2.A', '2.B', '2.F'],
-            'entities': ['CO2', 'CH4', 'N2O', 'SF6', 'PFCS (SARGWP100)', 'HFCS (SARGWP100)'],
-            'dim': f"category ({coords_terminologies['category']})",
+        "2": {
+            "basket": "2",
+            "basket_contents": ["2.A", "2.B", "2.F"],
+            "entities": [
+                "CO2",
+                "CH4",
+                "N2O",
+                "SF6",
+                "PFCS (SARGWP100)",
+                "HFCS (SARGWP100)",
+            ],
+            "dim": f"category ({coords_terminologies['category']})",
         },
-        '2.A': {
-            'basket': '2.A',
-            'basket_contents': ['2.A.1', '2.A.2', '2.A.4', '2.A.7'],
-            'entities': ['CO2', 'CH4', 'N2O'],
-            'dim': f"category ({coords_terminologies['category']})",
+        "2.A": {
+            "basket": "2.A",
+            "basket_contents": ["2.A.1", "2.A.2", "2.A.4", "2.A.7"],
+            "entities": ["CO2", "CH4", "N2O"],
+            "dim": f"category ({coords_terminologies['category']})",
         },
-        '2.B': {
-            'basket': '2.B',
-            'basket_contents': ['2.B.2', '2.B.5'],
-            'entities': ['CO2', 'CH4', 'N2O'],
-            'dim': f"category ({coords_terminologies['category']})",
+        "2.B": {
+            "basket": "2.B",
+            "basket_contents": ["2.B.2", "2.B.5"],
+            "entities": ["CO2", "CH4", "N2O"],
+            "dim": f"category ({coords_terminologies['category']})",
         },
-        '4': {
-            'basket': '4',
-            'basket_contents': ['4.A', '4.B', '4.C', '4.D', '4.E', '4.F', '4.G'],
-            'entities': ['CH4', 'N2O'],
-            'dim': f"category ({coords_terminologies['category']})",
+        "4": {
+            "basket": "4",
+            "basket_contents": ["4.A", "4.B", "4.C", "4.D", "4.E", "4.F", "4.G"],
+            "entities": ["CH4", "N2O"],
+            "dim": f"category ({coords_terminologies['category']})",
         },
-        '5': {
-            'basket': '5',
-            'basket_contents': ['5.A', '5.D'], # the other sectors are 0
-            'entities': ['CO2'],
-            'dim': f"category ({coords_terminologies['category']})",
+        "5": {
+            "basket": "5",
+            "basket_contents": ["5.A", "5.D"],  # the other sectors are 0
+            "entities": ["CO2"],
+            "dim": f"category ({coords_terminologies['category']})",
         },
     },
 }
@@ -312,125 +342,190 @@ downscaling = {
 # map to IPCC2006
 cat_conversion = {
     # ANNEXI to come (low priority as we read from CRF files)
-    'mapping': {
-        '1': '1',
-        '1.A': '1.A',
-        '1.A.1': '1.A.1',
-        '1.A.2': '1.A.2',
-        '1.A.3': '1.A.3',
-        '1.A.4': '1.A.4',
-        '1.A.4.a': '1.A.4.a',
-        '1.A.4.b': '1.A.4.b',
-        '1.A.4.c': '1.A.4.c',
-        '1.A.5': '1.A.5', # currently not needed
-        '1.B': '1.B', # currently not needed
-        '1.B.1': '1.B.1', # currently not needed
-        '1.B.2': '1.B.2', # currently not needed
-        '2': '2',
-        '2.A': '2.A',
-        '2.A.1': '2.A.1', # cement
-        '2.A.2': '2.A.2', # lime
-        '2.A.4': '2.A.4.b', # soda ash
-        '2.A.6': '2.A.5', # road paving with asphalt -> other
-        '2.A.7.a': '2.A.3', # glass
-        '2.B': 'M.2.B_2.B',
-        '2.B.2': '2.B.2', # nitric acid
-        '2.B.5.b': '2.B.8.b', # Ethylene
-        '2.B.5.f': 'M.2.B.10.a', # sulphuric acid
-        '2.B.5.g': 'M.2.B.10.b', # other chemicals
-        '2.C': '2.C',
-        '2.D': 'M.2.H.1_2',
-        '2.E': '2.B.9',
-        '2.F': '2.F',
-        '2.G': '2.H.3',
-        '4': 'M.AG',
-        '4.A': '3.A.1',
-        '4.B': '3.A.2',
-        '4.C': '3.C.7',
-        '4.D': 'M.3.C.45.AG',
-        '4.E': '3.C.1.c',
-        '4.F': '3.C.1.b',
-        '4.G': '3.C.8',
-        '5': 'M.LULUCF',
-        '6': '4',
-        '6.A': '4.A',
-        '6.B': '4.B',
-        '6.C': '4.C',
-        '6.D': '4.D',
-        '24540': '0',
-        '15163': 'M.0.EL',
-        '14637': 'M.BK',
-        '14424': 'M.BK.A',
-        '14423': 'M.BK.M',
-        '14638': 'M.BIO',
-        '7': '5',
-    }, #5.A-D ignored as not fitting 2006 cats
-
-    'aggregate': {
-        '2.A.4': {'sources': ['2.A.4.b'], 'name': 'Other uses of soda ashes'},
-        '2.B.8': {'sources': ['2.B.8.b'], 'name': 'Petrochemical and Carbon Black production'},
-        '2.B.10': {'sources': ['M.2.B.10.a', 'M.2.B.10.b'], 'name': 'Other'},
-        '2.B': {'sources': ['2.B.2', '2.B.8', '2.B.9', '2.B.10'], 'name': 'Chemical Industry'},
-        '2.H': {'sources': ['M.2.H.1_2', '2.H.3'], 'name': 'Other'},
+    "mapping": {
+        "1": "1",
+        "1.A": "1.A",
+        "1.A.1": "1.A.1",
+        "1.A.2": "1.A.2",
+        "1.A.3": "1.A.3",
+        "1.A.4": "1.A.4",
+        "1.A.4.a": "1.A.4.a",
+        "1.A.4.b": "1.A.4.b",
+        "1.A.4.c": "1.A.4.c",
+        "1.A.5": "1.A.5",  # currently not needed
+        "1.B": "1.B",  # currently not needed
+        "1.B.1": "1.B.1",  # currently not needed
+        "1.B.2": "1.B.2",  # currently not needed
+        "2": "2",
+        "2.A": "2.A",
+        "2.A.1": "2.A.1",  # cement
+        "2.A.2": "2.A.2",  # lime
+        "2.A.4": "2.A.4.b",  # soda ash
+        "2.A.6": "2.A.5",  # road paving with asphalt -> other
+        "2.A.7.a": "2.A.3",  # glass
+        "2.B": "M.2.B_2.B",
+        "2.B.2": "2.B.2",  # nitric acid
+        "2.B.5.b": "2.B.8.b",  # Ethylene
+        "2.B.5.f": "M.2.B.10.a",  # sulphuric acid
+        "2.B.5.g": "M.2.B.10.b",  # other chemicals
+        "2.C": "2.C",
+        "2.D": "M.2.H.1_2",
+        "2.E": "2.B.9",
+        "2.F": "2.F",
+        "2.G": "2.H.3",
+        "4": "M.AG",
+        "4.A": "3.A.1",
+        "4.B": "3.A.2",
+        "4.C": "3.C.7",
+        "4.D": "M.3.C.45.AG",
+        "4.E": "3.C.1.c",
+        "4.F": "3.C.1.b",
+        "4.G": "3.C.8",
+        "5": "M.LULUCF",
+        "6": "4",
+        "6.A": "4.A",
+        "6.B": "4.B",
+        "6.C": "4.C",
+        "6.D": "4.D",
+        "24540": "0",
+        "15163": "M.0.EL",
+        "14637": "M.BK",
+        "14424": "M.BK.A",
+        "14423": "M.BK.M",
+        "14638": "M.BIO",
+        "7": "5",
+    },  # 5.A-D ignored as not fitting 2006 cats
+    "aggregate": {
+        "2.A.4": {"sources": ["2.A.4.b"], "name": "Other uses of soda ashes"},
+        "2.B.8": {
+            "sources": ["2.B.8.b"],
+            "name": "Petrochemical and Carbon Black production",
+        },
+        "2.B.10": {"sources": ["M.2.B.10.a", "M.2.B.10.b"], "name": "Other"},
+        "2.B": {
+            "sources": ["2.B.2", "2.B.8", "2.B.9", "2.B.10"],
+            "name": "Chemical Industry",
+        },
+        "2.H": {"sources": ["M.2.H.1_2", "2.H.3"], "name": "Other"},
         # '2': {'sources': ['2.A', '2.B', '2.C', '2.F', '2.H'],
         #       'name': 'Industrial Processes and Product Use'},
-        '3.A': {'sources': ['3.A.1', '3.A.2'], 'name': 'Livestock'},
-        '3.C.1': {'sources': ['3.C.1.b', '3.C.1.c'],
-                     'name': 'Emissions from biomass burning'},
-        'M.3.C.1.AG': {'sources': ['3.C.1.b', '3.C.1.c'],
-                     'name': 'Emissions from biomass burning (Agriculture)'},
-        '3.C': {'sources': ['3.C.1', 'M.3.C.45.AG', '3.C.7', '3.C.8'],
-                     'name': 'Aggregate sources and non-CO2 emissions sources on land'},
-        'M.3.C.AG': {'sources': ['M.3.C.1.AG', 'M.3.C.45.AG', '3.C.7', '3.C.8'],
-                     'name': 'Aggregate sources and non-CO2 emissions sources on land ('
-                             'Agriculture)'},
-        'M.AG.ELV': {'sources': ['M.3.C.AG'], 'name': 'Agriculture excluding livestock'},
-        '3': {'sources': ['M.AG', 'M.LULUCF'], 'name': 'AFOLU'},
-        'M.0.EL': {'sources': ['1', '2', 'M.AG', '4', '5'], 'name': 'National total '
-                                                                    'excluding LULUCF'},
+        "3.A": {"sources": ["3.A.1", "3.A.2"], "name": "Livestock"},
+        "3.C.1": {
+            "sources": ["3.C.1.b", "3.C.1.c"],
+            "name": "Emissions from biomass burning",
+        },
+        "M.3.C.1.AG": {
+            "sources": ["3.C.1.b", "3.C.1.c"],
+            "name": "Emissions from biomass burning (Agriculture)",
+        },
+        "3.C": {
+            "sources": ["3.C.1", "M.3.C.45.AG", "3.C.7", "3.C.8"],
+            "name": "Aggregate sources and non-CO2 emissions sources on land",
+        },
+        "M.3.C.AG": {
+            "sources": ["M.3.C.1.AG", "M.3.C.45.AG", "3.C.7", "3.C.8"],
+            "name": "Aggregate sources and non-CO2 emissions sources on land ("
+            "Agriculture)",
+        },
+        "M.AG.ELV": {
+            "sources": ["M.3.C.AG"],
+            "name": "Agriculture excluding livestock",
+        },
+        "3": {"sources": ["M.AG", "M.LULUCF"], "name": "AFOLU"},
+        "M.0.EL": {
+            "sources": ["1", "2", "M.AG", "4", "5"],
+            "name": "National total " "excluding LULUCF",
+        },
     },
-    'basket_copy': {
-        'GWPs_to_add': ["AR4GWP100", "AR5GWP100", "AR6GWP100"],
-        'entities': ["HFCS", "PFCS"],
-        'source_GWP': 'SARGWP100',
+    "basket_copy": {
+        "GWPs_to_add": ["AR4GWP100", "AR5GWP100", "AR6GWP100"],
+        "entities": ["HFCS", "PFCS"],
+        "source_GWP": "SARGWP100",
     },
 }
 
 sectors_to_save = [
-    '1', '1.A', '1.A.1', '1.A.2', '1.A.3', '1.A.4', '1.A.4.a', '1.A.4.b', '1.A.4.c',
-    '1.A.5',
-    '1.B', '1.B.1', '1.B.2',
-    '2', '2.A', '2.A.1', '2.A.2', '2.A.3', '2.A.4', '2.A.5',
-    '2.B', '2.B.2', '2.B.8', '2.B.9', '2.B.10', '2.C', '2.F', '2.H',
-    '3', 'M.AG', '3.A', '3.A.1', '3.A.2',
-    '3.C', '3.C.1', 'M.3.C.1.AG', '3.C.7', 'M.3.C.45.AG', '3.C.8', 'M.3.C.AG',
-    'M.LULUCF', 'M.AG.ELV',
-    '4', '4.A', '4.B', '4.C', '4.D',
-    '0', 'M.0.EL', 'M.BK', 'M.BK.A', 'M.BK.M', 'M.BIO', '5']
+    "1",
+    "1.A",
+    "1.A.1",
+    "1.A.2",
+    "1.A.3",
+    "1.A.4",
+    "1.A.4.a",
+    "1.A.4.b",
+    "1.A.4.c",
+    "1.A.5",
+    "1.B",
+    "1.B.1",
+    "1.B.2",
+    "2",
+    "2.A",
+    "2.A.1",
+    "2.A.2",
+    "2.A.3",
+    "2.A.4",
+    "2.A.5",
+    "2.B",
+    "2.B.2",
+    "2.B.8",
+    "2.B.9",
+    "2.B.10",
+    "2.C",
+    "2.F",
+    "2.H",
+    "3",
+    "M.AG",
+    "3.A",
+    "3.A.1",
+    "3.A.2",
+    "3.C",
+    "3.C.1",
+    "M.3.C.1.AG",
+    "3.C.7",
+    "M.3.C.45.AG",
+    "3.C.8",
+    "M.3.C.AG",
+    "M.LULUCF",
+    "M.AG.ELV",
+    "4",
+    "4.A",
+    "4.B",
+    "4.C",
+    "4.D",
+    "0",
+    "M.0.EL",
+    "M.BK",
+    "M.BK.A",
+    "M.BK.M",
+    "M.BIO",
+    "5",
+]
 
 
 # gas baskets
 gas_baskets = {
-    'FGASES (SARGWP100)': ['HFCS (SARGWP100)', 'PFCS (SARGWP100)', 'SF6', 'NF3'],
-    'FGASES (AR4GWP100)': ['HFCS (AR4GWP100)', 'PFCS (AR4GWP100)', 'SF6', 'NF3'],
-    'FGASES (AR5GWP100)': ['HFCS (AR5GWP100)', 'PFCS (AR5GWP100)', 'SF6', 'NF3'],
-    'FGASES (AR6GWP100)': ['HFCS (AR6GWP100)', 'PFCS (AR6GWP100)', 'SF6', 'NF3'],
-    'KYOTOGHG (SARGWP100)': ['CO2', 'CH4', 'N2O', 'FGASES (SARGWP100)'],
-    'KYOTOGHG (AR4GWP100)': ['CO2', 'CH4', 'N2O', 'FGASES (AR4GWP100)'],
-    'KYOTOGHG (AR5GWP100)': ['CO2', 'CH4', 'N2O', 'FGASES (AR5GWP100)'],
-    'KYOTOGHG (AR6GWP100)': ['CO2', 'CH4', 'N2O', 'FGASES (AR6GWP100)'],
+    "FGASES (SARGWP100)": ["HFCS (SARGWP100)", "PFCS (SARGWP100)", "SF6", "NF3"],
+    "FGASES (AR4GWP100)": ["HFCS (AR4GWP100)", "PFCS (AR4GWP100)", "SF6", "NF3"],
+    "FGASES (AR5GWP100)": ["HFCS (AR5GWP100)", "PFCS (AR5GWP100)", "SF6", "NF3"],
+    "FGASES (AR6GWP100)": ["HFCS (AR6GWP100)", "PFCS (AR6GWP100)", "SF6", "NF3"],
+    "KYOTOGHG (SARGWP100)": ["CO2", "CH4", "N2O", "FGASES (SARGWP100)"],
+    "KYOTOGHG (AR4GWP100)": ["CO2", "CH4", "N2O", "FGASES (AR4GWP100)"],
+    "KYOTOGHG (AR5GWP100)": ["CO2", "CH4", "N2O", "FGASES (AR5GWP100)"],
+    "KYOTOGHG (AR6GWP100)": ["CO2", "CH4", "N2O", "FGASES (AR6GWP100)"],
 }
 
 basket_copy = {
-    'GWPs_to_add': ["AR4GWP100", "AR5GWP100", "AR6GWP100"],
-    'entities': ["HFCS", "PFCS"],
-    'source_GWP': gwp_to_use,
+    "GWPs_to_add": ["AR4GWP100", "AR5GWP100", "AR6GWP100"],
+    "entities": ["HFCS", "PFCS"],
+    "source_GWP": gwp_to_use,
 }
 
+
 #### functions
 def is_int(input: str) -> bool:
+    """Check if a string evaluates to an integer under a defined locale"""
     try:
         locale.atoi(input)
-        return True
-    except:
+        return True  # noqa: TRY300
+    except Exception:
         return False

+ 121 - 77
src/unfccc_ghg_data/unfccc_reader/Israel/read_ISR_BUR2_from_pdf.py

@@ -1,4 +1,12 @@
-# read Israel's BUR2 from pdf
+"""
+Read Israel's BUR2 from pdf
+
+This script reads data from Israel's BUR2
+Data are read from pdf using camelot
+only the 2019 inventory is read as the BUR refers to BUR1 for earlier years
+
+"""
+
 
 # TODO: bunkers trend tables not read because of special format
 
@@ -9,7 +17,7 @@ import pandas as pd
 import primap2 as pm2
 
 # configuration import
-from .config_isr_bur2 import (
+from config_isr_bur2 import (
     basket_copy,
     cat_conversion,
     cats_to_agg,
@@ -29,23 +37,27 @@ from .config_isr_bur2 import (
     trend_table_def,
 )
 
-from unfccc_ghg_data.helper import downloaded_data_path, extracted_data_path, process_data_for_country
+from unfccc_ghg_data.helper import (
+    downloaded_data_path,
+    extracted_data_path,
+    process_data_for_country,
+)
 
 if __name__ == "__main__":
     ### genral configuration
-    input_folder = downloaded_data_path / 'UNFCCC' / 'Israel' / 'BUR2'
-    output_folder = extracted_data_path / 'UNFCCC' / 'Israel'
+    input_folder = downloaded_data_path / "UNFCCC" / "Israel" / "BUR2"
+    output_folder = extracted_data_path / "UNFCCC" / "Israel"
     if not output_folder.exists():
         output_folder.mkdir()
 
-    output_filename = 'ISR_BUR2_2021_'
-    inventory_file_pdf = '2nd_Biennial_Update_Report_2021_final.pdf'
-    #years_to_read = range(1990, 2018 + 1)
+    output_filename = "ISR_BUR2_2021_"
+    inventory_file_pdf = "2nd_Biennial_Update_Report_2021_final.pdf"
+    # years_to_read = range(1990, 2018 + 1)
     pages_to_read_trends = range(48, 54)
     pages_to_read_inventory = range(54, 66)
 
     # define locale to use for str to float conversion
-    locale_to_use = 'en_IL.UTF-8'
+    locale_to_use = "en_IL.UTF-8"
     locale.setlocale(locale.LC_NUMERIC, locale_to_use)
 
     compression = dict(zlib=True, complevel=9)
@@ -53,40 +65,44 @@ if __name__ == "__main__":
     #### trend tables
 
     # read
-    tables_trends = camelot.read_pdf(str(input_folder / inventory_file_pdf), pages=','.join(
-        [str(page) for page in pages_to_read_trends]), flavor='lattice')
+    tables_trends = camelot.read_pdf(
+        str(input_folder / inventory_file_pdf),
+        pages=",".join([str(page) for page in pages_to_read_trends]),
+        flavor="lattice",
+    )
 
     # convert to pm2
     table_trends = None
     for table in trend_table_def.keys():
         current_def = trend_table_def[table]
         new_table = None
-        for subtable in current_def['tables']:
+        for subtable in current_def["tables"]:
             if new_table is None:
                 new_table = tables_trends[subtable].df
             else:
                 new_table = pd.concat([new_table, tables_trends[subtable].df])
 
-        for col in new_table.columns.values:
+        for col in new_table.columns.to_numpy():
             new_table[col] = new_table[col].str.replace("\n", "")
 
-        new_table.iloc[0, 0] = current_def['given_col']
+        new_table.iloc[0, 0] = current_def["given_col"]
         new_table.columns = new_table.iloc[0]
         new_table = new_table.drop(labels=[0])
         new_table = new_table.reset_index(drop=True)
 
-        if 'take_only' in current_def.keys():
+        if "take_only" in current_def.keys():
             new_table = new_table[
-                new_table[current_def['given_col']].isin(current_def['take_only'])]
+                new_table[current_def["given_col"]].isin(current_def["take_only"])
+            ]
 
-        time_cols = [col for col in new_table.columns.values if is_int(col)]
+        time_cols = [col for col in new_table.columns.to_numpy() if is_int(col)]
         for col in time_cols:
             # no NE,NA etc, just numbers, so we can just remove the ','
-            new_table[col] = new_table[col].str.replace(',', '')
-            new_table[col] = new_table[col].str.replace(' ', '')
+            new_table[col] = new_table[col].str.replace(",", "")
+            new_table[col] = new_table[col].str.replace(" ", "")
 
-        for col in current_def['cols_add']:
-            new_table[col] = current_def['cols_add'][col]
+        for col in current_def["cols_add"]:
+            new_table[col] = current_def["cols_add"][col]
 
         if table_trends is None:
             table_trends = new_table
@@ -108,31 +124,32 @@ if __name__ == "__main__":
         # filter_keep=filter_keep,
         meta_data=meta_data,
         convert_str=True,
-        time_format='%Y'
+        time_format="%Y",
     )
 
-
     data_pm2_trends = pm2.pm2io.from_interchange_format(data_if_trends)
 
     #### inventory tables
     # read inventory tables
     tables_inv = camelot.read_pdf(
         str(input_folder / inventory_file_pdf),
-        pages=','.join([str(page) for page in pages_to_read_inventory]),
-        flavor='lattice')
+        pages=",".join([str(page) for page in pages_to_read_inventory]),
+        flavor="lattice",
+    )
 
     # process
     table_inv = None
     for table in inv_table_def.keys():
         new_table = None
         print(f"working on year {table}")
-        for subtable in inv_table_def[table]['tables']:
+        for subtable in inv_table_def[table]["tables"]:
             print(f"adding table {subtable}")
             if new_table is None:
                 new_table = tables_inv[subtable].df
             else:
-                new_table = pd.concat([new_table, tables_inv[subtable].df], axis=0,
-                                      join='outer')
+                new_table = pd.concat(
+                    [new_table, tables_inv[subtable].df], axis=0, join="outer"
+                )
                 new_table = new_table.reset_index(drop=True)
 
             # replace line breaks, double, and triple spaces in category names
@@ -146,75 +163,97 @@ if __name__ == "__main__":
         else:
             # replace line breaks in units and entities
             new_table.iloc[inv_tab_conf["entity_row"]] = new_table.iloc[
-                inv_tab_conf["entity_row"]].str.replace('\n', '')
+                inv_tab_conf["entity_row"]
+            ].str.replace("\n", "")
 
         # get_year
         year = new_table.iloc[inv_tab_conf["cat_pos"][0], inv_tab_conf["cat_pos"][1]]
 
         # set category col label
-        new_table.iloc[inv_tab_conf["cat_pos"][0], inv_tab_conf["cat_pos"][1]] = 'category'
+        new_table.iloc[
+            inv_tab_conf["cat_pos"][0], inv_tab_conf["cat_pos"][1]
+        ] = "category"
 
         new_table = pm2.pm2io.nir_add_unit_information(
             new_table,
-            unit_row=inv_tab_conf["unit_row"], entity_row=inv_tab_conf["entity_row"],
-            regexp_entity=inv_tab_conf["regex_entity"], regexp_unit=inv_tab_conf[
-                "regex_unit"],
-            default_unit="", manual_repl_unit=inv_tab_conf["unit_repl"])
+            unit_row=inv_tab_conf["unit_row"],
+            entity_row=inv_tab_conf["entity_row"],
+            regexp_entity=inv_tab_conf["regex_entity"],
+            regexp_unit=inv_tab_conf["regex_unit"],
+            default_unit="",
+            manual_repl_unit=inv_tab_conf["unit_repl"],
+        )
 
         # fix individual values
-        if table == '1996':
+        if table == "1996":
             loc = new_table[new_table["category"] == "NITRIC ACID PRODUCTION"].index
-            value = new_table.loc[loc, "CH4"].values
+            value = new_table.loc[loc, "CH4"].to_numpy()
             new_table.loc[loc, "N2O"] = value[0, 0]
-            new_table.loc[loc, "CH4"] = ''
-        if table == '2015':
+            new_table.loc[loc, "CH4"] = ""
+        if table == "2015":
             loc_total = new_table[
-                new_table["category"] == "Total national emissions and removals"].index
-            loc_IPPU = new_table[new_table["category"] == "2. Industrial processes"].index
-            value = new_table.loc[loc_IPPU, "PFCs"].values
+                new_table["category"] == "Total national emissions and removals"
+            ].index
+            loc_IPPU = new_table[
+                new_table["category"] == "2. Industrial processes"
+            ].index
+            value = new_table.loc[loc_IPPU, "PFCs"].to_numpy()
             new_table.loc[loc_total, "PFCs"] = value[0, 0]
 
         # remove lines with empty category
         new_table = new_table.drop(new_table[new_table["category"] == ""].index)
 
         # rename E. Other (please specify) according to row above
-        e_locs = list(new_table[new_table["category"] == "E. Other (please specify)"].index)
+        e_locs = list(
+            new_table[new_table["category"] == "E. Other (please specify)"].index
+        )
         for loc in e_locs:
             iloc = new_table.index.get_loc(loc)
-            if new_table.iloc[iloc - 1]["category"][
-                0] == "D. CO2 emissions and removals from soil":
+            if (
+                new_table.iloc[iloc - 1]["category"][0]
+                == "D. CO2 emissions and removals from soil"
+            ):
                 new_table.loc[loc]["category"] = "E. Other (LULUCF)"
-            elif new_table.iloc[iloc - 1]["category"][0] in ["D.Waste-water handling",
-                                                             'D. Waste-water handling']:
+            elif new_table.iloc[iloc - 1]["category"][0] in [
+                "D.Waste-water handling",
+                "D. Waste-water handling",
+            ]:
                 new_table.loc[loc]["category"] = "E. Other (Waste)"
 
         # rename G. Other (please specify) according to row above
-        g_locs = list(new_table[new_table["category"] == "G. Other (please specify)"].index)
+        g_locs = list(
+            new_table[new_table["category"] == "G. Other (please specify)"].index
+        )
         for loc in g_locs:
             iloc = new_table.index.get_loc(loc)
-            if new_table.iloc[iloc - 1]["category"][
-                0] == "F. Field burning of agricultural residues":
+            if (
+                new_table.iloc[iloc - 1]["category"][0]
+                == "F. Field burning of agricultural residues"
+            ):
                 new_table.loc[loc]["category"] = "G. Other (Agri)"
-            elif new_table.iloc[iloc - 1]["category"][
-                0] == "F. Consumption of halocarbons and sulphur hexafluoride":
+            elif (
+                new_table.iloc[iloc - 1]["category"][0]
+                == "F. Consumption of halocarbons and sulphur hexafluoride"
+            ):
                 new_table.loc[loc]["category"] = "G. Other (IPPU)"
 
         # set index and convert to long format
         new_table = new_table.set_index(inv_tab_conf["index_cols"])
-        new_table_long = pm2.pm2io.nir_convert_df_to_long(new_table, year,
-                                                          inv_tab_conf["header_long"])
+        new_table_long = pm2.pm2io.nir_convert_df_to_long(
+            new_table, year, inv_tab_conf["header_long"]
+        )
         # remove line breaks in values
         new_table_long["data"] = new_table_long["data"].str.replace("\n", "")
 
         if table_inv is None:
             table_inv = new_table_long
         else:
-            table_inv = pd.concat([table_inv, new_table_long], axis=0, join='outer')
+            table_inv = pd.concat([table_inv, new_table_long], axis=0, join="outer")
             table_inv = table_inv.reset_index(drop=True)
 
     # no NE,NA etc, just numbers, so we can just remove the ','
-    table_inv["data"] = table_inv["data"].str.replace(',', '')
-    table_inv["data"] = table_inv["data"].str.replace(' ', '')
+    table_inv["data"] = table_inv["data"].str.replace(",", "")
+    table_inv["data"] = table_inv["data"].str.replace(" ", "")
 
     # ###
     # convert to PRIMAP2 interchange format
@@ -231,14 +270,14 @@ if __name__ == "__main__":
         # filter_keep=filter_keep,
         meta_data=meta_data,
         convert_str=True,
-        time_format='%Y',
+        time_format="%Y",
     )
 
     data_pm2_inv = pm2.pm2io.from_interchange_format(data_if_inv)
 
     #### combine
     # tolerance needs to be high as rounding in trend tables leads to inconsistent data
-    data_pm2 = data_pm2_inv.pr.merge(data_pm2_trends,tolerance=0.11)
+    data_pm2 = data_pm2_inv.pr.merge(data_pm2_trends, tolerance=0.11)
     # convert back to IF to have units in the fixed format
     data_if = data_pm2.pr.to_interchange_format()
 
@@ -248,40 +287,44 @@ if __name__ == "__main__":
     if not output_folder.exists():
         output_folder.mkdir()
     pm2.pm2io.write_interchange_format(
-        output_folder / (output_filename + coords_terminologies["category"] + "_raw"), data_if)
+        output_folder / (output_filename + coords_terminologies["category"] + "_raw"),
+        data_if,
+    )
 
     encoding = {var: compression for var in data_pm2.data_vars}
     data_pm2.pr.to_netcdf(
-        output_folder / (output_filename + coords_terminologies["category"] + "_raw.nc"),
-        encoding=encoding)
-
+        output_folder
+        / (output_filename + coords_terminologies["category"] + "_raw.nc"),
+        encoding=encoding,
+    )
 
     #### processing
     data_proc_pm2 = data_pm2
 
     # combine CO2 emissions and removals
     temp_CO2 = data_proc_pm2["CO2"].copy()
-    #data_proc_pm2["CO2"] = data_proc_pm2[["CO2 emissions", "CO2 removals"]].to_array()
+    # data_proc_pm2["CO2"] = data_proc_pm2[["CO2 emissions", "CO2 removals"]].to_array()
     # .pr.sum(dim="variable", skipna=True, min_count=1)
-    data_proc_pm2["CO2"] = data_proc_pm2[["CO2 emissions", "CO2 removals"]].pr.sum\
-        (dim="entity", skipna=True, min_count=1)
+    data_proc_pm2["CO2"] = data_proc_pm2[["CO2 emissions", "CO2 removals"]].pr.sum(
+        dim="entity", skipna=True, min_count=1
+    )
     data_proc_pm2["CO2"].attrs = temp_CO2.attrs
     data_proc_pm2["CO2"] = data_proc_pm2["CO2"].fillna(temp_CO2)
 
     # actual processing
     country_processing_step1 = {
-        'aggregate_cats': cats_to_agg,
+        "aggregate_cats": cats_to_agg,
     }
     data_proc_pm2 = process_data_for_country(
         data_proc_pm2,
-        entities_to_ignore=['CO2 emissions', 'CO2 removals'],
+        entities_to_ignore=["CO2 emissions", "CO2 removals"],
         gas_baskets={},
         processing_info_country=country_processing_step1,
     )
 
     country_processing_step2 = {
-        'downscale': downscaling,
-        'basket_copy': basket_copy,
+        "downscale": downscaling,
+        "basket_copy": basket_copy,
     }
 
     data_proc_pm2 = process_data_for_country(
@@ -289,16 +332,16 @@ if __name__ == "__main__":
         entities_to_ignore=[],
         gas_baskets=gas_baskets,
         processing_info_country=country_processing_step2,
-        cat_terminology_out = terminology_proc,
-        category_conversion = cat_conversion,
-        sectors_out = sectors_to_save,
+        cat_terminology_out=terminology_proc,
+        category_conversion=cat_conversion,
+        sectors_out=sectors_to_save,
     )
 
     # adapt source and metadata
     # TODO: processing info is present twice
-    current_source = data_proc_pm2.coords["source"].values[0]
+    current_source = data_proc_pm2.coords["source"].to_numpy()[0]
     data_temp = data_proc_pm2.pr.loc[{"source": current_source}]
-    data_proc_pm2 = data_proc_pm2.pr.set("source", 'BUR_NIR', data_temp)
+    data_proc_pm2 = data_proc_pm2.pr.set("source", "BUR_NIR", data_temp)
 
     # ###
     # save data to IF and native format
@@ -307,9 +350,10 @@ if __name__ == "__main__":
     if not output_folder.exists():
         output_folder.mkdir()
     pm2.pm2io.write_interchange_format(
-        output_folder / (output_filename + terminology_proc), data_proc_if)
+        output_folder / (output_filename + terminology_proc), data_proc_if
+    )
 
     encoding = {var: compression for var in data_proc_pm2.data_vars}
     data_proc_pm2.pr.to_netcdf(
-        output_folder / (output_filename + terminology_proc + ".nc"),
-        encoding=encoding)
+        output_folder / (output_filename + terminology_proc + ".nc"), encoding=encoding
+    )

+ 30 - 0
src/unfccc_ghg_data/unfccc_reader/Malaysia/__init__.py

@@ -0,0 +1,30 @@
+"""Read Malaysia's BURs, NIRs, NCs
+
+Scripts and configurations to read Malaysia's submissions to the UNFCCC.
+Currently, the following submissions and datasets are available (all datasets
+including DI (read using the DI-reader) and legacy BUR/NIR (no code)):
+
+.. exec_code::
+    :hide_code:
+
+    from unfccc_ghg_data.helper.functions import (get_country_datasets,
+                                                  get_country_submissions)
+    country = 'MYS'
+    # print available submissions
+    print("="*15 + " Available submissions " + "="*15)
+    get_country_submissions(country, True)
+    print("")
+
+    # print available datasets
+    print("="*15 + " Available datasets " + "="*15)
+    get_country_datasets(country, True)
+
+You can also obtain this information running
+
+.. code-block:: bash
+
+    poetry run doit country_info country=MYS
+
+See below for a listing of scripts for BUR/NIR reading including links.
+
+"""

+ 922 - 602
src/unfccc_ghg_data/unfccc_reader/Malaysia/config_mys_bur3.py

@@ -1,16 +1,22 @@
+"""Config for Malaysia's BUR3
+
+Full configuration including PRIMAP2 conversion config and metadata
+
+"""
+
 gwp_to_use = "AR4GWP100"
 
 
 cat_names_fix = {
-    '2A3 Glass Prod.': '2A3 Glass Production',
-    '2F6 Other Applications': '2F6 Other Applications (please specify)',
-    '3A2 Manure Mngmt': '3A2 Manure Mngmt.',
-    '3C7 Rice Cultivations': '3C7 Rice Cultivation',
+    "2A3 Glass Prod.": "2A3 Glass Production",
+    "2F6 Other Applications": "2F6 Other Applications (please specify)",
+    "3A2 Manure Mngmt": "3A2 Manure Mngmt.",
+    "3C7 Rice Cultivations": "3C7 Rice Cultivation",
 }
 
 values_replacement = {
-    '': '-',
-    ' ': '-',
+    "": "-",
+    " ": "-",
 }
 
 cols_for_space_stripping = ["Categories"]
@@ -18,25 +24,25 @@ cols_for_space_stripping = ["Categories"]
 index_cols = ["Categories", "entity", "unit"]
 
 # parameters part 2: conversion to interchange format
-cats_remove = ['Memo items', 'Information items']
+cats_remove = ["Memo items", "Information items"]
 
 cat_codes_manual = {
-    'Annual change in long-term storage of carbon in HWP waste': 'M.LTS.AC.HWP',
-    'Annual change in total long-term storage of carbon stored': 'M.LTS.AC.TOT',
-    'CO2 captured': 'M.CCS',
-    'CO2 from Biomass Burning for Energy Production': 'M.BIO',
-    'For domestic storage': 'M.CCS.DOM',
-    'For storage in other countries': 'M.CCS.OCT',
-    'International Aviation (International Bunkers)': 'M.BK.A',
-    'International Bunkers': 'M.BK',
-    'International Water-borne Transport (International Bunkers)': 'M.BK.M',
-    'Long-term storage of carbon in waste disposal sites': 'M.LTS.WASTE',
-    'Multilateral Operations': 'M.MULTIOP',
-    'Other (please specify)': 'M.OTHER',
-    'Total National Emissions and Removals': '0',
+    "Annual change in long-term storage of carbon in HWP waste": "M.LTS.AC.HWP",
+    "Annual change in total long-term storage of carbon stored": "M.LTS.AC.TOT",
+    "CO2 captured": "M.CCS",
+    "CO2 from Biomass Burning for Energy Production": "M.BIO",
+    "For domestic storage": "M.CCS.DOM",
+    "For storage in other countries": "M.CCS.OCT",
+    "International Aviation (International Bunkers)": "M.BK.A",
+    "International Bunkers": "M.BK",
+    "International Water-borne Transport (International Bunkers)": "M.BK.M",
+    "Long-term storage of carbon in waste disposal sites": "M.LTS.WASTE",
+    "Multilateral Operations": "M.MULTIOP",
+    "Other (please specify)": "M.OTHER",
+    "Total National Emissions and Removals": "0",
 }
 
-cat_code_regexp = r'(?P<code>^[A-Z0-9]{1,4})\s.*'
+cat_code_regexp = r"(?P<code>^[A-Z0-9]{1,4})\s.*"
 
 coords_terminologies = {
     "area": "ISO3",
@@ -48,17 +54,12 @@ coords_defaults = {
     "source": "MYS-GHG-inventory",
     "provenance": "measured",
     "area": "MYS",
-    "scenario": "BUR3"
+    "scenario": "BUR3",
 }
 
-coords_value_mapping = {
-}
+coords_value_mapping = {}
 
-coords_cols = {
-    "category": "Categories",
-    "entity": "entity",
-    "unit": "unit"
-}
+coords_cols = {"category": "Categories", "entity": "entity", "unit": "unit"}
 
 add_coords_cols = {
     "orig_cat_name": ["orig_cat_name", "category"],
@@ -76,600 +77,919 @@ meta_data = {
 terminology_proc = coords_terminologies["category"]
 
 table_def_templates = {
-    '184': { #184
-        "area": ['54,498,793,100'],
-        "cols": ['150,197,250,296,346,394,444,493,540,587,637,685,738'],
-        "rows_to_fix": {
-            3: ['Total National', '1A Fuel Combustion', '1A1 Energy', '1A2 Manufacturing',
-                '1B Fugitive', '1B2 Oil and Natural', '1B3 Other emissions',
-                '1C Carbon Dioxide', '2 INDUSTRIAL', '2A1 Cement',
-               ],
-        },
-    },
-    '185': { #184
-        "area": ['34,504,813,99'],
-        "cols": ['128,177,224,273,321,373,425,473,519,564,611,661,713,765'],
-        "rows_to_fix": {
-            3: ['Total National', '1A Fuel', '1A1 Energy', '1A2 Manufacturing',
-                '1B Fugitive', '1B2 Oil and Natural', '1B3 Other',
-                '1C Carbon Dioxide', '2 INDUSTRIAL', '2A Mineral',
-                '2A1 Cement', '2A2 Lime',
-               ],
-        },
-    },
-    '186': { #also 200
-        "area": ['53,498,786,104'],
-        "cols": ['150,197,238,296,347,396,444,489,540,587,634,686,739'],
-        "rows_to_fix": {
-            3: ['2A3 Glass', '2A4 Other Process', '2A5 Other (please',
-                '2B Chemical', '2B1 Ammonia', '2B2 Nitric Acid',
-                '2B3 Adipic Acid', '2B4 Caprolactam,', '2B5 Carbide',
-                '2B6 Titanium', '2B7 Soda Ash', '2B8 Petrochemical',
-                '2B10 Other (Please', '2C1 Iron and Steel', '2C2 Ferroalloys'
-               ],
-            2: ['2B9 Fluorochemical'],
-        },
-    },
-    '187': { # also 201
-        "area": ['39,499,807,91'],
-        "cols": ['132,185,232,280,327,375,425,470,522,568,613,664,713,763'],
-        "rows_to_fix": {
-            3: ['2A3 Glass', '2A4 Other Process', '2A5 Other (please',
-                '2B Chemical', '2B1 Ammonia', '2B2 Nitric Acid',
-                '2B3 Adipic Acid', '2B5 Carbide',
-                '2B6 Titanium', '2B7 Soda Ash', '2B8 Petrochemical',
-                '2B10 Other (Please', '2C1 Iron and Steel', '2C2 Ferroalloys',
-               ],
-            2: ['2B9 Fluorochemical'],
-            5: ['2B4 Caprolactam,'],
-        },
-    },
-    '188': {
-        "area": ['48,503,802,92'],
-        "cols": ['146,194,245,295,346,400,452,500,549,596,642,695,746'],
-        "rows_to_fix": {
-            3: ['2C3 Aluminium', '2C4 Magnesium', '2C7 Other (please',
-                '2D Non-Energy', '2D2 Paraffin Wax', '2D4 Other (please',
-                '2E Electronics', '2E1 Integrated', '2E5 Other (please',
-                '2F1 Refrigeration',
-               ],
-            2: ['2E2 TFT Flat Panel', '2E4 Heat Transfer'],
-            5: ['2F Product Uses as'],
-        },
-    },
-    '189': {
-        "area": ['41,499,806,95'],
-        "cols": ['141,184,233,282,331,376,427,472,520,567,618,665,717,760'],
-        "rows_to_fix": {
-            3: ['2C3 Aluminium', '2C4 Magnesium', '2C7 Other (please',
-                '2D Non-Energy', '2D2 Paraffin Wax', '2D4 Other (please',
-                '2E Electronics', '2E1 Integrated', '2E5 Other (please',
-                '2F1 Refrigeration',
-               ],
-            2: ['2E2 TFT Flat Panel', '2E4 Heat Transfer'],
-            5: ['2F Product Uses as'],
-        },
-    },
-    '190': {
-        "area": ['45,500,802,125'],
-        "cols": ['146,193,243,295,349,400,453,501,549,595,644,696,748'],
-        "rows_to_fix": {
-            3: ['2F2 Foam Blowing', '2F6 Other', '2G Other Product',
-                '2G2 SF6 and PFCs', '2G4 Other (Please', '2H1 Pulp and Paper',
-                '2H2 Food and', '2H3 Other (please', '3 AGRICULTURE,',
-               ],
-            2: ['2G1 Electrical', '2G3 N2O from', '3A1 Enteric'],
-        },
-    },
-    '191': {
-        "area": ['38,498,814,120'],
-        "cols": ['130,180,229,277,326,381,429,477,526,570,620,669,717,765'],
-        "rows_to_fix": {
-            3: ['2F2 Foam Blowing', '2F6 Other', '2G Other Product',
-                '2G2 SF6 and PFCs', '2G4 Other (Please', '2H1 Pulp and Paper',
-                '2H2 Food and', '2H3 Other (please', '3 AGRICULTURE,',
-               ],
-            2: ['2G1 Electrical', '2G3 N2O from', '3A1 Enteric'],
-        },
-    },
-    '192': {
-        "area": ['39,502,807,106'],
-        "cols": ['134,193,245,296,346,400,455,507,556,602,650,701,755'],
-        "rows_to_fix": {
-            3: ['3C1 Emissions from', '3C4 Direct N2O', '3C5 Indirect N2O',
-                '3C6 Indirect N2O', '3C8 Other (please', '3D1 Harvested Wood',
-                '3D2 Other (please',
-               ],
-            5: ['3C Aggregate',],
-        },
-    },
-    '193': {
-        "area": ['36,508,815,119'],
-        "cols": ['128,179,228,278,327,379,428,476,525,571,622,670,717,766'],
-        "rows_to_fix": {
-            3: ['3C1 Emissions from', '3C4 Direct N2O', '3C5 Indirect N2O',
-                '3C6 Indirect N2O', '3C8 Other (please', '3D1 Harvested',
-                '3D2 Other (please',
-               ],
-            5: ['3C Aggregate',],
-        },
-    },
-    '194': {
-        "area": ['80,502,762,151'],
-        "cols": ['201,243,285,329,376,419,462,502,551,591,635,679,724'],
-        "rows_to_fix": {
-            3: ['4C Incineration and', '4C2 Open Burning of', '4E Other',],
-            2: ['4A1 Managed Waste', '4A2 Unmanaged Waste', '4A3 Uncategorised Waste',
-                '4B Biological Treatment', '4D Wastewater', '4D1 Domestic Wastewater',
-                '4D2 Industrial Wastewater',
-               ],
-            5: ['5A Indirect N2O'],
-        },
-    },
-    '195': {
-        "area": ['78,508,765,103'],
-        "cols": ['191,230,271,314,352,400,438,475,519,566,600,645,686,730'],
-        "rows_to_fix": {
-            3: ['4C Incineration and', '4C2 Open Burning of', '4E Other',
-                '4B Biological', '4D Wastewater', '4D1 Domestic',
-                '4D2 Industrial', '5B Other (please'
-               ],
-            2: ['4A1 Managed Waste', '4A2 Unmanaged Waste', '4A3 Uncategorised',
-                '4A Solid Waste',
-               ],
-            5: ['5A Indirect N2O'],
-        },
-    },
-    '196': {
-        "area": ['80,502,762,151'],
-        "cols": ['201,243,285,329,376,419,462,502,551,591,635,679,724'],
-        "rows_to_fix": {
-            3: ['International Aviation', 'International Water-borne',
-                'CO2 from Biomass Burning', 'For storage in other',
-                'Long-term storage of', 'Annual change in total',
-                'Annual change in long-',
-               ],
-        },
-    },
-    '197': {
-        "area": ['74,507,779,201'],
-        "cols": ['182,226,268,311,354,398,444,482,524,565,610,654,693,733'],
-        "rows_to_fix": {
-            3: ['International Aviation', 'International Water-',
-                'CO2 from Biomass', 'For storage in other',
-                'Long-term storage of', 'Annual change in total',
-                'Annual change in long-',
-               ],
-        },
-    },
-    '198': { # first CH4 table
-        "area": ['54,498,793,100'],
-        "cols": ['140,197,250,296,346,394,444,493,540,587,637,685,738'],
-        "rows_to_fix": {
-            3: ['Total National', '1A Fuel Combustion', '1A1 Energy', '1A2 Manufacturing',
-                '1B Fugitive', '1B2 Oil and Natural', '1B3 Other emissions',
-                '1C Carbon Dioxide', '2 INDUSTRIAL', '2A1 Cement',
-               ],
-            -3: ['2A Mineral Industry'],
-        },
-    },
-    '199': {
-        "area": ['34,506,818,97'],
-        "cols": ['132,177,228,276,329,377,432,479,528,574,618,667,722,774'],
-        "rows_to_fix": {
-            3: ['Total National', '1A Fuel', '1A1 Energy', '1A2 Manufacturing',
-                '1B Fugitive', '1B2 Oil and Natural', '1B3 Other',
-                '1C Carbon Dioxide', '2 INDUSTRIAL', '2A1 Cement',
-                '2A Mineral', '2A2 Lime',
-               ],
-        },
-    },
-    '202': {
-        "area": ['48,503,802,92'],
-        "cols": ['146,194,245,295,346,400,452,500,549,596,642,695,746'],
-        "rows_to_fix": {
-            3: ['2C3 Aluminium', '2C7 Other (please',
-                '2D Non-Energy', '2D2 Paraffin Wax', '2D4 Other (please',
-                '2E Electronics', '2E1 Integrated', '2E5 Other (please',
-               ],
-            2: ['2C4 Magnesium', '2E2 TFT Flat Panel', '2E4 Heat Transfer',
-                '2F1 Refrigeration',
-               ],
-            5: ['2F Product Uses as'],
-        },
-    },
-    '203': {
-        "area": ['41,499,806,95'],
-        "cols": ['141,184,233,282,331,376,427,472,520,567,618,665,717,760'],
-        "rows_to_fix": {
-            3: ['2C3 Aluminium', '2C7 Other (please',
-                '2D Non-Energy', '2D2 Paraffin Wax', '2D4 Other (please',
-                '2E Electronics', '2E1 Integrated', '2E5 Other (please',
-               ],
-            2: ['2C4 Magnesium', '2E2 TFT Flat Panel', '2E4 Heat Transfer',
-                '2F1 Refrigeration'
-               ],
-            5: ['2F Product Uses as'],
-        },
-    },
-    '204': {
-        "area": ['45,500,802,125'],
-        "cols": ['146,193,243,295,349,400,455,501,549,595,644,696,748'],
-        "rows_to_fix": {
-            3: ['2F6 Other', '2G Other Product',
-                '2G2 SF6 and PFCs', '2G4 Other (Please', '2H1 Pulp and Paper',
-                '2H2 Food and', '2H3 Other (please', '3 AGRICULTURE,',
-                '3A1 Enteric',
-               ],
-            2: ['2F2 Foam Blowing', '2G1 Electrical', '2G3 N2O from'],
-        },
-    },
-    '205': {
-        "area": ['38,498,814,120'],
-        "cols": ['130,180,229,277,326,381,429,477,526,570,620,669,717,765'],
-        "rows_to_fix": {
-            3: ['2F6 Other', '2G Other Product',
-                '2G2 SF6 and PFCs', '2G4 Other (Please', '2H1 Pulp and Paper',
-                '2H2 Food and', '2H3 Other (please', '3 AGRICULTURE,',
-                '3A1 Enteric',
-               ],
-            2: ['2F2 Foam Blowing', '2G1 Electrical', '2G3 N2O from'],
-        },
-    },
-    '206': { #also 220
-        "area": ['39,502,807,106'],
-        "cols": ['134,193,245,296,346,400,455,507,556,602,650,701,755'],
-        "rows_to_fix": {
-            3: ['3C1 Emissions from', '3C4 Direct N2O', '3C5 Indirect N2O',
-                '3C6 Indirect N2O', '3C8 Other (please',
-                '3D2 Other (please',
-               ],
-            2: ['3D1 Harvested Wood',],
-            5: ['3C Aggregate',],
-        },
-    },
-    '207': { # also 221
-        "area": ['36,508,815,110'],
-        "cols": ['128,179,228,278,327,379,428,476,527,571,622,670,717,766'],
-        "rows_to_fix": {
-            3: ['3C1 Emissions from', '3C4 Direct N2O', '3C5 Indirect N2O',
-                '3C6 Indirect N2O', '3C8 Other (please',
-                '3D2 Other (please',
-               ],
-            2: ['3D1 Harvested',],
-            5: ['3C Aggregate',],
-        },
-    },
-    '208': { # also 222
-        "area": ['80,502,762,151'],
-        "cols": ['201,243,285,329,376,419,462,502,551,591,635,679,724'],
-        "rows_to_fix": {
-            3: ['4C Incineration and', '4C2 Open Burning of', '4E Other',
-                '4A1 Managed Waste', '4A2 Unmanaged Waste', '4A3 Uncategorised Waste',
-                '4B Biological Treatment', '4D Wastewater', '4D1 Domestic Wastewater',
-                '4D2 Industrial Wastewater'
-               ],
-            5: ['5A Indirect N2O'],
-        },
-    },
-    '209': { # also 223
-        "area": ['78,508,765,103'],
-        "cols": ['191,230,271,314,352,400,438,475,519,560,600,645,686,730'],
-        "rows_to_fix": {
-            3: ['4C Incineration and', '4C2 Open Burning of', '4E Other',
-                '4B Biological', '4D Wastewater', '4D1 Domestic',
-                '4D2 Industrial', '5B Other (please',
-                '4A1 Managed Waste', '4A2 Unmanaged Waste', '4A3 Uncategorised',
-                '4A Solid Waste'
-               ],
-            5: ['5A Indirect N2O'],
-        },
-    },
-    '210': { # also 224
-        "area": ['80,502,762,151'],
-        "cols": ['201,243,285,329,376,419,462,502,551,591,635,679,724'],
-        "rows_to_fix": {
-            3: ['International Aviation', 'International Water-borne',
-                'Long-term storage of', 'Annual change in total',
-                'Annual change in long-',
-               ],
-            2: ['CO2 from Biomass Burning', 'For storage in other',],
-        },
-    },
-    '211': { # also 225
-        "area": ['74,507,779,201'],
-        "cols": ['182,226,268,311,354,398,444,482,524,565,610,654,693,733'],
-        "rows_to_fix": {
-            3: ['International Aviation', 'International Water-',
-                'Long-term storage of', 'Annual change in total',
-                'Annual change in long-', 'CO2 from Biomass',
-               ],
-            2: ['For storage in other',],
-        },
-    },
-    '212': {
-        "area": ['54,498,793,100'],
-        "cols": ['150,197,250,296,346,394,444,493,540,587,637,685,738'],
-        "rows_to_fix": {
-            3: ['Total National', '1A Fuel Combustion', '1A1 Energy', '1A2 Manufacturing',
-                '1B Fugitive', '1B2 Oil and Natural', '1B3 Other emissions',
-                '1C Carbon Dioxide', '2 INDUSTRIAL',
-               ],
-            2: ['2A1 Cement',],
-        },
-    },
-    '213': {
-        "area": ['34,504,813,99'],
-        "cols": ['128,177,224,273,321,373,425,473,519,564,611,661,713,765'],
-        "rows_to_fix": {
-            3: ['Total National', '1A Fuel', '1A1 Energy', '1A2 Manufacturing',
-                '1B Fugitive', '1B2 Oil and Natural', '1B3 Other',
-                '1C Carbon Dioxide', '2 INDUSTRIAL', '2A Mineral',
-               ],
-            2: ['2A1 Cement', '2A2 Lime',],
-        },
-    },
-    '214': {
-        "area": ['47,499,801,93'],
-        "cols": ['141,197,246,297,350,396,453,502,550,595,642,692,748'],
-        "rows_to_fix": {
-            3: ['2A5 Other (please',
-                '2B Chemical', '2B1 Ammonia', '2B2 Nitric Acid',
-                '2B3 Adipic Acid', '2B4 Caprolactam,', '2B5 Carbide',
-                '2B6 Titanium', '2B7 Soda Ash', '2B8 Petrochemical',
-                '2B10 Other (Please', '2C1 Iron and Steel', '2C2 Ferroalloys'
-               ],
-            2: ['2A3 Glass', '2A4 Other Process', '2B9 Fluorochemical'],
-            -3: ['2C Metal Industry'],
-        },
-    },
-    '215': {
-        "area": ['39,499,807,91'],
-        "cols": ['132,180,232,280,327,375,425,470,522,568,613,664,713,763'],
-        "rows_to_fix": {
-            3: ['2A5 Other (please',
-                '2B Chemical', '2B1 Ammonia', '2B2 Nitric Acid',
-                '2B3 Adipic Acid', '2B4 Caprolactam,', '2B5 Carbide',
-                '2B6 Titanium Dioxide', '2B7 Soda Ash', '2B8 Petrochemical',
-                '2B10 Other (Please', '2C1 Iron and Steel', '2C2 Ferroalloys'
-               ],
-            2: ['2A4 Other Process', '2B9 Fluorochemical'],
-            -3: ['2C Metal Industry'],
-        },
-    },
-    '216': {
-        "area": ['48,503,802,92'],
-        "cols": ['146,194,245,295,346,400,452,500,549,596,642,695,746'],
-        "rows_to_fix": {
-            3: ['2C7 Other (please', '2D Non-Energy', '2D2 Paraffin Wax',
-                '2D4 Other (please', '2E Electronics', '2E1 Integrated',
-                '2E5 Other (please',
-               ],
-            2: ['2C3 Aluminium', '2C4 Magnesium', '2E2 TFT Flat Panel',
-                '2E4 Heat Transfer', '2F1 Refrigeration',
-               ],
-            5: ['2F Product Uses as'],
-        },
-    },
-    '217': {
-        "area": ['41,499,806,95'],
-        "cols": ['141,184,233,282,331,376,427,472,520,567,618,665,717,760'],
-        "rows_to_fix": {
-            3: ['2C7 Other (please', '2D Non-Energy', '2D2 Paraffin Wax',
-                '2D4 Other (please', '2E Electronics', '2E1 Integrated',
-                '2E5 Other (please',
-               ],
-            2: ['2C3 Aluminium', '2C4 Magnesium', '2E2 TFT Flat Panel',
-                '2E4 Heat Transfer', '2F1 Refrigeration',
-               ],
-            5: ['2F Product Uses as'],
-        },
-    },
-    '218': {
-        "area": ['45,500,802,125'],
-        "cols": ['146,193,243,295,349,400,455,501,549,595,644,696,748'],
-        "rows_to_fix": {
-            3: ['2F6 Other', '2G Other Product', '2G2 SF6 and PFCs',
-                '2G3 N2O from', '2H3 Other (please', '3 AGRICULTURE,',
-               ],
-            2: ['2F2 Foam Blowing', '2G1 Electrical', '2G4 Other (Please',
-                '2H1 Pulp and Paper', '2H2 Food and', '3A1 Enteric',],
-        },
-    },
-    '219': {
-        "area": ['38,498,814,120'],
-        "cols": ['130,180,229,277,326,381,429,477,526,570,620,669,717,765'],
-        "rows_to_fix": {
-            3: ['2F6 Other', '2G Other Product', '2G2 SF6 and PFCs',
-                '2G3 N2O from', '2H3 Other (please', '3 AGRICULTURE,',
-               ],
-            2: ['2F2 Foam Blowing', '2G1 Electrical', '2G4 Other (Please',
-                '2H1 Pulp and Paper', '2H2 Food and', '3A1 Enteric',],
-        },
-    },
-    '226': { # also 334, 238
-        "area": ['48,510,797,99'],
-        "cols": ['271,310,350,393,435,475,514,557,594,640,678,719,760'],
-        "rows_to_fix": {
-            2: ['2B4 Caprolactam, Glyoxal and Glyoxylic Acid'],
-        }
-    },
-    '227': { # also 331, 335, 339
-        "area": ['27,510,818,99'],
-        "cols": ['250,290,333,372,413,452,494,536,576,616,656,699,739,781'],
-        "rows_to_fix": {
-            2: ['2B4 Caprolactam, Glyoxal and Glyoxylic Acid'],
-        }
-    },
-    '228': {
-        "area": ['48,510,797,99'],
-        "cols": ['271,310,350,393,435,475,514,557,594,640,678,719,760'],
-        "rows_to_fix": {
-            3: ['2F Product Uses as Substitutes for Ozone'],
-            2: ['2D Non-Energy Products from Fuels and Solvent'],
-        },
-    },
-    '229': {
-        "area": ['25,512,819,86'],
-        "cols": ['246,291,331,370,412,454,495,536,577,619,656,699,740,777'],
-        "rows_to_fix": {
-            3: ['2F Product Uses as Substitutes for Ozone'],
-            2: ['2D Non-Energy Products from Fuels and Solvent'],
+    "184": {  # 184
+        "area": ["54,498,793,100"],
+        "cols": ["150,197,250,296,346,394,444,493,540,587,637,685,738"],
+        "rows_to_fix": {
+            3: [
+                "Total National",
+                "1A Fuel Combustion",
+                "1A1 Energy",
+                "1A2 Manufacturing",
+                "1B Fugitive",
+                "1B2 Oil and Natural",
+                "1B3 Other emissions",
+                "1C Carbon Dioxide",
+                "2 INDUSTRIAL",
+                "2A1 Cement",
+            ],
+        },
+    },
+    "185": {  # 184
+        "area": ["34,504,813,99"],
+        "cols": ["128,177,224,273,321,373,425,473,519,564,611,661,713,765"],
+        "rows_to_fix": {
+            3: [
+                "Total National",
+                "1A Fuel",
+                "1A1 Energy",
+                "1A2 Manufacturing",
+                "1B Fugitive",
+                "1B2 Oil and Natural",
+                "1B3 Other",
+                "1C Carbon Dioxide",
+                "2 INDUSTRIAL",
+                "2A Mineral",
+                "2A1 Cement",
+                "2A2 Lime",
+            ],
+        },
+    },
+    "186": {  # also 200
+        "area": ["53,498,786,104"],
+        "cols": ["150,197,238,296,347,396,444,489,540,587,634,686,739"],
+        "rows_to_fix": {
+            3: [
+                "2A3 Glass",
+                "2A4 Other Process",
+                "2A5 Other (please",
+                "2B Chemical",
+                "2B1 Ammonia",
+                "2B2 Nitric Acid",
+                "2B3 Adipic Acid",
+                "2B4 Caprolactam,",
+                "2B5 Carbide",
+                "2B6 Titanium",
+                "2B7 Soda Ash",
+                "2B8 Petrochemical",
+                "2B10 Other (Please",
+                "2C1 Iron and Steel",
+                "2C2 Ferroalloys",
+            ],
+            2: ["2B9 Fluorochemical"],
+        },
+    },
+    "187": {  # also 201
+        "area": ["39,499,807,91"],
+        "cols": ["132,185,232,280,327,375,425,470,522,568,613,664,713,763"],
+        "rows_to_fix": {
+            3: [
+                "2A3 Glass",
+                "2A4 Other Process",
+                "2A5 Other (please",
+                "2B Chemical",
+                "2B1 Ammonia",
+                "2B2 Nitric Acid",
+                "2B3 Adipic Acid",
+                "2B5 Carbide",
+                "2B6 Titanium",
+                "2B7 Soda Ash",
+                "2B8 Petrochemical",
+                "2B10 Other (Please",
+                "2C1 Iron and Steel",
+                "2C2 Ferroalloys",
+            ],
+            2: ["2B9 Fluorochemical"],
+            5: ["2B4 Caprolactam,"],
+        },
+    },
+    "188": {
+        "area": ["48,503,802,92"],
+        "cols": ["146,194,245,295,346,400,452,500,549,596,642,695,746"],
+        "rows_to_fix": {
+            3: [
+                "2C3 Aluminium",
+                "2C4 Magnesium",
+                "2C7 Other (please",
+                "2D Non-Energy",
+                "2D2 Paraffin Wax",
+                "2D4 Other (please",
+                "2E Electronics",
+                "2E1 Integrated",
+                "2E5 Other (please",
+                "2F1 Refrigeration",
+            ],
+            2: ["2E2 TFT Flat Panel", "2E4 Heat Transfer"],
+            5: ["2F Product Uses as"],
+        },
+    },
+    "189": {
+        "area": ["41,499,806,95"],
+        "cols": ["141,184,233,282,331,376,427,472,520,567,618,665,717,760"],
+        "rows_to_fix": {
+            3: [
+                "2C3 Aluminium",
+                "2C4 Magnesium",
+                "2C7 Other (please",
+                "2D Non-Energy",
+                "2D2 Paraffin Wax",
+                "2D4 Other (please",
+                "2E Electronics",
+                "2E1 Integrated",
+                "2E5 Other (please",
+                "2F1 Refrigeration",
+            ],
+            2: ["2E2 TFT Flat Panel", "2E4 Heat Transfer"],
+            5: ["2F Product Uses as"],
+        },
+    },
+    "190": {
+        "area": ["45,500,802,125"],
+        "cols": ["146,193,243,295,349,400,453,501,549,595,644,696,748"],
+        "rows_to_fix": {
+            3: [
+                "2F2 Foam Blowing",
+                "2F6 Other",
+                "2G Other Product",
+                "2G2 SF6 and PFCs",
+                "2G4 Other (Please",
+                "2H1 Pulp and Paper",
+                "2H2 Food and",
+                "2H3 Other (please",
+                "3 AGRICULTURE,",
+            ],
+            2: ["2G1 Electrical", "2G3 N2O from", "3A1 Enteric"],
+        },
+    },
+    "191": {
+        "area": ["38,498,814,120"],
+        "cols": ["130,180,229,277,326,381,429,477,526,570,620,669,717,765"],
+        "rows_to_fix": {
+            3: [
+                "2F2 Foam Blowing",
+                "2F6 Other",
+                "2G Other Product",
+                "2G2 SF6 and PFCs",
+                "2G4 Other (Please",
+                "2H1 Pulp and Paper",
+                "2H2 Food and",
+                "2H3 Other (please",
+                "3 AGRICULTURE,",
+            ],
+            2: ["2G1 Electrical", "2G3 N2O from", "3A1 Enteric"],
+        },
+    },
+    "192": {
+        "area": ["39,502,807,106"],
+        "cols": ["134,193,245,296,346,400,455,507,556,602,650,701,755"],
+        "rows_to_fix": {
+            3: [
+                "3C1 Emissions from",
+                "3C4 Direct N2O",
+                "3C5 Indirect N2O",
+                "3C6 Indirect N2O",
+                "3C8 Other (please",
+                "3D1 Harvested Wood",
+                "3D2 Other (please",
+            ],
+            5: [
+                "3C Aggregate",
+            ],
+        },
+    },
+    "193": {
+        "area": ["36,508,815,119"],
+        "cols": ["128,179,228,278,327,379,428,476,525,571,622,670,717,766"],
+        "rows_to_fix": {
+            3: [
+                "3C1 Emissions from",
+                "3C4 Direct N2O",
+                "3C5 Indirect N2O",
+                "3C6 Indirect N2O",
+                "3C8 Other (please",
+                "3D1 Harvested",
+                "3D2 Other (please",
+            ],
+            5: [
+                "3C Aggregate",
+            ],
+        },
+    },
+    "194": {
+        "area": ["80,502,762,151"],
+        "cols": ["201,243,285,329,376,419,462,502,551,591,635,679,724"],
+        "rows_to_fix": {
+            3: [
+                "4C Incineration and",
+                "4C2 Open Burning of",
+                "4E Other",
+            ],
+            2: [
+                "4A1 Managed Waste",
+                "4A2 Unmanaged Waste",
+                "4A3 Uncategorised Waste",
+                "4B Biological Treatment",
+                "4D Wastewater",
+                "4D1 Domestic Wastewater",
+                "4D2 Industrial Wastewater",
+            ],
+            5: ["5A Indirect N2O"],
+        },
+    },
+    "195": {
+        "area": ["78,508,765,103"],
+        "cols": ["191,230,271,314,352,400,438,475,519,566,600,645,686,730"],
+        "rows_to_fix": {
+            3: [
+                "4C Incineration and",
+                "4C2 Open Burning of",
+                "4E Other",
+                "4B Biological",
+                "4D Wastewater",
+                "4D1 Domestic",
+                "4D2 Industrial",
+                "5B Other (please",
+            ],
+            2: [
+                "4A1 Managed Waste",
+                "4A2 Unmanaged Waste",
+                "4A3 Uncategorised",
+                "4A Solid Waste",
+            ],
+            5: ["5A Indirect N2O"],
+        },
+    },
+    "196": {
+        "area": ["80,502,762,151"],
+        "cols": ["201,243,285,329,376,419,462,502,551,591,635,679,724"],
+        "rows_to_fix": {
+            3: [
+                "International Aviation",
+                "International Water-borne",
+                "CO2 from Biomass Burning",
+                "For storage in other",
+                "Long-term storage of",
+                "Annual change in total",
+                "Annual change in long-",
+            ],
+        },
+    },
+    "197": {
+        "area": ["74,507,779,201"],
+        "cols": ["182,226,268,311,354,398,444,482,524,565,610,654,693,733"],
+        "rows_to_fix": {
+            3: [
+                "International Aviation",
+                "International Water-",
+                "CO2 from Biomass",
+                "For storage in other",
+                "Long-term storage of",
+                "Annual change in total",
+                "Annual change in long-",
+            ],
+        },
+    },
+    "198": {  # first CH4 table
+        "area": ["54,498,793,100"],
+        "cols": ["140,197,250,296,346,394,444,493,540,587,637,685,738"],
+        "rows_to_fix": {
+            3: [
+                "Total National",
+                "1A Fuel Combustion",
+                "1A1 Energy",
+                "1A2 Manufacturing",
+                "1B Fugitive",
+                "1B2 Oil and Natural",
+                "1B3 Other emissions",
+                "1C Carbon Dioxide",
+                "2 INDUSTRIAL",
+                "2A1 Cement",
+            ],
+            -3: ["2A Mineral Industry"],
+        },
+    },
+    "199": {
+        "area": ["34,506,818,97"],
+        "cols": ["132,177,228,276,329,377,432,479,528,574,618,667,722,774"],
+        "rows_to_fix": {
+            3: [
+                "Total National",
+                "1A Fuel",
+                "1A1 Energy",
+                "1A2 Manufacturing",
+                "1B Fugitive",
+                "1B2 Oil and Natural",
+                "1B3 Other",
+                "1C Carbon Dioxide",
+                "2 INDUSTRIAL",
+                "2A1 Cement",
+                "2A Mineral",
+                "2A2 Lime",
+            ],
+        },
+    },
+    "202": {
+        "area": ["48,503,802,92"],
+        "cols": ["146,194,245,295,346,400,452,500,549,596,642,695,746"],
+        "rows_to_fix": {
+            3: [
+                "2C3 Aluminium",
+                "2C7 Other (please",
+                "2D Non-Energy",
+                "2D2 Paraffin Wax",
+                "2D4 Other (please",
+                "2E Electronics",
+                "2E1 Integrated",
+                "2E5 Other (please",
+            ],
+            2: [
+                "2C4 Magnesium",
+                "2E2 TFT Flat Panel",
+                "2E4 Heat Transfer",
+                "2F1 Refrigeration",
+            ],
+            5: ["2F Product Uses as"],
+        },
+    },
+    "203": {
+        "area": ["41,499,806,95"],
+        "cols": ["141,184,233,282,331,376,427,472,520,567,618,665,717,760"],
+        "rows_to_fix": {
+            3: [
+                "2C3 Aluminium",
+                "2C7 Other (please",
+                "2D Non-Energy",
+                "2D2 Paraffin Wax",
+                "2D4 Other (please",
+                "2E Electronics",
+                "2E1 Integrated",
+                "2E5 Other (please",
+            ],
+            2: [
+                "2C4 Magnesium",
+                "2E2 TFT Flat Panel",
+                "2E4 Heat Transfer",
+                "2F1 Refrigeration",
+            ],
+            5: ["2F Product Uses as"],
+        },
+    },
+    "204": {
+        "area": ["45,500,802,125"],
+        "cols": ["146,193,243,295,349,400,455,501,549,595,644,696,748"],
+        "rows_to_fix": {
+            3: [
+                "2F6 Other",
+                "2G Other Product",
+                "2G2 SF6 and PFCs",
+                "2G4 Other (Please",
+                "2H1 Pulp and Paper",
+                "2H2 Food and",
+                "2H3 Other (please",
+                "3 AGRICULTURE,",
+                "3A1 Enteric",
+            ],
+            2: ["2F2 Foam Blowing", "2G1 Electrical", "2G3 N2O from"],
+        },
+    },
+    "205": {
+        "area": ["38,498,814,120"],
+        "cols": ["130,180,229,277,326,381,429,477,526,570,620,669,717,765"],
+        "rows_to_fix": {
+            3: [
+                "2F6 Other",
+                "2G Other Product",
+                "2G2 SF6 and PFCs",
+                "2G4 Other (Please",
+                "2H1 Pulp and Paper",
+                "2H2 Food and",
+                "2H3 Other (please",
+                "3 AGRICULTURE,",
+                "3A1 Enteric",
+            ],
+            2: ["2F2 Foam Blowing", "2G1 Electrical", "2G3 N2O from"],
+        },
+    },
+    "206": {  # also 220
+        "area": ["39,502,807,106"],
+        "cols": ["134,193,245,296,346,400,455,507,556,602,650,701,755"],
+        "rows_to_fix": {
+            3: [
+                "3C1 Emissions from",
+                "3C4 Direct N2O",
+                "3C5 Indirect N2O",
+                "3C6 Indirect N2O",
+                "3C8 Other (please",
+                "3D2 Other (please",
+            ],
+            2: [
+                "3D1 Harvested Wood",
+            ],
+            5: [
+                "3C Aggregate",
+            ],
+        },
+    },
+    "207": {  # also 221
+        "area": ["36,508,815,110"],
+        "cols": ["128,179,228,278,327,379,428,476,527,571,622,670,717,766"],
+        "rows_to_fix": {
+            3: [
+                "3C1 Emissions from",
+                "3C4 Direct N2O",
+                "3C5 Indirect N2O",
+                "3C6 Indirect N2O",
+                "3C8 Other (please",
+                "3D2 Other (please",
+            ],
+            2: [
+                "3D1 Harvested",
+            ],
+            5: [
+                "3C Aggregate",
+            ],
+        },
+    },
+    "208": {  # also 222
+        "area": ["80,502,762,151"],
+        "cols": ["201,243,285,329,376,419,462,502,551,591,635,679,724"],
+        "rows_to_fix": {
+            3: [
+                "4C Incineration and",
+                "4C2 Open Burning of",
+                "4E Other",
+                "4A1 Managed Waste",
+                "4A2 Unmanaged Waste",
+                "4A3 Uncategorised Waste",
+                "4B Biological Treatment",
+                "4D Wastewater",
+                "4D1 Domestic Wastewater",
+                "4D2 Industrial Wastewater",
+            ],
+            5: ["5A Indirect N2O"],
+        },
+    },
+    "209": {  # also 223
+        "area": ["78,508,765,103"],
+        "cols": ["191,230,271,314,352,400,438,475,519,560,600,645,686,730"],
+        "rows_to_fix": {
+            3: [
+                "4C Incineration and",
+                "4C2 Open Burning of",
+                "4E Other",
+                "4B Biological",
+                "4D Wastewater",
+                "4D1 Domestic",
+                "4D2 Industrial",
+                "5B Other (please",
+                "4A1 Managed Waste",
+                "4A2 Unmanaged Waste",
+                "4A3 Uncategorised",
+                "4A Solid Waste",
+            ],
+            5: ["5A Indirect N2O"],
+        },
+    },
+    "210": {  # also 224
+        "area": ["80,502,762,151"],
+        "cols": ["201,243,285,329,376,419,462,502,551,591,635,679,724"],
+        "rows_to_fix": {
+            3: [
+                "International Aviation",
+                "International Water-borne",
+                "Long-term storage of",
+                "Annual change in total",
+                "Annual change in long-",
+            ],
+            2: [
+                "CO2 from Biomass Burning",
+                "For storage in other",
+            ],
+        },
+    },
+    "211": {  # also 225
+        "area": ["74,507,779,201"],
+        "cols": ["182,226,268,311,354,398,444,482,524,565,610,654,693,733"],
+        "rows_to_fix": {
+            3: [
+                "International Aviation",
+                "International Water-",
+                "Long-term storage of",
+                "Annual change in total",
+                "Annual change in long-",
+                "CO2 from Biomass",
+            ],
+            2: [
+                "For storage in other",
+            ],
+        },
+    },
+    "212": {
+        "area": ["54,498,793,100"],
+        "cols": ["150,197,250,296,346,394,444,493,540,587,637,685,738"],
+        "rows_to_fix": {
+            3: [
+                "Total National",
+                "1A Fuel Combustion",
+                "1A1 Energy",
+                "1A2 Manufacturing",
+                "1B Fugitive",
+                "1B2 Oil and Natural",
+                "1B3 Other emissions",
+                "1C Carbon Dioxide",
+                "2 INDUSTRIAL",
+            ],
+            2: [
+                "2A1 Cement",
+            ],
+        },
+    },
+    "213": {
+        "area": ["34,504,813,99"],
+        "cols": ["128,177,224,273,321,373,425,473,519,564,611,661,713,765"],
+        "rows_to_fix": {
+            3: [
+                "Total National",
+                "1A Fuel",
+                "1A1 Energy",
+                "1A2 Manufacturing",
+                "1B Fugitive",
+                "1B2 Oil and Natural",
+                "1B3 Other",
+                "1C Carbon Dioxide",
+                "2 INDUSTRIAL",
+                "2A Mineral",
+            ],
+            2: [
+                "2A1 Cement",
+                "2A2 Lime",
+            ],
+        },
+    },
+    "214": {
+        "area": ["47,499,801,93"],
+        "cols": ["141,197,246,297,350,396,453,502,550,595,642,692,748"],
+        "rows_to_fix": {
+            3: [
+                "2A5 Other (please",
+                "2B Chemical",
+                "2B1 Ammonia",
+                "2B2 Nitric Acid",
+                "2B3 Adipic Acid",
+                "2B4 Caprolactam,",
+                "2B5 Carbide",
+                "2B6 Titanium",
+                "2B7 Soda Ash",
+                "2B8 Petrochemical",
+                "2B10 Other (Please",
+                "2C1 Iron and Steel",
+                "2C2 Ferroalloys",
+            ],
+            2: ["2A3 Glass", "2A4 Other Process", "2B9 Fluorochemical"],
+            -3: ["2C Metal Industry"],
+        },
+    },
+    "215": {
+        "area": ["39,499,807,91"],
+        "cols": ["132,180,232,280,327,375,425,470,522,568,613,664,713,763"],
+        "rows_to_fix": {
+            3: [
+                "2A5 Other (please",
+                "2B Chemical",
+                "2B1 Ammonia",
+                "2B2 Nitric Acid",
+                "2B3 Adipic Acid",
+                "2B4 Caprolactam,",
+                "2B5 Carbide",
+                "2B6 Titanium Dioxide",
+                "2B7 Soda Ash",
+                "2B8 Petrochemical",
+                "2B10 Other (Please",
+                "2C1 Iron and Steel",
+                "2C2 Ferroalloys",
+            ],
+            2: ["2A4 Other Process", "2B9 Fluorochemical"],
+            -3: ["2C Metal Industry"],
+        },
+    },
+    "216": {
+        "area": ["48,503,802,92"],
+        "cols": ["146,194,245,295,346,400,452,500,549,596,642,695,746"],
+        "rows_to_fix": {
+            3: [
+                "2C7 Other (please",
+                "2D Non-Energy",
+                "2D2 Paraffin Wax",
+                "2D4 Other (please",
+                "2E Electronics",
+                "2E1 Integrated",
+                "2E5 Other (please",
+            ],
+            2: [
+                "2C3 Aluminium",
+                "2C4 Magnesium",
+                "2E2 TFT Flat Panel",
+                "2E4 Heat Transfer",
+                "2F1 Refrigeration",
+            ],
+            5: ["2F Product Uses as"],
+        },
+    },
+    "217": {
+        "area": ["41,499,806,95"],
+        "cols": ["141,184,233,282,331,376,427,472,520,567,618,665,717,760"],
+        "rows_to_fix": {
+            3: [
+                "2C7 Other (please",
+                "2D Non-Energy",
+                "2D2 Paraffin Wax",
+                "2D4 Other (please",
+                "2E Electronics",
+                "2E1 Integrated",
+                "2E5 Other (please",
+            ],
+            2: [
+                "2C3 Aluminium",
+                "2C4 Magnesium",
+                "2E2 TFT Flat Panel",
+                "2E4 Heat Transfer",
+                "2F1 Refrigeration",
+            ],
+            5: ["2F Product Uses as"],
+        },
+    },
+    "218": {
+        "area": ["45,500,802,125"],
+        "cols": ["146,193,243,295,349,400,455,501,549,595,644,696,748"],
+        "rows_to_fix": {
+            3: [
+                "2F6 Other",
+                "2G Other Product",
+                "2G2 SF6 and PFCs",
+                "2G3 N2O from",
+                "2H3 Other (please",
+                "3 AGRICULTURE,",
+            ],
+            2: [
+                "2F2 Foam Blowing",
+                "2G1 Electrical",
+                "2G4 Other (Please",
+                "2H1 Pulp and Paper",
+                "2H2 Food and",
+                "3A1 Enteric",
+            ],
+        },
+    },
+    "219": {
+        "area": ["38,498,814,120"],
+        "cols": ["130,180,229,277,326,381,429,477,526,570,620,669,717,765"],
+        "rows_to_fix": {
+            3: [
+                "2F6 Other",
+                "2G Other Product",
+                "2G2 SF6 and PFCs",
+                "2G3 N2O from",
+                "2H3 Other (please",
+                "3 AGRICULTURE,",
+            ],
+            2: [
+                "2F2 Foam Blowing",
+                "2G1 Electrical",
+                "2G4 Other (Please",
+                "2H1 Pulp and Paper",
+                "2H2 Food and",
+                "3A1 Enteric",
+            ],
+        },
+    },
+    "226": {  # also 234, 238
+        "area": ["48,510,797,99"],
+        "cols": ["271,310,350,393,435,475,514,557,594,640,678,719,760"],
+        "rows_to_fix": {
+            2: ["2B4 Caprolactam, Glyoxal and Glyoxylic Acid"],
+        },
+    },
+    "227": {  # also 231, 235, 239
+        "area": ["27,510,818,99"],
+        "cols": ["250,290,333,372,413,452,494,536,576,616,656,699,739,781"],
+        "rows_to_fix": {
+            2: ["2B4 Caprolactam, Glyoxal and Glyoxylic Acid"],
+        },
+    },
+    "228": {
+        "area": ["48,510,797,99"],
+        "cols": ["271,310,350,393,435,475,514,557,594,640,678,719,760"],
+        "rows_to_fix": {
+            3: ["2F Product Uses as Substitutes for Ozone"],
+            2: ["2D Non-Energy Products from Fuels and Solvent"],
+        },
+    },
+    "229": {
+        "area": ["25,512,819,86"],
+        "cols": ["246,291,331,370,412,454,495,536,577,619,656,699,740,777"],
+        "rows_to_fix": {
+            3: ["2F Product Uses as Substitutes for Ozone"],
+            2: ["2D Non-Energy Products from Fuels and Solvent"],
+        },
+    },
+    "230": {
+        "area": ["48,510,797,99"],
+        "cols": ["271,310,350,393,435,475,514,557,594,640,678,719,760"],
+        "rows_to_fix": {
+            -3: [
+                "Total National Emissions and Removals",
+                "2 INDUSTRIAL PROCESSES AND PRODUCT USE",
+            ],
+            2: ["2B4 Caprolactam, Glyoxal and Glyoxylic Acid"],
+        },
+    },
+    "232": {  # also 236
+        "area": ["48,510,797,99"],
+        "cols": ["271,310,350,393,435,475,514,557,594,640,678,719,760"],
+        "rows_to_fix": {
+            -3: [
+                "2G2 SF6 and PFCs from Other Product Uses",
+            ],
+            2: [
+                "2D Non-Energy Products from Fuels and Solvent",
+                "2F Product Uses as Substitutes for Ozone",
+            ],
         },
     },
-    '230': {
-        "area": ['48,510,797,99'],
-        "cols": ['271,310,350,393,435,475,514,557,594,640,678,719,760'],
-        "rows_to_fix": {
-            -3: ['Total National Emissions and Removals', '2 INDUSTRIAL PROCESSES AND PRODUCT USE'],
-            2: ['2B4 Caprolactam, Glyoxal and Glyoxylic Acid'],
-        }
-    },
-    '232': { # also 236
-        "area": ['48,510,797,99'],
-        "cols": ['271,310,350,393,435,475,514,557,594,640,678,719,760'],
-        "rows_to_fix": {
-            -3: ['2G2 SF6 and PFCs from Other Product Uses',],
-            2: ['2D Non-Energy Products from Fuels and Solvent',
-                '2F Product Uses as Substitutes for Ozone',]
-        },
-    },
-    '233': {
-        "area": ['25,512,819,86'],
-        "cols": ['246,291,331,370,412,454,495,536,577,619,656,699,740,777'],
-        "rows_to_fix": {
-            -5: ['2F Product Uses as Substitutes for Ozone'],
-            2: ['2D Non-Energy Products from Fuels and Solvent'],
-            -3: ['2G Other Product Manufacture and Use',
-                 '2G2 SF6 and PFCs from Other Product Uses',]
+    "233": {
+        "area": ["25,512,819,86"],
+        "cols": ["246,291,331,370,412,454,495,536,577,619,656,699,740,777"],
+        "rows_to_fix": {
+            -5: ["2F Product Uses as Substitutes for Ozone"],
+            2: ["2D Non-Energy Products from Fuels and Solvent"],
+            -3: [
+                "2G Other Product Manufacture and Use",
+                "2G2 SF6 and PFCs from Other Product Uses",
+            ],
         },
     },
-    '237': {
-        "area": ['25,512,819,86'],
-        "cols": ['246,291,331,370,412,454,495,536,577,619,656,699,740,777'],
+    "237": {
+        "area": ["25,512,819,86"],
+        "cols": ["246,291,331,370,412,454,495,536,577,619,656,699,740,777"],
         "rows_to_fix": {
-            2: ['2D Non-Energy Products from Fuels and Solvent',
-                '2F Product Uses as Substitutes for Ozone'],
+            2: [
+                "2D Non-Energy Products from Fuels and Solvent",
+                "2F Product Uses as Substitutes for Ozone",
+            ],
         },
     },
-    '240': {
-        "area": ['48,510,797,99'],
-        "cols": ['271,310,350,393,435,475,514,557,594,640,678,719,760'],
+    "240": {
+        "area": ["48,510,797,99"],
+        "cols": ["271,310,350,393,435,475,514,557,594,640,678,719,760"],
         "rows_to_fix": {
-            2: ['2D Non-Energy Products from Fuels and Solvent',
-                '2F Product Uses as Substitutes for Ozone'],
-            -3: ['2E Electronics Industry',
-                 '2F1 Refrigeration and Air Conditioning',
-                 '2G2 SF6 and PFCs from Other Product Uses',],
+            2: [
+                "2D Non-Energy Products from Fuels and Solvent",
+                "2F Product Uses as Substitutes for Ozone",
+            ],
+            -3: [
+                "2E Electronics Industry",
+                "2F1 Refrigeration and Air Conditioning",
+                "2G2 SF6 and PFCs from Other Product Uses",
+            ],
         },
     },
-    '241': {
-        "area": ['25,512,819,86'],
-        "cols": ['246,291,331,370,412,454,495,536,577,619,656,699,740,777'],
+    "241": {
+        "area": ["25,512,819,86"],
+        "cols": ["246,291,331,370,412,454,495,536,577,619,656,699,740,777"],
         "rows_to_fix": {
-            2: ['2D Non-Energy Products from Fuels and Solvent',
-                '2F Product Uses as Substitutes for Ozone',
-                '2E1 Integrated Circuit or Semiconductor',],
-            -3: ['2F1 Refrigeration and Air Conditioning',
-                 '2G2 SF6 and PFCs from Other Product Uses',],
+            2: [
+                "2D Non-Energy Products from Fuels and Solvent",
+                "2F Product Uses as Substitutes for Ozone",
+                "2E1 Integrated Circuit or Semiconductor",
+            ],
+            -3: [
+                "2F1 Refrigeration and Air Conditioning",
+                "2G2 SF6 and PFCs from Other Product Uses",
+            ],
         },
     },
 }
 
 table_defs = {
-    '184': {"template": '184', "entity": "CO2", "unit": "Gg CO2 / yr"}, #CO2
-    '185': {"template": '185', "entity": "CO2", "unit": "Gg CO2 / yr"},
-    '186': {"template": '186', "entity": "CO2", "unit": "Gg CO2 / yr"},
-    '187': {"template": '187', "entity": "CO2", "unit": "Gg CO2 / yr"},
-    '188': {"template": '188', "entity": "CO2", "unit": "Gg CO2 / yr"},
-    '189': {"template": '189', "entity": "CO2", "unit": "Gg CO2 / yr"},
-    '190': {"template": '190', "entity": "CO2", "unit": "Gg CO2 / yr"},
-    '191': {"template": '191', "entity": "CO2", "unit": "Gg CO2 / yr"},
-    '192': {"template": '192', "entity": "CO2", "unit": "Gg CO2 / yr"},
-    '193': {"template": '193', "entity": "CO2", "unit": "Gg CO2 / yr"},
-    '194': {"template": '194', "entity": "CO2", "unit": "Gg CO2 / yr"},
-    '195': {"template": '195', "entity": "CO2", "unit": "Gg CO2 / yr"},
-    '196': {"template": '196', "entity": "CO2", "unit": "Gg CO2 / yr"},
-    '197': {"template": '197', "entity": "CO2", "unit": "Gg CO2 / yr"},
-    '198': {"template": '198', "entity": "CH4", "unit": "Gg CH4 / yr"}, #CH4
-    '199': {"template": '199', "entity": "CH4", "unit": "Gg CH4 / yr"},
-    '200': {"template": '186', "entity": "CH4", "unit": "Gg CH4 / yr"},
-    '201': {"template": '187', "entity": "CH4", "unit": "Gg CH4 / yr"},
-    '202': {"template": '202', "entity": "CH4", "unit": "Gg CH4 / yr"},
-    '203': {"template": '203', "entity": "CH4", "unit": "Gg CH4 / yr"},
-    '204': {"template": '204', "entity": "CH4", "unit": "Gg CH4 / yr"},
-    '205': {"template": '205', "entity": "CH4", "unit": "Gg CH4 / yr"},
-    '206': {"template": '206', "entity": "CH4", "unit": "Gg CH4 / yr"},
-    '207': {"template": '207', "entity": "CH4", "unit": "Gg CH4 / yr"},
-    '208': {"template": '208', "entity": "CH4", "unit": "Gg CH4 / yr"},
-    '209': {"template": '209', "entity": "CH4", "unit": "Gg CH4 / yr"},
-    '210': {"template": '210', "entity": "CH4", "unit": "Gg CH4 / yr"},
-    '211': {"template": '211', "entity": "CH4", "unit": "Gg CH4 / yr"},
-    '212': {"template": '212', "entity": "N2O", "unit": "Gg N2O / yr"}, #N2O
-    '213': {"template": '213', "entity": "N2O", "unit": "Gg N2O / yr"},
-    '214': {"template": '214', "entity": "N2O", "unit": "Gg N2O / yr"},
-    '215': {"template": '215', "entity": "N2O", "unit": "Gg N2O / yr"},
-    '216': {"template": '216', "entity": "N2O", "unit": "Gg N2O / yr"},
-    '217': {"template": '217', "entity": "N2O", "unit": "Gg N2O / yr"},
-    '218': {"template": '218', "entity": "N2O", "unit": "Gg N2O / yr"},
-    '219': {"template": '219', "entity": "N2O", "unit": "Gg N2O / yr"},
-    '220': {"template": '206', "entity": "N2O", "unit": "Gg N2O / yr"},
-    '221': {"template": '207', "entity": "N2O", "unit": "Gg N2O / yr"},
-    '222': {"template": '208', "entity": "N2O", "unit": "Gg N2O / yr"},
-    '223': {"template": '209', "entity": "N2O", "unit": "Gg N2O / yr"},
-    '224': {"template": '210', "entity": "N2O", "unit": "Gg N2O / yr"},
-    '225': {"template": '211', "entity": "N2O", "unit": "Gg N2O / yr"},
-    '226': {"template": '226', "entity": "HFCS (AR4GWP100)", "unit": "Gg CO2 / yr"}, #HFCs
-    '227': {"template": '227', "entity": "HFCS (AR4GWP100)", "unit": "Gg CO2 / yr"},
-    '228': {"template": '228', "entity": "HFCS (AR4GWP100)", "unit": "Gg CO2 / yr"},
-    '229': {"template": '229', "entity": "HFCS (AR4GWP100)", "unit": "Gg CO2 / yr"},
-    '230': {"template": '230', "entity": "PFCS (AR4GWP100)", "unit": "Gg CO2 / yr"}, #PFCs
-    '231': {"template": '227', "entity": "PFCS (AR4GWP100)", "unit": "Gg CO2 / yr"},
-    '232': {"template": '232', "entity": "PFCS (AR4GWP100)", "unit": "Gg CO2 / yr"},
-    '233': {"template": '233', "entity": "PFCS (AR4GWP100)", "unit": "Gg CO2 / yr"},
-    '234': {"template": '226', "entity": "SF6 (AR4GWP100)", "unit": "Gg CO2 / yr"}, #SF6
-    '235': {"template": '227', "entity": "SF6 (AR4GWP100)", "unit": "Gg CO2 / yr"},
-    '236': {"template": '232', "entity": "SF6 (AR4GWP100)", "unit": "Gg CO2 / yr"},
-    '237': {"template": '237', "entity": "SF6 (AR4GWP100)", "unit": "Gg CO2 / yr"},
-    '238': {"template": '226', "entity": "NF3 (AR4GWP100)", "unit": "Gg CO2 / yr"}, #NF3
-    '239': {"template": '227', "entity": "NF3 (AR4GWP100)", "unit": "Gg CO2 / yr"},
-    '240': {"template": '240', "entity": "NF3 (AR4GWP100)", "unit": "Gg CO2 / yr"},
-    '241': {"template": '241', "entity": "NF3 (AR4GWP100)", "unit": "Gg CO2 / yr"},
+    "184": {"template": "184", "entity": "CO2", "unit": "Gg CO2 / yr"},  # CO2
+    "185": {"template": "185", "entity": "CO2", "unit": "Gg CO2 / yr"},
+    "186": {"template": "186", "entity": "CO2", "unit": "Gg CO2 / yr"},
+    "187": {"template": "187", "entity": "CO2", "unit": "Gg CO2 / yr"},
+    "188": {"template": "188", "entity": "CO2", "unit": "Gg CO2 / yr"},
+    "189": {"template": "189", "entity": "CO2", "unit": "Gg CO2 / yr"},
+    "190": {"template": "190", "entity": "CO2", "unit": "Gg CO2 / yr"},
+    "191": {"template": "191", "entity": "CO2", "unit": "Gg CO2 / yr"},
+    "192": {"template": "192", "entity": "CO2", "unit": "Gg CO2 / yr"},
+    "193": {"template": "193", "entity": "CO2", "unit": "Gg CO2 / yr"},
+    "194": {"template": "194", "entity": "CO2", "unit": "Gg CO2 / yr"},
+    "195": {"template": "195", "entity": "CO2", "unit": "Gg CO2 / yr"},
+    "196": {"template": "196", "entity": "CO2", "unit": "Gg CO2 / yr"},
+    "197": {"template": "197", "entity": "CO2", "unit": "Gg CO2 / yr"},
+    "198": {"template": "198", "entity": "CH4", "unit": "Gg CH4 / yr"},  # CH4
+    "199": {"template": "199", "entity": "CH4", "unit": "Gg CH4 / yr"},
+    "200": {"template": "186", "entity": "CH4", "unit": "Gg CH4 / yr"},
+    "201": {"template": "187", "entity": "CH4", "unit": "Gg CH4 / yr"},
+    "202": {"template": "202", "entity": "CH4", "unit": "Gg CH4 / yr"},
+    "203": {"template": "203", "entity": "CH4", "unit": "Gg CH4 / yr"},
+    "204": {"template": "204", "entity": "CH4", "unit": "Gg CH4 / yr"},
+    "205": {"template": "205", "entity": "CH4", "unit": "Gg CH4 / yr"},
+    "206": {"template": "206", "entity": "CH4", "unit": "Gg CH4 / yr"},
+    "207": {"template": "207", "entity": "CH4", "unit": "Gg CH4 / yr"},
+    "208": {"template": "208", "entity": "CH4", "unit": "Gg CH4 / yr"},
+    "209": {"template": "209", "entity": "CH4", "unit": "Gg CH4 / yr"},
+    "210": {"template": "210", "entity": "CH4", "unit": "Gg CH4 / yr"},
+    "211": {"template": "211", "entity": "CH4", "unit": "Gg CH4 / yr"},
+    "212": {"template": "212", "entity": "N2O", "unit": "Gg N2O / yr"},  # N2O
+    "213": {"template": "213", "entity": "N2O", "unit": "Gg N2O / yr"},
+    "214": {"template": "214", "entity": "N2O", "unit": "Gg N2O / yr"},
+    "215": {"template": "215", "entity": "N2O", "unit": "Gg N2O / yr"},
+    "216": {"template": "216", "entity": "N2O", "unit": "Gg N2O / yr"},
+    "217": {"template": "217", "entity": "N2O", "unit": "Gg N2O / yr"},
+    "218": {"template": "218", "entity": "N2O", "unit": "Gg N2O / yr"},
+    "219": {"template": "219", "entity": "N2O", "unit": "Gg N2O / yr"},
+    "220": {"template": "206", "entity": "N2O", "unit": "Gg N2O / yr"},
+    "221": {"template": "207", "entity": "N2O", "unit": "Gg N2O / yr"},
+    "222": {"template": "208", "entity": "N2O", "unit": "Gg N2O / yr"},
+    "223": {"template": "209", "entity": "N2O", "unit": "Gg N2O / yr"},
+    "224": {"template": "210", "entity": "N2O", "unit": "Gg N2O / yr"},
+    "225": {"template": "211", "entity": "N2O", "unit": "Gg N2O / yr"},
+    "226": {
+        "template": "226",
+        "entity": "HFCS (AR4GWP100)",
+        "unit": "Gg CO2 / yr",
+    },  # HFCs
+    "227": {"template": "227", "entity": "HFCS (AR4GWP100)", "unit": "Gg CO2 / yr"},
+    "228": {"template": "228", "entity": "HFCS (AR4GWP100)", "unit": "Gg CO2 / yr"},
+    "229": {"template": "229", "entity": "HFCS (AR4GWP100)", "unit": "Gg CO2 / yr"},
+    "230": {
+        "template": "230",
+        "entity": "PFCS (AR4GWP100)",
+        "unit": "Gg CO2 / yr",
+    },  # PFCs
+    "231": {"template": "227", "entity": "PFCS (AR4GWP100)", "unit": "Gg CO2 / yr"},
+    "232": {"template": "232", "entity": "PFCS (AR4GWP100)", "unit": "Gg CO2 / yr"},
+    "233": {"template": "233", "entity": "PFCS (AR4GWP100)", "unit": "Gg CO2 / yr"},
+    "234": {
+        "template": "226",
+        "entity": "SF6 (AR4GWP100)",
+        "unit": "Gg CO2 / yr",
+    },  # SF6
+    "235": {"template": "227", "entity": "SF6 (AR4GWP100)", "unit": "Gg CO2 / yr"},
+    "236": {"template": "232", "entity": "SF6 (AR4GWP100)", "unit": "Gg CO2 / yr"},
+    "237": {"template": "237", "entity": "SF6 (AR4GWP100)", "unit": "Gg CO2 / yr"},
+    "238": {
+        "template": "226",
+        "entity": "NF3 (AR4GWP100)",
+        "unit": "Gg CO2 / yr",
+    },  # NF3
+    "239": {"template": "227", "entity": "NF3 (AR4GWP100)", "unit": "Gg CO2 / yr"},
+    "240": {"template": "240", "entity": "NF3 (AR4GWP100)", "unit": "Gg CO2 / yr"},
+    "241": {"template": "241", "entity": "NF3 (AR4GWP100)", "unit": "Gg CO2 / yr"},
 }
 
 country_processing_step1 = {
-    'aggregate_cats': {
-        'M.3.C.AG': {'sources': ['3.C.1', '3.C.2', '3.C.3', '3.C.4', '3.C.5',
-                                 '3.C.6', '3.C.7', '3.C.8'],
-                     'name': 'Aggregate sources and non-CO2 emissions sources on land '
-                             '(Agriculture)'},
-        'M.3.D.AG': {'sources': ['3.D.2'],
-                     'name': 'Other (Agriculture)'},
-        'M.AG.ELV': {'sources': ['M.3.C.AG', 'M.3.D.AG'],
-                     'name': 'Agriculture excluding livestock'},
-        'M.AG': {'sources': ['3.A', 'M.AG.ELV'],
-                     'name': 'Agriculture'},
-        'M.3.D.LU': {'sources': ['3.D.1'],
-                     'name': 'Other (LULUCF)'},
-        'M.LULUCF': {'sources': ['3.B', 'M.3.D.LU'],
-                     'name': 'LULUCF'},
-        'M.0.EL': {'sources': ['1', '2', 'M.AG', '4', '5'],
-                     'name': 'National total emissions excluding LULUCF'},
-    },
-    'basket_copy': {
-        'GWPs_to_add': ["SARGWP100", "AR5GWP100", "AR6GWP100"],
-        'entities': ["HFCS", "PFCS"],
-        'source_GWP': gwp_to_use,
+    "aggregate_cats": {
+        "M.3.C.AG": {
+            "sources": [
+                "3.C.1",
+                "3.C.2",
+                "3.C.3",
+                "3.C.4",
+                "3.C.5",
+                "3.C.6",
+                "3.C.7",
+                "3.C.8",
+            ],
+            "name": "Aggregate sources and non-CO2 emissions sources on land "
+            "(Agriculture)",
+        },
+        "M.3.D.AG": {"sources": ["3.D.2"], "name": "Other (Agriculture)"},
+        "M.AG.ELV": {
+            "sources": ["M.3.C.AG", "M.3.D.AG"],
+            "name": "Agriculture excluding livestock",
+        },
+        "M.AG": {"sources": ["3.A", "M.AG.ELV"], "name": "Agriculture"},
+        "M.3.D.LU": {"sources": ["3.D.1"], "name": "Other (LULUCF)"},
+        "M.LULUCF": {"sources": ["3.B", "M.3.D.LU"], "name": "LULUCF"},
+        "M.0.EL": {
+            "sources": ["1", "2", "M.AG", "4", "5"],
+            "name": "National total emissions excluding LULUCF",
+        },
+    },
+    "basket_copy": {
+        "GWPs_to_add": ["SARGWP100", "AR5GWP100", "AR6GWP100"],
+        "entities": ["HFCS", "PFCS"],
+        "source_GWP": gwp_to_use,
     },
 }
 
 gas_baskets = {
-    'FGASES (SARGWP100)': ['HFCS (SARGWP100)', 'PFCS (SARGWP100)', 'SF6', 'NF3'],
-    'FGASES (AR4GWP100)': ['HFCS (AR4GWP100)', 'PFCS (AR4GWP100)', 'SF6', 'NF3'],
-    'FGASES (AR5GWP100)':['HFCS (AR5GWP100)', 'PFCS (AR5GWP100)', 'SF6', 'NF3'],
-    'FGASES (AR6GWP100)':['HFCS (AR6GWP100)', 'PFCS (AR6GWP100)', 'SF6', 'NF3'],
-    'KYOTOGHG (SARGWP100)': ['CO2', 'CH4', 'N2O', 'FGASES (SARGWP100)'],
-    'KYOTOGHG (AR4GWP100)': ['CO2', 'CH4', 'N2O', 'FGASES (AR4GWP100)'],
-    'KYOTOGHG (AR5GWP100)': ['CO2', 'CH4', 'N2O', 'FGASES (AR5GWP100)'],
-    'KYOTOGHG (AR6GWP100)': ['CO2', 'CH4', 'N2O', 'FGASES (AR6GWP100)'],
+    "FGASES (SARGWP100)": ["HFCS (SARGWP100)", "PFCS (SARGWP100)", "SF6", "NF3"],
+    "FGASES (AR4GWP100)": ["HFCS (AR4GWP100)", "PFCS (AR4GWP100)", "SF6", "NF3"],
+    "FGASES (AR5GWP100)": ["HFCS (AR5GWP100)", "PFCS (AR5GWP100)", "SF6", "NF3"],
+    "FGASES (AR6GWP100)": ["HFCS (AR6GWP100)", "PFCS (AR6GWP100)", "SF6", "NF3"],
+    "KYOTOGHG (SARGWP100)": ["CO2", "CH4", "N2O", "FGASES (SARGWP100)"],
+    "KYOTOGHG (AR4GWP100)": ["CO2", "CH4", "N2O", "FGASES (AR4GWP100)"],
+    "KYOTOGHG (AR5GWP100)": ["CO2", "CH4", "N2O", "FGASES (AR5GWP100)"],
+    "KYOTOGHG (AR6GWP100)": ["CO2", "CH4", "N2O", "FGASES (AR6GWP100)"],
 }

+ 258 - 253
src/unfccc_ghg_data/unfccc_reader/Malaysia/config_mys_bur4.py

@@ -1,3 +1,9 @@
+"""Config for Malaysia's BUR4
+
+Full configuration including PRIMAP2 conversion config and metadata
+
+"""
+
 gwp_to_use = "AR4GWP100"
 
 
@@ -9,8 +15,8 @@ cat_names_fix = {
 }
 
 values_replacement = {
-    '': '-',
-    ' ': '-',
+    "": "-",
+    " ": "-",
 }
 
 cols_for_space_stripping = ["Categories"]
@@ -18,25 +24,25 @@ cols_for_space_stripping = ["Categories"]
 index_cols = ["Categories", "entity", "unit"]
 
 # parameters part 2: conversion to interchange format
-cats_remove = ['Memo items', 'Information items',  'Information items (1)']
+cats_remove = ["Memo items", "Information items", "Information items (1)"]
 
 cat_codes_manual = {
-    'Annual change in long-term storage of carbon in HWP waste': 'M.LTS.AC.HWP',
-    'Annual change in total long-term storage of carbon stored': 'M.LTS.AC.TOT',
-    'CO2 captured': 'M.CCS',
-    'CO2 from Biomass Burning for Energy Production': 'M.BIO',
-    'For domestic storage': 'M.CCS.DOM',
-    'For storage in other countries': 'M.CCS.OCT',
-    'International Aviation (International Bunkers)': 'M.BK.A',
-    'International Bunkers': 'M.BK',
-    'International Water-borne Transport (International Bunkers)': 'M.BK.M',
-    'Long-term storage of carbon in waste disposal sites': 'M.LTS.WASTE',
-    'Multilateral Operations': 'M.MULTIOP',
-    'Other (please specify)': 'M.OTHER',
-    'Total National Emissions and Removals': '0',
+    "Annual change in long-term storage of carbon in HWP waste": "M.LTS.AC.HWP",
+    "Annual change in total long-term storage of carbon stored": "M.LTS.AC.TOT",
+    "CO2 captured": "M.CCS",
+    "CO2 from Biomass Burning for Energy Production": "M.BIO",
+    "For domestic storage": "M.CCS.DOM",
+    "For storage in other countries": "M.CCS.OCT",
+    "International Aviation (International Bunkers)": "M.BK.A",
+    "International Bunkers": "M.BK",
+    "International Water-borne Transport (International Bunkers)": "M.BK.M",
+    "Long-term storage of carbon in waste disposal sites": "M.LTS.WASTE",
+    "Multilateral Operations": "M.MULTIOP",
+    "Other (please specify)": "M.OTHER",
+    "Total National Emissions and Removals": "0",
 }
 
-cat_code_regexp = r'(?P<code>^[A-Z0-9]{1,4})\s.*'
+cat_code_regexp = r"(?P<code>^[A-Z0-9]{1,4})\s.*"
 
 
 coords_terminologies = {
@@ -49,27 +55,22 @@ coords_defaults = {
     "source": "MYS-GHG-inventory",
     "provenance": "measured",
     "area": "MYS",
-    "scenario": "BUR4"
+    "scenario": "BUR4",
 }
 
-coords_value_mapping = {
-}
+coords_value_mapping = {}
 
-coords_cols = {
-    "category": "Categories",
-    "entity": "entity",
-    "unit": "unit"
-}
+coords_cols = {"category": "Categories", "entity": "entity", "unit": "unit"}
 
 add_coords_cols = {
     "orig_cat_name": ["orig_cat_name", "category"],
 }
 
-#filter_remove = {
+# filter_remove = {
 #    "f1": {
 #        "entity": ["CO2(grossemissions)", "CO2(removals)"],
 #    },
-#}
+# }
 
 meta_data = {
     "references": "https://unfccc.int/documents/624776",
@@ -84,318 +85,322 @@ terminology_proc = coords_terminologies["category"]
 
 table_def_templates = {
     # CO2
-    '203': {  # 203, 249
-        "area": ['70,480,768,169'],
+    "203": {  # 203, 249
+        "area": ["70,480,768,169"],
     },
-    '204': {  # 204
-        "area": ['70,500,763,141'],
+    "204": {  # 204
+        "area": ["70,500,763,141"],
     },
-    '205': {  # 205, 209, 2014, 2018
-        "area": ['70,495,763,95'],
+    "205": {  # 205, 209, 2014, 2018
+        "area": ["70,495,763,95"],
         "rows_to_fix": {
-            2: ['5A Indirect N2O emissions from the atmospheric deposition of'],
+            2: ["5A Indirect N2O emissions from the atmospheric deposition of"],
         },
     },
-    '206': {  # 206
-        "area": ['70,495,763,353'],
+    "206": {  # 206
+        "area": ["70,495,763,353"],
     },
-    '207': {  # 207, 208, 211, 212, 213, 215, 217, 223, 227, 231,
+    "207": {  # 207, 208, 211, 212, 213, 215, 217, 223, 227, 231,
         # 251, 257, 259, 263, 265
-        "area": ['70,495,763,95'],
+        "area": ["70,495,763,95"],
     },
-    '216': {  #  216
-        "area": ['70,500,763,95'],
+    "216": {  #  216
+        "area": ["70,500,763,95"],
     },
     # CH4
-    '219': {  # 219, 255
-        "area": ['70,480,768,100'],
+    "219": {  # 219, 255
+        "area": ["70,480,768,100"],
     },
-    '220': {  # 220, 224, 228
-        "area": ['70,495,763,95'],
+    "220": {  # 220, 224, 228
+        "area": ["70,495,763,95"],
         "rows_to_fix": {
-            3: ['2F Product Uses as Substitutes for Ozone Depleting'],
+            3: ["2F Product Uses as Substitutes for Ozone Depleting"],
         },
     },
-    '221': {  # 221
-        "area": ['92,508,748,92'],
-        "cols": ['298,340,380,422,462,502,542,582,622,662,702'],
+    "221": {  # 221
+        "area": ["92,508,748,92"],
+        "cols": ["298,340,380,422,462,502,542,582,622,662,702"],
         "rows_to_fix": {
-            3: ['3C Aggregate sources and Non-CO2 emissions'],
-            2: ['5A Indirect N2O emissions from the atmospheric'],
+            3: ["3C Aggregate sources and Non-CO2 emissions"],
+            2: ["5A Indirect N2O emissions from the atmospheric"],
         },
     },
-    '222': {  # 222
-        "area": ['70,495,763,323'],
+    "222": {  # 222
+        "area": ["70,495,763,323"],
         "rows_to_fix": {
-            2: ['Annual change in long-term storage of carbon in HWP'],
+            2: ["Annual change in long-term storage of carbon in HWP"],
         },
     },
-    '225': {  # 225
-        "area": ['92,508,748,92'],
-        "cols": ['311,357,400,443,486,529,572,615,658,701'],
+    "225": {  # 225
+        "area": ["92,508,748,92"],
+        "cols": ["311,357,400,443,486,529,572,615,658,701"],
         "rows_to_fix": {
-            3: ['3C Aggregate sources and Non-CO2 emissions'],
+            3: ["3C Aggregate sources and Non-CO2 emissions"],
         },
     },
-    '226': {  # 226, 230
-        "area": ['70,495,763,95'],
+    "226": {  # 226, 230
+        "area": ["70,495,763,95"],
         "rows_to_fix": {
-            2: ['5A Indirect N2O emissions from the atmospheric',
-                'Annual change in long-term storage of carbon in HWP'],
+            2: [
+                "5A Indirect N2O emissions from the atmospheric",
+                "Annual change in long-term storage of carbon in HWP",
+            ],
         },
     },
-    '229': {  # 229
-        "area": ['114,508,725,92'],
-        "cols": ['333,379,421,464,506,548,590,632,674'],
+    "229": {  # 229
+        "area": ["114,508,725,92"],
+        "cols": ["333,379,421,464,506,548,590,632,674"],
         "rows_to_fix": {
-            3: ['3C Aggregate sources and Non-CO2 emissions'],
+            3: ["3C Aggregate sources and Non-CO2 emissions"],
         },
     },
     # N2O
-    '232': {  # 232
-        "area": ['70,495,763,95'],
-        "cols": ['315,366,416,466,516,566,616,666,716'],
+    "232": {  # 232
+        "area": ["70,495,763,95"],
+        "cols": ["315,366,416,466,516,566,616,666,716"],
         "rows_to_fix": {
-            3: ['2F Product Uses as Substitutes for Ozone Depleting'],
+            3: ["2F Product Uses as Substitutes for Ozone Depleting"],
         },
     },
-    '233': {  # 233
-        "area": ['70,495,763,95'],
+    "233": {  # 233
+        "area": ["70,495,763,95"],
         "rows_to_fix": {
-            3: ['3C Aggregate sources and Non-CO2 emissions'],
+            3: ["3C Aggregate sources and Non-CO2 emissions"],
         },
     },
-    '234': {  # 234
-        "area": ['70,495,763,95'],
+    "234": {  # 234
+        "area": ["70,495,763,95"],
         "rows_to_fix": {
-            3: ['International Water-borne Transport (International'],
+            3: ["International Water-borne Transport (International"],
         },
     },
-    '236': {  # 236
-        "area": ['70,495,763,95'],
-        "cols": ['298,344,392,439,487,534,580,629,675,721'],
+    "236": {  # 236
+        "area": ["70,495,763,95"],
+        "cols": ["298,344,392,439,487,534,580,629,675,721"],
         "rows_to_fix": {
-            3: ['2F Product Uses as Substitutes for Ozone Depleting'],
+            3: ["2F Product Uses as Substitutes for Ozone Depleting"],
         },
     },
-    '240': {  # 240
-        "area": ['70,495,763,95'],
-        "cols": ['283,329,372,416,459,504,550,594,639,682,726'],
+    "240": {  # 240
+        "area": ["70,495,763,95"],
+        "cols": ["283,329,372,416,459,504,550,594,639,682,726"],
         "rows_to_fix": {
-            3: ['2F Product Uses as Substitutes for Ozone Depleting'],
+            3: ["2F Product Uses as Substitutes for Ozone Depleting"],
         },
     },
     # HFCs
-    '243': {  # 243
-        "area": ['70,480,763,95'],
-        "cols": ['408,449,489,527,567,604,644,681,721'],
+    "243": {  # 243
+        "area": ["70,480,763,95"],
+        "cols": ["408,449,489,527,567,604,644,681,721"],
     },
-    '244': {  # 244
-        "area": ['70,495,763,95'],
-        "cols": ['408,449,489,527,567,604,644,681,721'],
+    "244": {  # 244
+        "area": ["70,495,763,95"],
+        "cols": ["408,449,489,527,567,604,644,681,721"],
     },
-    '245': {  # 245, 246
-        "area": ['70,495,763,95'],
-        "cols": ['405,442,478,515,550,587,621,657,693,729'],
+    "245": {  # 245, 246
+        "area": ["70,495,763,95"],
+        "cols": ["405,442,478,515,550,587,621,657,693,729"],
     },
-    '247': {  # 247, 248
-        "area": ['70,495,763,95'],
-        "cols": ['384,426,459,493,531,564,597,633,666,700,735'],
+    "247": {  # 247, 248
+        "area": ["70,495,763,95"],
+        "cols": ["384,426,459,493,531,564,597,633,666,700,735"],
     },
     # PFCs
-    '250': {  # 250
-        "area": ['70,495,763,95'],
-        "cols": ['341,389,436,485,531,579,626,674,723'],
+    "250": {  # 250
+        "area": ["70,495,763,95"],
+        "cols": ["341,389,436,485,531,579,626,674,723"],
     },
-    '252': {  # 252
-        "area": ['70,495,763,95'],
-        "cols": ['323,370,415,459,504,547,590,636,680,726'],
+    "252": {  # 252
+        "area": ["70,495,763,95"],
+        "cols": ["323,370,415,459,504,547,590,636,680,726"],
     },
-    '253': {  # 253
-        "area": ['70,495,763,95'],
-        "cols": ['334,378,419,464,511,554,597,636,668,702,735'],
+    "253": {  # 253
+        "area": ["70,495,763,95"],
+        "cols": ["334,378,419,464,511,554,597,636,668,702,735"],
     },
-    '254': {  # 254
-        "area": ['70,495,763,95'],
-        "cols": ['330,378,419,464,511,554,597,636,668,702,735'],
+    "254": {  # 254
+        "area": ["70,495,763,95"],
+        "cols": ["330,378,419,464,511,554,597,636,668,702,735"],
         "rows_to_fix": {
-            -3: ['2F Product Uses as Substitutes for Ozone Depleting Substances'],
+            -3: ["2F Product Uses as Substitutes for Ozone Depleting Substances"],
         },
     },
     # SF6
-    '256': {  # 256
-        "area": ['70,495,763,95'],
-        "cols": ['382,420,462,504,546,588,630,672,714'],
+    "256": {  # 256
+        "area": ["70,495,763,95"],
+        "cols": ["382,420,462,504,546,588,630,672,714"],
         "rows_to_fix": {
-            3: ['2F Product Uses as Substitutes for Ozone Depleting'],
+            3: ["2F Product Uses as Substitutes for Ozone Depleting"],
         },
     },
-    '258': {  # 258
-        "area": ['70,495,763,95'],
-        "cols": ['363,399,441,481,522,564,606,646,688,728'],
+    "258": {  # 258
+        "area": ["70,495,763,95"],
+        "cols": ["363,399,441,481,522,564,606,646,688,728"],
         "rows_to_fix": {
-            3: ['2F Product Uses as Substitutes for Ozone Depleting'],
+            3: ["2F Product Uses as Substitutes for Ozone Depleting"],
         },
     },
-    '260': {  # 260
-        "area": ['70,495,763,95'],
-        "cols": ['346,381,419,458,498,536,576,614,652,692,732'],
+    "260": {  # 260
+        "area": ["70,495,763,95"],
+        "cols": ["346,381,419,458,498,536,576,614,652,692,732"],
         "rows_to_fix": {
-            3: ['2F Product Uses as Substitutes for Ozone Depleting'],
+            3: ["2F Product Uses as Substitutes for Ozone Depleting"],
         },
     },
     # NF3
-    '261': {  # 261
-        "area": ['70,490,768,100'],
-        "cols": ['364,412,454,496,538,581,623,667,710'],
+    "261": {  # 261
+        "area": ["70,490,768,100"],
+        "cols": ["364,412,454,496,538,581,623,667,710"],
     },
-    '262': {  # 262
-        "area": ['70,495,763,95'],
-        "cols": ['376,420,462,504,545,591,633,676,718'],
+    "262": {  # 262
+        "area": ["70,495,763,95"],
+        "cols": ["376,420,462,504,545,591,633,676,718"],
         "rows_to_fix": {
-            3: ['2F Product Uses as Substitutes for Ozone Depleting'],
+            3: ["2F Product Uses as Substitutes for Ozone Depleting"],
         },
     },
-    '264': {  # 264
-        "area": ['70,495,763,95'],
-        "cols": ['370,415,451,491,530,569,609,651,689,729'],
+    "264": {  # 264
+        "area": ["70,495,763,95"],
+        "cols": ["370,415,451,491,530,569,609,651,689,729"],
         "rows_to_fix": {
-            3: ['2F Product Uses as Substitutes for Ozone Depleting'],
+            3: ["2F Product Uses as Substitutes for Ozone Depleting"],
         },
     },
-    '266': {  # 266
-        "area": ['70,495,763,95'],
-        "cols": ['355,392,430,467,505,544,580,619,656,695,732'],
+    "266": {  # 266
+        "area": ["70,495,763,95"],
+        "cols": ["355,392,430,467,505,544,580,619,656,695,732"],
         "rows_to_fix": {
-            3: ['2F Product Uses as Substitutes for Ozone Depleting'],
+            3: ["2F Product Uses as Substitutes for Ozone Depleting"],
         },
     },
 }
 
 table_defs = {
-    '203': {"template": '203', "entity": "CO2", "unit": "Gg CO2 / yr"},  # CO2
-    '204': {"template": '204', "entity": "CO2", "unit": "Gg CO2 / yr"},
-    '205': {"template": '205', "entity": "CO2", "unit": "Gg CO2 / yr"},
-    '206': {"template": '206', "entity": "CO2", "unit": "Gg CO2 / yr"},
-    '207': {"template": '207', "entity": "CO2", "unit": "Gg CO2 / yr"},
-    '208': {"template": '207', "entity": "CO2", "unit": "Gg CO2 / yr"},
-    '209': {"template": '205', "entity": "CO2", "unit": "Gg CO2 / yr"},
-    '210': {"template": '206', "entity": "CO2", "unit": "Gg CO2 / yr"},
-    '211': {"template": '207', "entity": "CO2", "unit": "Gg CO2 / yr"},
-    '212': {"template": '207', "entity": "CO2", "unit": "Gg CO2 / yr"},
-    '213': {"template": '207', "entity": "CO2", "unit": "Gg CO2 / yr"},
-    '214': {"template": '205', "entity": "CO2", "unit": "Gg CO2 / yr"},
-    '215': {"template": '207', "entity": "CO2", "unit": "Gg CO2 / yr"},
-    '216': {"template": '216', "entity": "CO2", "unit": "Gg CO2 / yr"},
-    '217': {"template": '207', "entity": "CO2", "unit": "Gg CO2 / yr"},
-    '218': {"template": '205', "entity": "CO2", "unit": "Gg CO2 / yr"},
-    '219': {"template": '219', "entity": "CH4", "unit": "Gg CH4 / yr"},  # CH4
-    '220': {"template": '220', "entity": "CH4", "unit": "Gg CH4 / yr"},
-    '221': {"template": '221', "entity": "CH4", "unit": "Gg CH4 / yr"},
-    '222': {"template": '222', "entity": "CH4", "unit": "Gg CH4 / yr"},
-    '223': {"template": '207', "entity": "CH4", "unit": "Gg CH4 / yr"},
-    '224': {"template": '220', "entity": "CH4", "unit": "Gg CH4 / yr"},
-    '225': {"template": '225', "entity": "CH4", "unit": "Gg CH4 / yr"},
-    '226': {"template": '226', "entity": "CH4", "unit": "Gg CH4 / yr"},
-    '227': {"template": '207', "entity": "CH4", "unit": "Gg CH4 / yr"},
-    '228': {"template": '220', "entity": "CH4", "unit": "Gg CH4 / yr"},
-    '229': {"template": '229', "entity": "CH4", "unit": "Gg CH4 / yr"},
-    '230': {"template": '226', "entity": "CH4", "unit": "Gg CH4 / yr"},
-    '231': {"template": '207', "entity": "N2O", "unit": "Gg N2O / yr"},  # N2O
-    '232': {"template": '232', "entity": "N2O", "unit": "Gg N2O / yr"},
-    '233': {"template": '233', "entity": "N2O", "unit": "Gg N2O / yr"},
-    '234': {"template": '234', "entity": "N2O", "unit": "Gg N2O / yr"},
-    '235': {"template": '207', "entity": "N2O", "unit": "Gg N2O / yr"},
-    '236': {"template": '236', "entity": "N2O", "unit": "Gg N2O / yr"},
-    '237': {"template": '233', "entity": "N2O", "unit": "Gg N2O / yr"},
-    '238': {"template": '234', "entity": "N2O", "unit": "Gg N2O / yr"},
-    '239': {"template": '207', "entity": "N2O", "unit": "Gg N2O / yr"},
-    '240': {"template": '240', "entity": "N2O", "unit": "Gg N2O / yr"},
-    '241': {"template": '233', "entity": "N2O", "unit": "Gg N2O / yr"},
-    '242': {"template": '234', "entity": "N2O", "unit": "Gg N2O / yr"},
-    '243': {"template": '243', "entity": f"HFCS ({gwp_to_use})",
-            "unit": "Gg CO2 / yr"},  # HFCs
-    '244': {"template": '244', "entity": f"HFCS ({gwp_to_use})",
-            "unit": "Gg CO2 / yr"},
-    '245': {"template": '245', "entity": f"HFCS ({gwp_to_use})",
-            "unit": "Gg CO2 / yr"},
-    '246': {"template": '245', "entity": f"HFCS ({gwp_to_use})",
-            "unit": "Gg CO2 / yr"},
-    '247': {"template": '247', "entity": f"HFCS ({gwp_to_use})",
-            "unit": "Gg CO2 / yr"},
-    '248': {"template": '247', "entity": f"HFCS ({gwp_to_use})",
-            "unit": "Gg CO2 / yr"},
-    '249': {"template": '203', "entity": f"PFCS ({gwp_to_use})",
-            "unit": "Gg CO2 / yr"},  # PFCs
-    '250': {"template": '250', "entity": f"PFCS ({gwp_to_use})",
-            "unit": "Gg CO2 / yr"},
-    '251': {"template": '207', "entity": f"PFCS ({gwp_to_use})",
-            "unit": "Gg CO2 / yr"},
-    '252': {"template": '252', "entity": f"PFCS ({gwp_to_use})",
-            "unit": "Gg CO2 / yr"},
-    '253': {"template": '253', "entity": f"PFCS ({gwp_to_use})",
-            "unit": "Gg CO2 / yr"},
-    '254': {"template": '254', "entity": f"PFCS ({gwp_to_use})",
-            "unit": "Gg CO2 / yr"},
-    '255': {"template": '219', "entity": f"SF6 ({gwp_to_use})",
-            "unit": "Gg CO2 / yr"},  # SF6
-    '256': {"template": '256', "entity": f"SF6 ({gwp_to_use})",
-            "unit": "Gg CO2 / yr"},
-    '257': {"template": '207', "entity": f"SF6 ({gwp_to_use})",
-            "unit": "Gg CO2 / yr"},
-    '258': {"template": '258', "entity": f"SF6 ({gwp_to_use})",
-            "unit": "Gg CO2 / yr"},
-    '259': {"template": '207', "entity": f"SF6 ({gwp_to_use})",
-            "unit": "Gg CO2 / yr"},
-    '260': {"template": '260', "entity": f"SF6 ({gwp_to_use})",
-            "unit": "Gg CO2 / yr"},
-    '261': {"template": '261', "entity": f"NF3 ({gwp_to_use})",
-            "unit": "Gg CO2 / yr"},  # NF3
-    '262': {"template": '262', "entity": f"NF3 ({gwp_to_use})",
-            "unit": "Gg CO2 / yr"},
-    '263': {"template": '207', "entity": f"NF3 ({gwp_to_use})",
-            "unit": "Gg CO2 / yr"},
-    '264': {"template": '264', "entity": f"NF3 ({gwp_to_use})",
-            "unit": "Gg CO2 / yr"},
-    '265': {"template": '207', "entity": f"NF3 ({gwp_to_use})",
-            "unit": "Gg CO2 / yr"},
-    '266': {"template": '266', "entity": f"NF3 ({gwp_to_use})",
-            "unit": "Gg CO2 / yr"},
+    "203": {"template": "203", "entity": "CO2", "unit": "Gg CO2 / yr"},  # CO2
+    "204": {"template": "204", "entity": "CO2", "unit": "Gg CO2 / yr"},
+    "205": {"template": "205", "entity": "CO2", "unit": "Gg CO2 / yr"},
+    "206": {"template": "206", "entity": "CO2", "unit": "Gg CO2 / yr"},
+    "207": {"template": "207", "entity": "CO2", "unit": "Gg CO2 / yr"},
+    "208": {"template": "207", "entity": "CO2", "unit": "Gg CO2 / yr"},
+    "209": {"template": "205", "entity": "CO2", "unit": "Gg CO2 / yr"},
+    "210": {"template": "206", "entity": "CO2", "unit": "Gg CO2 / yr"},
+    "211": {"template": "207", "entity": "CO2", "unit": "Gg CO2 / yr"},
+    "212": {"template": "207", "entity": "CO2", "unit": "Gg CO2 / yr"},
+    "213": {"template": "207", "entity": "CO2", "unit": "Gg CO2 / yr"},
+    "214": {"template": "205", "entity": "CO2", "unit": "Gg CO2 / yr"},
+    "215": {"template": "207", "entity": "CO2", "unit": "Gg CO2 / yr"},
+    "216": {"template": "216", "entity": "CO2", "unit": "Gg CO2 / yr"},
+    "217": {"template": "207", "entity": "CO2", "unit": "Gg CO2 / yr"},
+    "218": {"template": "205", "entity": "CO2", "unit": "Gg CO2 / yr"},
+    "219": {"template": "219", "entity": "CH4", "unit": "Gg CH4 / yr"},  # CH4
+    "220": {"template": "220", "entity": "CH4", "unit": "Gg CH4 / yr"},
+    "221": {"template": "221", "entity": "CH4", "unit": "Gg CH4 / yr"},
+    "222": {"template": "222", "entity": "CH4", "unit": "Gg CH4 / yr"},
+    "223": {"template": "207", "entity": "CH4", "unit": "Gg CH4 / yr"},
+    "224": {"template": "220", "entity": "CH4", "unit": "Gg CH4 / yr"},
+    "225": {"template": "225", "entity": "CH4", "unit": "Gg CH4 / yr"},
+    "226": {"template": "226", "entity": "CH4", "unit": "Gg CH4 / yr"},
+    "227": {"template": "207", "entity": "CH4", "unit": "Gg CH4 / yr"},
+    "228": {"template": "220", "entity": "CH4", "unit": "Gg CH4 / yr"},
+    "229": {"template": "229", "entity": "CH4", "unit": "Gg CH4 / yr"},
+    "230": {"template": "226", "entity": "CH4", "unit": "Gg CH4 / yr"},
+    "231": {"template": "207", "entity": "N2O", "unit": "Gg N2O / yr"},  # N2O
+    "232": {"template": "232", "entity": "N2O", "unit": "Gg N2O / yr"},
+    "233": {"template": "233", "entity": "N2O", "unit": "Gg N2O / yr"},
+    "234": {"template": "234", "entity": "N2O", "unit": "Gg N2O / yr"},
+    "235": {"template": "207", "entity": "N2O", "unit": "Gg N2O / yr"},
+    "236": {"template": "236", "entity": "N2O", "unit": "Gg N2O / yr"},
+    "237": {"template": "233", "entity": "N2O", "unit": "Gg N2O / yr"},
+    "238": {"template": "234", "entity": "N2O", "unit": "Gg N2O / yr"},
+    "239": {"template": "207", "entity": "N2O", "unit": "Gg N2O / yr"},
+    "240": {"template": "240", "entity": "N2O", "unit": "Gg N2O / yr"},
+    "241": {"template": "233", "entity": "N2O", "unit": "Gg N2O / yr"},
+    "242": {"template": "234", "entity": "N2O", "unit": "Gg N2O / yr"},
+    "243": {
+        "template": "243",
+        "entity": f"HFCS ({gwp_to_use})",
+        "unit": "Gg CO2 / yr",
+    },  # HFCs
+    "244": {"template": "244", "entity": f"HFCS ({gwp_to_use})", "unit": "Gg CO2 / yr"},
+    "245": {"template": "245", "entity": f"HFCS ({gwp_to_use})", "unit": "Gg CO2 / yr"},
+    "246": {"template": "245", "entity": f"HFCS ({gwp_to_use})", "unit": "Gg CO2 / yr"},
+    "247": {"template": "247", "entity": f"HFCS ({gwp_to_use})", "unit": "Gg CO2 / yr"},
+    "248": {"template": "247", "entity": f"HFCS ({gwp_to_use})", "unit": "Gg CO2 / yr"},
+    "249": {
+        "template": "203",
+        "entity": f"PFCS ({gwp_to_use})",
+        "unit": "Gg CO2 / yr",
+    },  # PFCs
+    "250": {"template": "250", "entity": f"PFCS ({gwp_to_use})", "unit": "Gg CO2 / yr"},
+    "251": {"template": "207", "entity": f"PFCS ({gwp_to_use})", "unit": "Gg CO2 / yr"},
+    "252": {"template": "252", "entity": f"PFCS ({gwp_to_use})", "unit": "Gg CO2 / yr"},
+    "253": {"template": "253", "entity": f"PFCS ({gwp_to_use})", "unit": "Gg CO2 / yr"},
+    "254": {"template": "254", "entity": f"PFCS ({gwp_to_use})", "unit": "Gg CO2 / yr"},
+    "255": {
+        "template": "219",
+        "entity": f"SF6 ({gwp_to_use})",
+        "unit": "Gg CO2 / yr",
+    },  # SF6
+    "256": {"template": "256", "entity": f"SF6 ({gwp_to_use})", "unit": "Gg CO2 / yr"},
+    "257": {"template": "207", "entity": f"SF6 ({gwp_to_use})", "unit": "Gg CO2 / yr"},
+    "258": {"template": "258", "entity": f"SF6 ({gwp_to_use})", "unit": "Gg CO2 / yr"},
+    "259": {"template": "207", "entity": f"SF6 ({gwp_to_use})", "unit": "Gg CO2 / yr"},
+    "260": {"template": "260", "entity": f"SF6 ({gwp_to_use})", "unit": "Gg CO2 / yr"},
+    "261": {
+        "template": "261",
+        "entity": f"NF3 ({gwp_to_use})",
+        "unit": "Gg CO2 / yr",
+    },  # NF3
+    "262": {"template": "262", "entity": f"NF3 ({gwp_to_use})", "unit": "Gg CO2 / yr"},
+    "263": {"template": "207", "entity": f"NF3 ({gwp_to_use})", "unit": "Gg CO2 / yr"},
+    "264": {"template": "264", "entity": f"NF3 ({gwp_to_use})", "unit": "Gg CO2 / yr"},
+    "265": {"template": "207", "entity": f"NF3 ({gwp_to_use})", "unit": "Gg CO2 / yr"},
+    "266": {"template": "266", "entity": f"NF3 ({gwp_to_use})", "unit": "Gg CO2 / yr"},
 }
 
 country_processing_step1 = {
-    'aggregate_cats': {
-        'M.3.C.AG': {'sources': ['3.C.1', '3.C.2', '3.C.3', '3.C.4', '3.C.5',
-                                 '3.C.6', '3.C.7', '3.C.8'],
-                     'name': 'Aggregate sources and non-CO2 emissions sources on land '
-                             '(Agriculture)'},
-        'M.3.D.AG': {'sources': ['3.D.2'],
-                     'name': 'Other (Agriculture)'},
-        'M.AG.ELV': {'sources': ['M.3.C.AG', 'M.3.D.AG'],
-                     'name': 'Agriculture excluding livestock'},
-        'M.AG': {'sources': ['3.A', 'M.AG.ELV'],
-                     'name': 'Agriculture'},
-        'M.3.D.LU': {'sources': ['3.D.1'],
-                     'name': 'Other (LULUCF)'},
-        'M.LULUCF': {'sources': ['3.B', 'M.3.D.LU'],
-                     'name': 'LULUCF'},
-        'M.0.EL': {'sources': ['1', '2', 'M.AG', '4', '5'],
-                     'name': 'National total emissions excluding LULUCF'},
-    },
-    'basket_copy': {
-        'GWPs_to_add': ["SARGWP100", "AR5GWP100", "AR6GWP100"],
-        'entities': ["HFCS", "PFCS"],
-        'source_GWP': gwp_to_use,
+    "aggregate_cats": {
+        "M.3.C.AG": {
+            "sources": [
+                "3.C.1",
+                "3.C.2",
+                "3.C.3",
+                "3.C.4",
+                "3.C.5",
+                "3.C.6",
+                "3.C.7",
+                "3.C.8",
+            ],
+            "name": "Aggregate sources and non-CO2 emissions sources on land "
+            "(Agriculture)",
+        },
+        "M.3.D.AG": {"sources": ["3.D.2"], "name": "Other (Agriculture)"},
+        "M.AG.ELV": {
+            "sources": ["M.3.C.AG", "M.3.D.AG"],
+            "name": "Agriculture excluding livestock",
+        },
+        "M.AG": {"sources": ["3.A", "M.AG.ELV"], "name": "Agriculture"},
+        "M.3.D.LU": {"sources": ["3.D.1"], "name": "Other (LULUCF)"},
+        "M.LULUCF": {"sources": ["3.B", "M.3.D.LU"], "name": "LULUCF"},
+        "M.0.EL": {
+            "sources": ["1", "2", "M.AG", "4", "5"],
+            "name": "National total emissions excluding LULUCF",
+        },
+    },
+    "basket_copy": {
+        "GWPs_to_add": ["SARGWP100", "AR5GWP100", "AR6GWP100"],
+        "entities": ["HFCS", "PFCS"],
+        "source_GWP": gwp_to_use,
     },
 }
 
 gas_baskets = {
-    'FGASES (SARGWP100)': ['HFCS (SARGWP100)', 'PFCS (SARGWP100)', 'SF6', 'NF3'],
-    'FGASES (AR4GWP100)': ['HFCS (AR4GWP100)', 'PFCS (AR4GWP100)', 'SF6', 'NF3'],
-    'FGASES (AR5GWP100)':['HFCS (AR5GWP100)', 'PFCS (AR5GWP100)', 'SF6', 'NF3'],
-    'FGASES (AR6GWP100)':['HFCS (AR6GWP100)', 'PFCS (AR6GWP100)', 'SF6', 'NF3'],
-    'KYOTOGHG (SARGWP100)': ['CO2', 'CH4', 'N2O', 'FGASES (SARGWP100)'],
-    'KYOTOGHG (AR4GWP100)': ['CO2', 'CH4', 'N2O', 'FGASES (AR4GWP100)'],
-    'KYOTOGHG (AR5GWP100)': ['CO2', 'CH4', 'N2O', 'FGASES (AR5GWP100)'],
-    'KYOTOGHG (AR6GWP100)': ['CO2', 'CH4', 'N2O', 'FGASES (AR6GWP100)'],
+    "FGASES (SARGWP100)": ["HFCS (SARGWP100)", "PFCS (SARGWP100)", "SF6", "NF3"],
+    "FGASES (AR4GWP100)": ["HFCS (AR4GWP100)", "PFCS (AR4GWP100)", "SF6", "NF3"],
+    "FGASES (AR5GWP100)": ["HFCS (AR5GWP100)", "PFCS (AR5GWP100)", "SF6", "NF3"],
+    "FGASES (AR6GWP100)": ["HFCS (AR6GWP100)", "PFCS (AR6GWP100)", "SF6", "NF3"],
+    "KYOTOGHG (SARGWP100)": ["CO2", "CH4", "N2O", "FGASES (SARGWP100)"],
+    "KYOTOGHG (AR4GWP100)": ["CO2", "CH4", "N2O", "FGASES (AR4GWP100)"],
+    "KYOTOGHG (AR5GWP100)": ["CO2", "CH4", "N2O", "FGASES (AR5GWP100)"],
+    "KYOTOGHG (AR6GWP100)": ["CO2", "CH4", "N2O", "FGASES (AR6GWP100)"],
 }

+ 82 - 52
src/unfccc_ghg_data/unfccc_reader/Malaysia/read_MYS_BUR3_from_pdf.py

@@ -1,8 +1,15 @@
-# this script reads data from Malaysia's BUR3
+"""
+Read Malaysia's BUR3 from pdf
+
+This script reads data from Malaysia's BUR3
+Data are read from pdf using camelot
+
+"""
+
 
 import camelot
 import primap2 as pm2
-from .config_mys_bur3 import (
+from config_mys_bur3 import (
     cat_code_regexp,
     cat_codes_manual,
     cat_names_fix,
@@ -33,8 +40,8 @@ if __name__ == "__main__":
     # ###
     # configuration
     # ###
-    input_folder = downloaded_data_path / 'UNFCCC' / 'Malaysia' / 'BUR3'
-    output_folder = extracted_data_path / 'UNFCCC' / 'Malaysia'
+    input_folder = downloaded_data_path / "UNFCCC" / "Malaysia" / "BUR3"
+    output_folder = extracted_data_path / "UNFCCC" / "Malaysia"
     if not output_folder.exists():
         output_folder.mkdir()
 
@@ -47,7 +54,7 @@ if __name__ == "__main__":
     # SF6: 234 - 237
     # NF3: 238 - 241
 
-    output_filename = 'MYS_BUR3_2020_'
+    output_filename = "MYS_BUR3_2020_"
     compression = dict(zlib=True, complevel=9)
 
     # ###
@@ -62,36 +69,44 @@ if __name__ == "__main__":
         area = table_def_templates[page_template_nr]["area"]
         if "cols" in table_def_templates[page_template_nr].keys():
             cols = table_def_templates[page_template_nr]["cols"]
-            tables = camelot.read_pdf(str(input_folder / pdf_file), pages=str(page), \
-                                      flavor='stream', table_areas=area, columns=cols,
-                                      split_text=True)
+            tables = camelot.read_pdf(
+                str(input_folder / pdf_file),
+                pages=str(page),
+                flavor="stream",
+                table_areas=area,
+                columns=cols,
+                split_text=True,
+            )
         else:
-            tables = camelot.read_pdf(str(input_folder / pdf_file), pages=str(page), \
-                                      flavor='stream', table_areas=area)
+            tables = camelot.read_pdf(
+                str(input_folder / pdf_file),
+                pages=str(page),
+                flavor="stream",
+                table_areas=area,
+            )
 
         df_current = tables[0].df.copy()
-        df_current.iloc[0,0] = 'Categories'
+        df_current.iloc[0, 0] = "Categories"
         df_current.columns = df_current.iloc[0]
         df_current = df_current.drop(0)
         # replace double \n
-        df_current[index_cols[0]] = \
-            df_current[index_cols[0]].str.replace("\n", " ")
+        df_current[index_cols[0]] = df_current[index_cols[0]].str.replace("\n", " ")
         # replace double and triple spaces
-        df_current[index_cols[0]] = \
-            df_current[index_cols[0]].str.replace("   ", " ")
-        df_current[index_cols[0]] = \
-            df_current[index_cols[0]].str.replace("  ", " ")
+        df_current[index_cols[0]] = df_current[index_cols[0]].str.replace("   ", " ")
+        df_current[index_cols[0]] = df_current[index_cols[0]].str.replace("  ", " ")
 
         # fix the split rows
         if "rows_to_fix" in table_def_templates[page_template_nr].keys():
             for n_rows in table_def_templates[page_template_nr]["rows_to_fix"].keys():
-                df_current = fix_rows(df_current,
-                                      table_def_templates[page_template_nr]["rows_to_fix"][
-                                          n_rows], index_cols[0], n_rows)
+                df_current = fix_rows(
+                    df_current,
+                    table_def_templates[page_template_nr]["rows_to_fix"][n_rows],
+                    index_cols[0],
+                    n_rows,
+                )
 
         # replace category names with typos
-        df_current[index_cols[0]] = \
-            df_current[index_cols[0]].replace(cat_names_fix)
+        df_current[index_cols[0]] = df_current[index_cols[0]].replace(cat_names_fix)
 
         # replace empty stings
         df_current = df_current.replace(values_replacement)
@@ -106,7 +121,7 @@ if __name__ == "__main__":
         for col in cols_for_space_stripping:
             df_current[col] = df_current[col].str.strip()
 
-        # print(df_current.columns.values)
+        # print(df_current.columns.to_numpy())
 
         # aggregate dfs
         if df_all is None:
@@ -118,10 +133,11 @@ if __name__ == "__main__":
             cols_both = list(set(cols_all).intersection(set(cols_current)))
             # print(cols_both)
             if len(cols_both) > 0:
-                df_all = df_all.merge(df_current, how='outer', on=cols_both,
-                                      suffixes=(None, None))
+                df_all = df_all.merge(
+                    df_current, how="outer", on=cols_both, suffixes=(None, None)
+                )
             else:
-                df_all = df_all.merge(df_current, how='outer', suffixes=(None, None))
+                df_all = df_all.merge(df_current, how="outer", suffixes=(None, None))
             df_all = df_all.groupby(index_cols).first().reset_index()
             # df_all = df_all.join(df_current, how='outer')
 
@@ -137,28 +153,38 @@ if __name__ == "__main__":
     # replace cat names by codes in col "Categories"
     # first the manual replacements
     df_all["Categories"] = df_all["Categories"].replace(cat_codes_manual)
+
+    # then the regex replacements
-    def repl(m):
-        return convert_ipcc_code_primap_to_primap2('IPC' + m.group('code'))
-    df_all["Categories"] = df_all["Categories"].str.replace(cat_code_regexp, repl, regex=True)
+    def repl(m):  # noqa: D103
+        return convert_ipcc_code_primap_to_primap2("IPC" + m.group("code"))
+
+    df_all["Categories"] = df_all["Categories"].str.replace(
+        cat_code_regexp, repl, regex=True
+    )
 
     # make sure all col headers are str
     df_all.columns = df_all.columns.map(str)
 
     # remove thousands separators as pd.to_numeric can't deal with that
     # also replace None with NaN
-    year_cols = list(set(df_all.columns) - set(['Categories', 'entity', 'unit', 'orig_cat_name']))
+    year_cols = list(
+        set(df_all.columns) - set(["Categories", "entity", "unit", "orig_cat_name"])
+    )
     for col in year_cols:
         df_all.loc[:, col] = df_all.loc[:, col].str.strip()
-        def repl(m):
-            return m.group('part1') + m.group('part2')
-        df_all.loc[:, col] = df_all.loc[:, col].str.replace('(?P<part1>[0-9]+),(?P<part2>[0-9\\.]+)$', repl, regex=True)
-        df_all[col][df_all[col].isnull()] = 'NaN'
+
+        def repl(m):  # noqa: D103
+            return m.group("part1") + m.group("part2")
+
+        df_all.loc[:, col] = df_all.loc[:, col].str.replace(
+            "(?P<part1>[0-9]+),(?P<part2>[0-9\\.]+)$", repl, regex=True
+        )
+        df_all[col][df_all[col].isna()] = "NaN"
         # manually map code NENO to nan
-        df_all.loc[:, col] = df_all.loc[:, col].str.replace('NENO','NaN')
-        df_all.loc[:, col] = df_all.loc[:, col].str.replace('O NANaN','NaN')
-        df_all.loc[:, col] = df_all.loc[:, col].str.replace('IE NO','0')
-        df_all.loc[:, col] = df_all.loc[:, col].str.replace('IE NA NO I','0')
+        df_all.loc[:, col] = df_all.loc[:, col].str.replace("NENO", "NaN")
+        df_all.loc[:, col] = df_all.loc[:, col].str.replace("O NANaN", "NaN")
+        df_all.loc[:, col] = df_all.loc[:, col].str.replace("IE NO", "0")
+        df_all.loc[:, col] = df_all.loc[:, col].str.replace("IE NA NO I", "0")
         # TODO: add code to PRIMAP2
 
     # drop orig_cat_name as it's non-unique per category
@@ -167,17 +193,17 @@ if __name__ == "__main__":
     data_if = pm2.pm2io.convert_wide_dataframe_if(
         df_all,
         coords_cols=coords_cols,
-        #add_coords_cols=add_coords_cols,
+        # add_coords_cols=add_coords_cols,
         coords_defaults=coords_defaults,
         coords_terminologies=coords_terminologies,
-        #coords_value_mapping=coords_value_mapping,
-        #coords_value_filling=coords_value_filling,
-        #filter_remove=filter_remove,
-        #filter_keep=filter_keep,
+        # coords_value_mapping=coords_value_mapping,
+        # coords_value_filling=coords_value_filling,
+        # filter_remove=filter_remove,
+        # filter_keep=filter_keep,
         meta_data=meta_data,
         convert_str=True,
         time_format="%Y",
-        )
+    )
 
     data_pm2 = pm2.pm2io.from_interchange_format(data_if)
 
@@ -190,12 +216,15 @@ if __name__ == "__main__":
         output_folder.mkdir()
     pm2.pm2io.write_interchange_format(
         output_folder / (output_filename + coords_terminologies["category"] + "_raw"),
-        data_if)
+        data_if,
+    )
 
     encoding = {var: compression for var in data_pm2.data_vars}
     data_pm2.pr.to_netcdf(
-        output_folder / (output_filename + coords_terminologies["category"] + "_raw.nc"),
-        encoding=encoding)
+        output_folder
+        / (output_filename + coords_terminologies["category"] + "_raw.nc"),
+        encoding=encoding,
+    )
 
     # ###
     # ## process the data
@@ -211,9 +240,9 @@ if __name__ == "__main__":
     )
 
     # adapt source and metadata
-    current_source = data_proc_pm2.coords["source"].values[0]
+    current_source = data_proc_pm2.coords["source"].to_numpy()[0]
     data_temp = data_proc_pm2.pr.loc[{"source": current_source}]
-    data_proc_pm2 = data_proc_pm2.pr.set("source", 'BUR_NIR', data_temp)
+    data_proc_pm2 = data_proc_pm2.pr.set("source", "BUR_NIR", data_temp)
 
     # ###
     # save data to IF and native format
@@ -222,9 +251,10 @@ if __name__ == "__main__":
     if not output_folder.exists():
         output_folder.mkdir()
     pm2.pm2io.write_interchange_format(
-        output_folder / (output_filename + terminology_proc), data_proc_if)
+        output_folder / (output_filename + terminology_proc), data_proc_if
+    )
 
     encoding = {var: compression for var in data_proc_pm2.data_vars}
     data_proc_pm2.pr.to_netcdf(
-        output_folder / (output_filename + terminology_proc + ".nc"),
-        encoding=encoding)
+        output_folder / (output_filename + terminology_proc + ".nc"), encoding=encoding
+    )

+ 84 - 55
src/unfccc_ghg_data/unfccc_reader/Malaysia/read_MYS_BUR4_from_pdf.py

@@ -1,10 +1,16 @@
-# this script reads data from Malaysia's BUR4
-# code ist mostly identical to BUR3
+"""
+Read Malaysia's BUR4 from pdf
+
+This script reads data from Malaysia's BUR4
+Data are read from pdf using camelot
+
+Code is mostly identical to BUR3
+"""
 
 
 import camelot
 import primap2 as pm2
-from .config_mys_bur4 import (
+from config_mys_bur4 import (
     cat_code_regexp,
     cat_codes_manual,
     cat_names_fix,
@@ -35,8 +41,8 @@ if __name__ == "__main__":
     # ###
     # configuration
     # ###
-    input_folder = downloaded_data_path / 'UNFCCC' / 'Malaysia' / 'BUR4'
-    output_folder = extracted_data_path / 'UNFCCC' / 'Malaysia'
+    input_folder = downloaded_data_path / "UNFCCC" / "Malaysia" / "BUR4"
+    output_folder = extracted_data_path / "UNFCCC" / "Malaysia"
     if not output_folder.exists():
         output_folder.mkdir()
 
@@ -50,7 +56,7 @@ if __name__ == "__main__":
     # SF6: 255 - 260
     # NF3: 261 - 266
 
-    output_filename = 'MYS_BUR4_2022_'
+    output_filename = "MYS_BUR4_2022_"
     compression = dict(zlib=True, complevel=9)
 
     # ###
@@ -65,36 +71,44 @@ if __name__ == "__main__":
         area = table_def_templates[page_template_nr]["area"]
         if "cols" in table_def_templates[page_template_nr].keys():
             cols = table_def_templates[page_template_nr]["cols"]
-            tables = camelot.read_pdf(str(input_folder / pdf_file), pages=str(page), \
-                                      flavor='stream', table_areas=area, columns=cols,
-                                      split_text=True)
+            tables = camelot.read_pdf(
+                str(input_folder / pdf_file),
+                pages=str(page),
+                flavor="stream",
+                table_areas=area,
+                columns=cols,
+                split_text=True,
+            )
         else:
-            tables = camelot.read_pdf(str(input_folder / pdf_file), pages=str(page), \
-                                      flavor='stream', table_areas=area)
+            tables = camelot.read_pdf(
+                str(input_folder / pdf_file),
+                pages=str(page),
+                flavor="stream",
+                table_areas=area,
+            )
 
         df_current = tables[0].df.copy()
-        df_current.iloc[0,0] = 'Categories'
+        df_current.iloc[0, 0] = "Categories"
         df_current.columns = df_current.iloc[0]
         df_current = df_current.drop(0)
         # replace double \n
-        df_current[index_cols[0]] = \
-            df_current[index_cols[0]].str.replace("\n", " ")
+        df_current[index_cols[0]] = df_current[index_cols[0]].str.replace("\n", " ")
         # replace double and triple spaces
-        df_current[index_cols[0]] = \
-            df_current[index_cols[0]].str.replace("   ", " ")
-        df_current[index_cols[0]] = \
-            df_current[index_cols[0]].str.replace("  ", " ")
+        df_current[index_cols[0]] = df_current[index_cols[0]].str.replace("   ", " ")
+        df_current[index_cols[0]] = df_current[index_cols[0]].str.replace("  ", " ")
 
         # fix the split rows
         if "rows_to_fix" in table_def_templates[page_template_nr].keys():
             for n_rows in table_def_templates[page_template_nr]["rows_to_fix"].keys():
-                df_current = fix_rows(df_current,
-                                      table_def_templates[page_template_nr]["rows_to_fix"][
-                                          n_rows], index_cols[0], n_rows)
+                df_current = fix_rows(
+                    df_current,
+                    table_def_templates[page_template_nr]["rows_to_fix"][n_rows],
+                    index_cols[0],
+                    n_rows,
+                )
 
         # replace category names with typos
-        df_current[index_cols[0]] = \
-            df_current[index_cols[0]].replace(cat_names_fix)
+        df_current[index_cols[0]] = df_current[index_cols[0]].replace(cat_names_fix)
 
         # replace empty stings
         df_current = df_current.replace(values_replacement)
@@ -109,22 +123,23 @@ if __name__ == "__main__":
         for col in cols_for_space_stripping:
             df_current[col] = df_current[col].str.strip()
 
-        # print(df_current.columns.values)
+        # print(df_current.columns.to_numpy())
 
         # aggregate dfs
         if df_all is None:
             df_all = df_current
         else:
             # find intersecting cols
-            cols_all = df_all.columns.values
-            cols_current = df_current.columns.values
+            cols_all = df_all.columns.to_numpy()
+            cols_current = df_current.columns.to_numpy()
             cols_both = list(set(cols_all).intersection(set(cols_current)))
             # print(cols_both)
             if len(cols_both) > 0:
-                df_all = df_all.merge(df_current, how='outer', on=cols_both,
-                                      suffixes=(None, None))
+                df_all = df_all.merge(
+                    df_current, how="outer", on=cols_both, suffixes=(None, None)
+                )
             else:
-                df_all = df_all.merge(df_current, how='outer', suffixes=(None, None))
+                df_all = df_all.merge(df_current, how="outer", suffixes=(None, None))
             df_all = df_all.groupby(index_cols).first().reset_index()
             # df_all = df_all.join(df_current, how='outer')
 
@@ -140,28 +155,38 @@ if __name__ == "__main__":
     # replace cat names by codes in col "Categories"
     # first the manual replacements
     df_all["Categories"] = df_all["Categories"].replace(cat_codes_manual)
+
+    # then the regex replacements
-    def repl(m):
-        return convert_ipcc_code_primap_to_primap2('IPC' + m.group('code'))
-    df_all["Categories"] = df_all["Categories"].str.replace(cat_code_regexp, repl, regex=True)
+    def repl(m):  # noqa: D103
+        return convert_ipcc_code_primap_to_primap2("IPC" + m.group("code"))
+
+    df_all["Categories"] = df_all["Categories"].str.replace(
+        cat_code_regexp, repl, regex=True
+    )
 
     # make sure all col headers are str
     df_all.columns = df_all.columns.map(str)
 
     # remove thousands separators as pd.to_numeric can't deal with that
     # also replace None with NaN
-    year_cols = list(set(df_all.columns) - set(['Categories', 'entity', 'unit', 'orig_cat_name']))
+    year_cols = list(
+        set(df_all.columns) - set(["Categories", "entity", "unit", "orig_cat_name"])
+    )
     for col in year_cols:
         df_all.loc[:, col] = df_all.loc[:, col].str.strip()
-        def repl(m):
-            return m.group('part1') + m.group('part2')
-        df_all.loc[:, col] = df_all.loc[:, col].str.replace('(?P<part1>[0-9]+),(?P<part2>[0-9\\.]+)$', repl, regex=True)
-        df_all[col][df_all[col].isnull()] = 'NaN'
+
+        def repl(m):  # noqa: D103
+            return m.group("part1") + m.group("part2")
+
+        df_all.loc[:, col] = df_all.loc[:, col].str.replace(
+            "(?P<part1>[0-9]+),(?P<part2>[0-9\\.]+)$", repl, regex=True
+        )
+        df_all[col][df_all[col].isna()] = "NaN"
         # manually map code NENO to nan
-        df_all.loc[:, col] = df_all.loc[:, col].str.replace('NENO','NaN')
-        df_all.loc[:, col] = df_all.loc[:, col].str.replace('O NANaN','NaN')
-        df_all.loc[:, col] = df_all.loc[:, col].str.replace('IE NO','0')
-        df_all.loc[:, col] = df_all.loc[:, col].str.replace('IE NA NO I','0')
+        df_all.loc[:, col] = df_all.loc[:, col].str.replace("NENO", "NaN")
+        df_all.loc[:, col] = df_all.loc[:, col].str.replace("O NANaN", "NaN")
+        df_all.loc[:, col] = df_all.loc[:, col].str.replace("IE NO", "0")
+        df_all.loc[:, col] = df_all.loc[:, col].str.replace("IE NA NO I", "0")
         # TODO: add code to PRIMAP2
 
     # drop orig_cat_name as it's non-unique per category
@@ -170,17 +195,17 @@ if __name__ == "__main__":
     data_if = pm2.pm2io.convert_wide_dataframe_if(
         df_all,
         coords_cols=coords_cols,
-        #add_coords_cols=add_coords_cols,
+        # add_coords_cols=add_coords_cols,
         coords_defaults=coords_defaults,
         coords_terminologies=coords_terminologies,
-        #coords_value_mapping=coords_value_mapping,
-        #coords_value_filling=coords_value_filling,
-        #filter_remove=filter_remove,
-        #filter_keep=filter_keep,
+        # coords_value_mapping=coords_value_mapping,
+        # coords_value_filling=coords_value_filling,
+        # filter_remove=filter_remove,
+        # filter_keep=filter_keep,
         meta_data=meta_data,
         convert_str=True,
         time_format="%Y",
-        )
+    )
 
     data_pm2 = pm2.pm2io.from_interchange_format(data_if)
 
@@ -193,12 +218,15 @@ if __name__ == "__main__":
         output_folder.mkdir()
     pm2.pm2io.write_interchange_format(
         output_folder / (output_filename + coords_terminologies["category"] + "_raw"),
-        data_if)
+        data_if,
+    )
 
     encoding = {var: compression for var in data_pm2.data_vars}
     data_pm2.pr.to_netcdf(
-        output_folder / (output_filename + coords_terminologies["category"] + "_raw.nc"),
-        encoding=encoding)
+        output_folder
+        / (output_filename + coords_terminologies["category"] + "_raw.nc"),
+        encoding=encoding,
+    )
 
     # ###
     # ## process the data
@@ -214,9 +242,9 @@ if __name__ == "__main__":
     )
 
     # adapt source and metadata
-    current_source = data_proc_pm2.coords["source"].values[0]
+    current_source = data_proc_pm2.coords["source"].to_numpy()[0]
     data_temp = data_proc_pm2.pr.loc[{"source": current_source}]
-    data_proc_pm2 = data_proc_pm2.pr.set("source", 'BUR_NIR', data_temp)
+    data_proc_pm2 = data_proc_pm2.pr.set("source", "BUR_NIR", data_temp)
 
     # ###
     # save data to IF and native format
@@ -225,9 +253,10 @@ if __name__ == "__main__":
     if not output_folder.exists():
         output_folder.mkdir()
     pm2.pm2io.write_interchange_format(
-        output_folder / (output_filename + terminology_proc), data_proc_if)
+        output_folder / (output_filename + terminology_proc), data_proc_if
+    )
 
     encoding = {var: compression for var in data_proc_pm2.data_vars}
     data_proc_pm2.pr.to_netcdf(
-        output_folder / (output_filename + terminology_proc + ".nc"),
-        encoding=encoding)
+        output_folder / (output_filename + terminology_proc + ".nc"), encoding=encoding
+    )

+ 30 - 0
src/unfccc_ghg_data/unfccc_reader/Mexico/__init__.py

@@ -0,0 +1,30 @@
+"""Read Mexico's BURs, NIRs, NCs
+
+Scripts and configurations to read Mexico's submissions to the UNFCCC.
+Currently, the following submissions and datasets are available (all datasets
+including DI (read using the DI-reader) and legacy BUR/NIR (no code)):
+
+.. exec_code::
+    :hide_code:
+
+    from unfccc_ghg_data.helper.functions import (get_country_datasets,
+                                                  get_country_submissions)
+    country = 'MEX'
+    # print available submissions
+    print("="*15 + " Available submissions " + "="*15)
+    get_country_submissions(country, True)
+    print("")
+
+    #print available datasets
+    print("="*15 + " Available datasets " + "="*15)
+    get_country_datasets(country, True)
+
+You can also obtain this information running
+
+.. code-block:: bash
+
+    poetry run doit country_info country=MEX
+
+See below for a listing of scripts for BUR/NIR reading including links.
+
+"""

+ 81 - 35
src/unfccc_ghg_data/unfccc_reader/Mexico/config_mex_bur3.py

@@ -1,8 +1,42 @@
+"""Config for Mexico's BUR3
+
+Full configuration including PRIMAP2 conversion config and metadata
+
+"""
+
 import pandas as pd
 
 
-def fix_rows(data: pd.DataFrame, rows_to_fix: list, col_to_use: str,
-             n_rows: int) -> pd.DataFrame:
+def fix_rows(
+    data: pd.DataFrame, rows_to_fix: list, col_to_use: str, n_rows: int
+) -> pd.DataFrame:
+    """
+    Combine split rows
+
+    This function combines rows which have been split into several rows during data
+    reading from pdf because they contained line breaks.
+
+    Parameters
+    ----------
+    data: pd.DataFrame
+        The data to work with
+    rows_to_fix: list
+        List of values for which to fix rows
+    col_to_use: str
+        column to use to find the rows to merge
+    n_rows: int
+        How many rows to combine for each row found. e.g. 3 means combine the found
+        row with the following two rows. Negative values are used for more
+        complicated situations where the rows to merge are also before the position
+        of the value that indicates the merge. See code for details
+
+    Returns
+    -------
+        pandas DataFrame with combined rows. The individual rows are removed
+
+    TODO: move function to helper module (make sure to have one function that works
+     for all cases)
+    """
     for row in rows_to_fix:
         # print(row)
         # find the row number and collect the row and the next two rows
@@ -16,29 +50,29 @@ def fix_rows(data: pd.DataFrame, rows_to_fix: list, col_to_use: str,
         for item in index:
             loc = data.index.get_loc(item)
             ####print(data[col_to_use].loc[loc + 1])
-            if n_rows == -2:
+            if n_rows == -2:  # noqa: PLR2004
                 locs_to_merge = list(range(loc - 1, loc + 1))
                 loc_to_check = loc - 1
-            if n_rows == -6:
+            elif n_rows == -6:  # noqa: PLR2004
                 locs_to_merge = list(range(loc - 3, loc + 3))
                 loc_to_check = loc - 3
-            elif n_rows == -3:
+            elif n_rows == -3:  # noqa: PLR2004
                 locs_to_merge = list(range(loc - 1, loc + 2))
                 loc_to_check = loc - 1
             else:
                 locs_to_merge = list(range(loc, loc + n_rows))
                 loc_to_check = loc + 1
 
-            if (data[col_to_use].loc[loc_to_check] == '') or n_rows == 2:
+            if (not data[col_to_use].loc[loc_to_check]) or n_rows == 2:  # noqa: PLR2004
                 rows_to_merge = data.iloc[locs_to_merge]
                 indices_to_merge = rows_to_merge.index
                 # replace numerical NaN values
                 ####print(rows_to_merge)
-                rows_to_merge = rows_to_merge.fillna('')
+                rows_to_merge = rows_to_merge.fillna("")
                 ####print("fillna")
                 ####print(rows_to_merge)
                 # join the three rows
-                new_row = rows_to_merge.agg(' '.join)
+                new_row = rows_to_merge.agg(" ".join)
                 # replace the double spaces that are created
                 # must be done here and not at the end as splits are not always
                 # the same and join would produce different col values
@@ -54,67 +88,77 @@ def fix_rows(data: pd.DataFrame, rows_to_fix: list, col_to_use: str,
         data = data.reset_index(drop=True)
     return data
 
+
 page_defs = {
-    '118': {
+    "118": {
         "camelot": {
-            "table_areas": ['49,602,551,73'],
-            "columns": ['223,277,314,348,392,422,446,483'],
+            "table_areas": ["49,602,551,73"],
+            "columns": ["223,277,314,348,392,422,446,483"],
             "split_text": False,
             "flavor": "stream",
         },
         "rows_to_fix": {
             -6: ["Categorías de fuentes y"],
-            3: ["Todas las emisiones y las absorciones",
+            3: [
+                "Todas las emisiones y las absorciones",
                 "Todas las emisiones (sin [3B] Tierra ni",
                 "[1A] Actividades de quema del",
                 "[1A2] Industrias manufactura y de la",
                 "[1B] Emisiones fugitivas provenientes de",
-                "[2] Procesos industriales y uso de"],
+                "[2] Procesos industriales y uso de",
+            ],
         },
     },
-    '119': {
+    "119": {
         "camelot": {
-            "table_areas": ['49,650,551,77'],
-            "columns": ['228,275,317,352,394,421,446,483'],
+            "table_areas": ["49,650,551,77"],
+            "columns": ["228,275,317,352,394,421,446,483"],
             "split_text": True,
             "flavor": "stream",
         },
         "rows_to_fix": {
             -6: ["Categorías de fuentes y"],
-            3: ["[2B4] Producción de caprolactama,",
+            3: [
+                "[2B4] Producción de caprolactama,",
                 "[2B8] Producción petroquímica y negro",
                 "[2D] Uso de productos no energéticos de",
-                "[2E1] Circuitos integrados o"],
+                "[2E1] Circuitos integrados o",
+            ],
         },
     },
-    '120': {
+    "120": {
         "camelot": {
-            "table_areas": ['49,650,551,77'],
-            "columns": ['223,277,314,348,392,422,446,483'],
+            "table_areas": ["49,650,551,77"],
+            "columns": ["223,277,314,348,392,422,446,483"],
             "split_text": False,
             "flavor": "stream",
         },
         "rows_to_fix": {
             -6: ["Categorías de fuentes y"],
             -3: ["[3B] Tierra"],
-            3: ["[2F] Uso de productos sustitutos de las",
+            3: [
+                "[2F] Uso de productos sustitutos de las",
                 "[2G] Manufactura y utilización de otros",
-                "[3] Agricultura, silvicultura y otros usos"],
-            2: ["[2H2] Industria de la alimentación y las",
-                "[2G2] SF₆ y PFC de otros usos de"],
+                "[3] Agricultura, silvicultura y otros usos",
+            ],
+            2: [
+                "[2H2] Industria de la alimentación y las",
+                "[2G2] SF₆ y PFC de otros usos de",
+            ],
         },
     },
-    '121': {
+    "121": {
         "camelot": {
-            "table_areas": ['49,650,551,70'],
-            "columns": ['223,277,314,348,392,422,446,483'],
+            "table_areas": ["49,650,551,70"],
+            "columns": ["223,277,314,348,392,422,446,483"],
             "split_text": False,
             "flavor": "stream",
         },
         "rows_to_fix": {
             -6: ["Categorías de fuentes y"],
             -3: ["[3B1] Tierra forestales"],
-            3: ["[3C] Fuentes agregadas y fuentes de",
+            3: [
+                "[3C] Fuentes agregadas y fuentes de",
                 "[3C1] Emisiones de GEI por quemado de",
                 "[3C4] Emisiones directas de los N₂O de",
                 "[3C5] Emisiones indirectas de los N₂O de",
@@ -123,24 +167,26 @@ page_defs = {
                 "[4A2] Sitios no controlados de",
                 "[4A3] Tiraderos a cielo abierto para",
                 "[4B] Tratamiento biológico de los",
-                ],
+            ],
         },
     },
-    '122': {
+    "122": {
         "camelot": {
-            "table_areas": ['49,650,551,404'],
-            "columns": ['223,277,314,348,392,422,446,483'],
+            "table_areas": ["49,650,551,404"],
+            "columns": ["223,277,314,348,392,422,446,483"],
             "split_text": False,
             "flavor": "stream",
         },
         "rows_to_fix": {
             -6: ["Categorías de fuentes y"],
-            3: ["[4C] Incineración y quema a cielo abierto",
+            3: [
+                "[4C] Incineración y quema a cielo abierto",
                 "[4C1] Incineración de residuos peligrosos",
                 "[4C2] Quema a cielo abierto de residuos",
                 "[4D] Tratamiento y eliminación de aguas",
                 "[4D1] Tratamiento y eliminación de",
-                "[4D2] Tratamiento y eliminación de"],
+                "[4D2] Tratamiento y eliminación de",
+            ],
         },
     },
 }

+ 63 - 66
src/unfccc_ghg_data/unfccc_reader/Mexico/read_MEX_BUR3_from_pdf.py

@@ -1,10 +1,15 @@
-# this script reads data from Mexico's BUR3
-# Data is read from the pdf file
+"""
+Read Mexico's BUR3 from pdf
+
+This script reads data from Mexico's BUR3
+Data are read from pdf using camelot
+
+"""
 
 import camelot
 import pandas as pd
 import primap2 as pm2
-from .config_mex_bur3 import fix_rows, page_defs
+from config_mex_bur3 import fix_rows, page_defs
 
 from unfccc_ghg_data.helper import downloaded_data_path, extracted_data_path
 
@@ -12,16 +17,16 @@ if __name__ == "__main__":
     # ###
     # configuration
     # ###
-    input_folder = downloaded_data_path / 'UNFCCC' / 'Mexico' / 'BUR3'
-    output_folder = extracted_data_path / 'UNFCCC' / 'Mexico'
+    input_folder = downloaded_data_path / "UNFCCC" / "Mexico" / "BUR3"
+    output_folder = extracted_data_path / "UNFCCC" / "Mexico"
     if not output_folder.exists():
-       output_folder.mkdir()
+        output_folder.mkdir()
 
-    output_filename = 'MEX_BUR3_2022_'
+    output_filename = "MEX_BUR3_2022_"
     compression = dict(zlib=True, complevel=9)
-    inventory_file = 'Mexico_3er_BUR.pdf'
+    inventory_file = "Mexico_3er_BUR.pdf"
 
-    gwp_to_use = 'AR5GWP100'
+    gwp_to_use = "AR5GWP100"
     year = 2019
     entity_row = 0
     unit_row = 1
@@ -43,12 +48,12 @@ if __name__ == "__main__":
 
     # manual category codes
     cat_codes_manual = {
-        'Todas las emisiones y las absorciones nacionales': '0',
-        'Todas las emisiones (sin [3B] Tierra ni [3D1] Productos de madera recolectada': 'M0EL',
-        '2F6 Otras aplicaciones': '2F6',
+        "Todas las emisiones y las absorciones nacionales": "0",
+        "Todas las emisiones (sin [3B] Tierra ni [3D1] Productos de madera recolectada": "M0EL",
+        "2F6 Otras aplicaciones": "2F6",
     }
 
-    cat_code_regexp = r'^\[(?P<code>[a-zA-Z0-9]{1,3})\].*'
+    cat_code_regexp = r"^\[(?P<code>[a-zA-Z0-9]{1,3})\].*"
 
     coords_cols = {
         "category": "category",
@@ -77,18 +82,17 @@ if __name__ == "__main__":
         "unit": "PRIMAP1",
         "category": "PRIMAP1",
         "entity": {
-            'CH₄': 'CH4',
-            'CO₂': 'CO2',
-            'EMISIONES NETAS PCG AR5': 'KYOTOGHG (AR5GWP100)',
-            'HFC': f"HFCS ({gwp_to_use})",
-            'NF₃': f"NF3 ({gwp_to_use})",
-            'N₂O': 'N2O',
-            'PFC': f"PFCS ({gwp_to_use})",
-            'SF₆': f"SF6 ({gwp_to_use})",
+            "CH₄": "CH4",
+            "CO₂": "CO2",
+            "EMISIONES NETAS PCG AR5": "KYOTOGHG (AR5GWP100)",
+            "HFC": f"HFCS ({gwp_to_use})",
+            "NF₃": f"NF3 ({gwp_to_use})",
+            "N₂O": "N2O",
+            "PFC": f"PFCS ({gwp_to_use})",
+            "SF₆": f"SF6 ({gwp_to_use})",
         },
     }
 
-
     filter_remove = {}
 
     filter_keep = {}
@@ -102,11 +106,6 @@ if __name__ == "__main__":
         "institution": "UNFCCC",
     }
 
-    # convert to mass units where possible
-    entities_to_convert_to_mass = [
-        'NF3', 'SF6'
-    ]
-
     # ###
     # read the data from pdf into one long format dataframe
     # ###
@@ -114,8 +113,9 @@ if __name__ == "__main__":
     for page in page_defs.keys():
         print(f"Working on page {page}")
         page_def = page_defs[page]
-        tables = camelot.read_pdf(str(input_folder / inventory_file), pages=page,
-                                  **page_def["camelot"])
+        tables = camelot.read_pdf(
+            str(input_folder / inventory_file), pages=page, **page_def["camelot"]
+        )
         df_this_table = tables[0].df
 
         # fix rows
@@ -127,31 +127,36 @@ if __name__ == "__main__":
             df_this_table.iloc[:, 0] = df_this_table.iloc[:, 0].str.replace("-", "-")
             # replace double space in entity
             df_this_table.iloc[0, :] = df_this_table.iloc[0, :].str.replace("  ", " ")
-            df_this_table = fix_rows(df_this_table, page_def["rows_to_fix"][n_rows], 0,
-                                     n_rows)
+            df_this_table = fix_rows(
+                df_this_table, page_def["rows_to_fix"][n_rows], 0, n_rows
+            )
 
         # add units
-        for col in df_this_table.columns.values:
+        for col in df_this_table.columns.to_numpy():
             if df_this_table[col].iloc[0] in units.keys():
                 df_this_table[col].iloc[1] = units[df_this_table[col].iloc[0]]
 
         # bring in right format for conversion to long format
-        df_this_table = pm2.pm2io.nir_add_unit_information(df_this_table, unit_row=unit_row,
-                                                           entity_row=entity_row,
-                                                           regexp_unit=".*",
-                                                           regexp_entity=".*",
-                                                           default_unit="GgCO2eq")
+        df_this_table = pm2.pm2io.nir_add_unit_information(
+            df_this_table,
+            unit_row=unit_row,
+            entity_row=entity_row,
+            regexp_unit=".*",
+            regexp_entity=".*",
+            default_unit="GgCO2eq",
+        )
 
         # set index and convert to long format
         df_this_table = df_this_table.set_index(index_cols)
-        df_this_table_long = pm2.pm2io.nir_convert_df_to_long(df_this_table, year,
-                                                              header_long)
+        df_this_table_long = pm2.pm2io.nir_convert_df_to_long(
+            df_this_table, year, header_long
+        )
 
         # combine with tables for other sectors (merge not append)
         if df_all is None:
             df_all = df_this_table_long
         else:
-            df_all = pd.concat([df_all, df_this_table_long], axis=0, join='outer')
+            df_all = pd.concat([df_all, df_this_table_long], axis=0, join="outer")
 
     # ###
     # conversion to PM2 IF
@@ -162,15 +167,19 @@ if __name__ == "__main__":
     # replace cat names by codes in col "category"
     # first the manual replacements
     df_all["category"] = df_all["category"].replace(cat_codes_manual)
+
     # then the regex replacements
-    def repl(m):
-       return m.group('code')
-    df_all["category"] = df_all["category"].str.replace(cat_code_regexp, repl, regex=True)
+    def repl(m):  # noqa: D103
+        return m.group("code")
+
+    df_all["category"] = df_all["category"].str.replace(
+        cat_code_regexp, repl, regex=True
+    )
     df_all = df_all.reset_index(drop=True)
 
     # replace "," and " " with "" in data
-    df_all.loc[:, "data"] = df_all.loc[:, "data"].str.replace(',','', regex=False)
-    df_all.loc[:, "data"] = df_all.loc[:, "data"].str.replace(' ','', regex=False)
+    df_all.loc[:, "data"] = df_all.loc[:, "data"].str.replace(",", "", regex=False)
+    df_all.loc[:, "data"] = df_all.loc[:, "data"].str.replace(" ", "", regex=False)
 
     # make sure all col headers are str
     df_all.columns = df_all.columns.map(str)
@@ -185,12 +194,13 @@ if __name__ == "__main__":
         coords_defaults=coords_defaults,
         coords_terminologies=coords_terminologies,
         coords_value_mapping=coords_value_mapping,
-        #coords_value_filling=coords_value_filling,
+        # coords_value_filling=coords_value_filling,
         filter_remove=filter_remove,
-        #filter_keep=filter_keep,
+        # filter_keep=filter_keep,
         meta_data=meta_data,
-        convert_str=True
-        )
+        convert_str=True,
+        time_format="%Y",
+    )
 
     cat_label = "category (IPCC2006)"
     # fix error cats
@@ -198,21 +208,6 @@ if __name__ == "__main__":
 
     data_pm2 = pm2.pm2io.from_interchange_format(data_if)
 
-    # convert to mass units from CO2eq
-
-    entities_to_convert = [f"{entity} ({gwp_to_use})" for entity in
-                           entities_to_convert_to_mass]
-
-    for entity in entities_to_convert:
-        converted = data_pm2[entity].pr.convert_to_mass()
-        basic_entity = entity.split(" ")[0]
-        converted = converted.to_dataset(name=basic_entity)
-        data_pm2 = data_pm2.pr.merge(converted)
-        data_pm2[basic_entity].attrs["entity"] = basic_entity
-
-    # drop the GWP data
-    data_pm2 = data_pm2.drop_vars(entities_to_convert)
-
     # convert back to IF to have units in the fixed format
     data_if = data_pm2.pr.to_interchange_format()
 
@@ -222,9 +217,11 @@ if __name__ == "__main__":
     if not output_folder.exists():
         output_folder.mkdir()
     pm2.pm2io.write_interchange_format(
-        output_folder / (output_filename + coords_terminologies["category"]), data_if)
+        output_folder / (output_filename + coords_terminologies["category"]), data_if
+    )
 
     encoding = {var: compression for var in data_pm2.data_vars}
     data_pm2.pr.to_netcdf(
         output_folder / (output_filename + coords_terminologies["category"] + ".nc"),
-        encoding=encoding)
+        encoding=encoding,
+    )

+ 30 - 0
src/unfccc_ghg_data/unfccc_reader/Montenegro/__init__.py

@@ -0,0 +1,30 @@
+"""Read Montenegro's BURs, NIRs, NCs
+
+Scripts and configurations to read Montenegro's submissions to the UNFCCC.
+Currently, the following submissions and datasets are available (all datasets
+including DI (read using the DI-reader) and legacy BUR/NIR (no code)):
+
+.. exec_code::
+    :hide_code:
+
+    from unfccc_ghg_data.helper.functions import (get_country_datasets,
+                                                  get_country_submissions)
+    country = 'MNE'
+    # print available submissions
+    print("="*15 + " Available submissions " + "="*15)
+    get_country_submissions(country, True)
+    print("")
+
+    #print available datasets
+    print("="*15 + " Available datasets " + "="*15)
+    get_country_datasets(country, True)
+
+You can also obtain this information running
+
+.. code-block:: bash
+
+    poetry run doit country_info country=MNE
+
+See below for a listing of scripts for BUR/NIR reading including links.
+
+"""

+ 103 - 47
src/unfccc_ghg_data/unfccc_reader/Montenegro/config_mne_bur3.py

@@ -1,67 +1,123 @@
+"""Config for Montenegro's BUR3
+
+Partial configuration for camelot and data aggregation. PRIMAP2 conversion
+config and metadata are defined in the reading script
+
+"""
+
 # most time series are contained twice and 2005 data is also contained twice. Some
 # data is inconsistent and we remove the time series with errors
 drop_data = {
-    2: { # individual sector time series are (mostly) wrong, leave only 0.EL timeseries
-        "cats": ["1", "1.A", "1.A.1", "1.A.1", "1.A.2", "1.A.3", "1.A.4", "1.A.5", "1.B", "1.B.1", "1.B.2",
-                 "2", "2.A", "2.B", "2.C", "2.D", "2.E", "2.F", "2.G", "2.H",
-                 "3", "3.A", "3.B"],
-        #"years": ["2005"], # 2005 data copy of 2019
+    2: {  # individual sector time series are (mostly) wrong, leave only 0.EL timeseries
+        "cats": [
+            "1",
+            "1.A",
+            "1.A.1",
+            "1.A.1",
+            "1.A.2",
+            "1.A.3",
+            "1.A.4",
+            "1.A.5",
+            "1.B",
+            "1.B.1",
+            "1.B.2",
+            "2",
+            "2.A",
+            "2.B",
+            "2.C",
+            "2.D",
+            "2.E",
+            "2.F",
+            "2.G",
+            "2.H",
+            "3",
+            "3.A",
+            "3.B",
+        ],
+        # "years": ["2005"], # 2005 data copy of 2019
     },
-    3: { # individual sector time series are (mostly) wrong, leave only 0.EL timeseries
-        "cats": ["3.C", "3.D", "3.E", "3.F", "3.G", "5", "5.A", "5.B", "5.C", "5.D", "6"]
-        #"years": ["2005"],
+    3: {  # individual sector time series are (mostly) wrong, leave only 0.EL timeseries
+        "cats": [
+            "3.C",
+            "3.D",
+            "3.E",
+            "3.F",
+            "3.G",
+            "5",
+            "5.A",
+            "5.B",
+            "5.C",
+            "5.D",
+            "6",
+        ]
+        # "years": ["2005"],
     },
-    6: { #2005 data copy of 2019
+    6: {  # 2005 data copy of 2019
         "years": ["2005"],
     },
-    7: { # 2005 data copy of 2019 for 3.G
+    7: {  # 2005 data copy of 2019 for 3.G
         "years": ["2005"],
     },
-    25: { # 2005 data copy of 2019 (CO2, 2005-2019, first table)
+    25: {  # 2005 data copy of 2019 (CO2, 2005-2019, first table)
         "years": ["2005"],
     },
-    26: { # 2005 data copy of 2019 (CO2, 2005-2019, second table)
+    26: {  # 2005 data copy of 2019 (CO2, 2005-2019, second table)
         "years": ["2005"],
     },
 }
 
 cat_mapping = {
-    '3': 'M.AG',
-    '3.A': '3.A.1',
-    '3.B': '3.A.2',
-    '3.C': '3.C.7', # rice
-    '3.D': 'M.3.C.45AG', # Agricultural soils
-    '3.E': '3.C.1.c', # prescribed burning of savanna
-    '3.F': '3.C.1.b', # field burning of agricultural residues
-    '3.G': '3.C.3', # urea application
-    '4': 'M.LULUCF',
-    '4.A': '3.B.1', # forest
-    '4.B': '3.B.2', # cropland
-    '4.C': '3.B.3', # grassland
-    '4.D': '3.B.4', # wetland
-    '4.E': '3.B.5', # Settlements
-    '4.F': '3.B.6', # other land
-    '4.G': '3.D.1', # HWP
-    '5': '4',
-    '5.A': '4.A',
-    '5.B': '4.B',
-    '5.C': '4.C',
-    '5.D': '4.D',
-    '6': '5',
+    "3": "M.AG",
+    "3.A": "3.A.1",
+    "3.B": "3.A.2",
+    "3.C": "3.C.7",  # rice
+    "3.D": "M.3.C.45AG",  # Agricultural soils
+    "3.E": "3.C.1.c",  # prescribed burning of savanna
+    "3.F": "3.C.1.b",  # field burning of agricultural residues
+    "3.G": "3.C.3",  # urea application
+    "4": "M.LULUCF",
+    "4.A": "3.B.1",  # forest
+    "4.B": "3.B.2",  # cropland
+    "4.C": "3.B.3",  # grassland
+    "4.D": "3.B.4",  # wetland
+    "4.E": "3.B.5",  # Settlements
+    "4.F": "3.B.6",  # other land
+    "4.G": "3.D.1",  # HWP
+    "5": "4",
+    "5.A": "4.A",
+    "5.B": "4.B",
+    "5.C": "4.C",
+    "5.D": "4.D",
+    "6": "5",
 }
 
 aggregate_cats = {
-    '3.A': {'sources': ['3.A.1', '3.A.2'], 'name': 'Livestock'},
-    '3.B': {'sources': ['3.B.1', '3.B.2', '3.B.3', '3.B.4', '3.B.5', '3.B.6'], 'name': 'Land'},
-    'M.3.C.1.AG': {'sources': ['3.C.1.c', '3.C.1.b'], 'name': 'Emissions from Biomass '
-                                                          'Burning (Agriculture)'},
-    '3.C.1': {'sources': ['3.C.1.c', '3.C.1.b'], 'name': 'Emissions from Biomass Burning'},
-    '3.C': {'sources': ['3.C.1', '3.C.3', 'M.3.C.45AG', '3.C.7'],
-            'name': 'Aggregate sources and non-CO2 emissions sources on land'},
-    'M.3.C.AG': {'sources': ['3.C.1.AG', '3.C.3', 'M.3.C.45AG', '3.C.7'],
-            'name': 'Aggregate sources and non-CO2 emissions sources on land (Agriculture)'},
-    '3.D': {'sources': ['3.D.1'], 'name': 'Other'},
-    '3': {'sources': ['M.AG', 'M.LULUCF'], 'name': 'AFOLU'},
-    'M.AG.ELV': {'sources': ['M.3.C.AG'], 'name': 'Agriculture excluding livestock emissions'},
-    '0': {'sources': ['1', '2', '3', '4', '5']},
+    "3.A": {"sources": ["3.A.1", "3.A.2"], "name": "Livestock"},
+    "3.B": {
+        "sources": ["3.B.1", "3.B.2", "3.B.3", "3.B.4", "3.B.5", "3.B.6"],
+        "name": "Land",
+    },
+    "M.3.C.1.AG": {
+        "sources": ["3.C.1.c", "3.C.1.b"],
+        "name": "Emissions from Biomass " "Burning (Agriculture)",
+    },
+    "3.C.1": {
+        "sources": ["3.C.1.c", "3.C.1.b"],
+        "name": "Emissions from Biomass Burning",
+    },
+    "3.C": {
+        "sources": ["3.C.1", "3.C.3", "M.3.C.45AG", "3.C.7"],
+        "name": "Aggregate sources and non-CO2 emissions sources on land",
+    },
+    "M.3.C.AG": {
+        "sources": ["3.C.1.AG", "3.C.3", "M.3.C.45AG", "3.C.7"],
+        "name": "Aggregate sources and non-CO2 emissions sources on land (Agriculture)",
+    },
+    "3.D": {"sources": ["3.D.1"], "name": "Other"},
+    "3": {"sources": ["M.AG", "M.LULUCF"], "name": "AFOLU"},
+    "M.AG.ELV": {
+        "sources": ["M.3.C.AG"],
+        "name": "Agriculture excluding livestock emissions",
+    },
+    "0": {"sources": ["1", "2", "3", "4", "5"]},
 }

+ 88 - 56
src/unfccc_ghg_data/unfccc_reader/Montenegro/read_MNE_BUR3_from_pdf.py

@@ -1,41 +1,41 @@
-# Montenegro BUR 3
-# Code to read the emissions inventory contained in Montenegro's third BUR from pdf
-# and convert into PRIMAP2 format
+"""
+Read Montenegro's BUR3 from pdf
+
+This script reads data from Montenegro's BUR3
+Data are read from pdf using camelot
+
+"""
+
 
 # ###
 # imports
 # ###
 import copy
 import re
-from pathlib import Path
 
 import camelot
 import pandas as pd
 import primap2 as pm2
-from .config_mne_bur3 import aggregate_cats, cat_mapping, drop_data
+from config_mne_bur3 import aggregate_cats, cat_mapping, drop_data
 from primap2.pm2io._data_reading import matches_time_format
 
+from unfccc_ghg_data.helper import downloaded_data_path, extracted_data_path
+
 if __name__ == "__main__":
     # ###
     # configuration
     # ###
 
-    # folders and files
-    root_path = Path(__file__).parents[3].absolute()
-    root_path = root_path.resolve()
-    downloaded_data_path = root_path / "downloaded_data"
-    extracted_data_path = root_path / "extracted_data"
-
-    input_folder = downloaded_data_path / 'UNFCCC' / 'Montenegro' / 'BUR3'
-    output_folder = extracted_data_path / 'UNFCCC' / 'Montenegro'
-    output_filename = 'MNE_BUR3_2022_'
+    input_folder = downloaded_data_path / "UNFCCC" / "Montenegro" / "BUR3"
+    output_folder = extracted_data_path / "UNFCCC" / "Montenegro"
+    output_filename = "MNE_BUR3_2022_"
     compression = dict(zlib=True, complevel=9)
 
-    inventory_file_pdf = 'NIR-2021_MNE_Finalversion.pdf'
+    inventory_file_pdf = "NIR-2021_MNE_Finalversion.pdf"
 
     # reading and processing
     years_to_read = range(1990, 2018 + 1)
-    pages_to_read = range(535,583)
+    pages_to_read = range(535, 583)
 
     pos_entity = [0, 0]
     cat_code_col = 0
@@ -43,7 +43,7 @@ if __name__ == "__main__":
     regex_unit = r"\((.*)\)"
     regex_entity = r"^(.*)\s\("
 
-    gwp_to_use = 'AR4GWP100'
+    gwp_to_use = "AR4GWP100"
 
     # conversion to PRIMAP2 format
 
@@ -61,28 +61,28 @@ if __name__ == "__main__":
     }
 
     coords_value_mapping = {
-        'unit': 'PRIMAP1',
-        'entity': {
+        "unit": "PRIMAP1",
+        "entity": {
             f"GHG ({gwp_to_use})": f"KYOTOGHG ({gwp_to_use})",
             f"HFC ({gwp_to_use})": f"HFCS ({gwp_to_use})",
             f"PFC ({gwp_to_use})": f"PFCS ({gwp_to_use})",
         },
-        'category': {
-            'Total national GHG emissions (with LULUCF)': '0',
-            'Total national GHG emissions (without LULUCF)': 'M.0.EL',
-            'International Bunkers': 'M.BK',
-            '1.A.3.a.i': 'M.BK.A',
-            '1.A.3.d.i': 'M.BK.M',
-            'CO2 from Biomass Combustion for Energy Production': 'M.BIO',
-            '6 Other': '6',
-            '2 H': '2.H',
+        "category": {
+            "Total national GHG emissions (with LULUCF)": "0",
+            "Total national GHG emissions (without LULUCF)": "M.0.EL",
+            "International Bunkers": "M.BK",
+            "1.A.3.a.i": "M.BK.A",
+            "1.A.3.d.i": "M.BK.M",
+            "CO2 from Biomass Combustion for Energy Production": "M.BIO",
+            "6 Other": "6",
+            "2 H": "2.H",
         },
     }
 
     coords_value_filling = {
         "category": {
             "orig_cat_name": {
-                'International Bunkers': 'M.BK',
+                "International Bunkers": "M.BK",
             },
         },
     }
@@ -103,7 +103,8 @@ if __name__ == "__main__":
         "references": "https://unfccc.int/documents/461972",
         "rights": "",
         "contact": "mail@johannes-guetschow.de",
-        "title": "Montenegro. Biennial update report (BUR). BUR 3. National inventory report.",
+        "title": "Montenegro. Biennial update report (BUR). "
+        "BUR 3. National inventory report.",
         "comment": "Read fom pdf file by Johannes Gütschow",
         "institution": "United Nations Framework Convention on Climate Change (UNFCCC)",
     }
@@ -111,7 +112,11 @@ if __name__ == "__main__":
     # ###
     # Read all time series table from pdf
     # ###
-    tables = camelot.read_pdf(str(input_folder / inventory_file_pdf), pages=','.join([str(page) for page in pages_to_read]), flavor='lattice')
+    tables = camelot.read_pdf(
+        str(input_folder / inventory_file_pdf),
+        pages=",".join([str(page) for page in pages_to_read]),
+        flavor="lattice",
+    )
 
     # ###
     # process tables and combine them using the pm2 pr.merge function
@@ -142,11 +147,14 @@ if __name__ == "__main__":
 
         # remove ',' in numbers
         years = df_current_table.columns[2:]
-        def repl(m):
+
+        def repl(m):  # noqa: D103
             return m.group("part1") + m.group("part2")
+
         for year in years:
             df_current_table.loc[:, year] = df_current_table.loc[:, year].str.replace(
-                '(?P<part1>[0-9]+),(?P<part2>[0-9\\.]+)$', repl, regex=True)
+                "(?P<part1>[0-9]+),(?P<part2>[0-9\\.]+)$", repl, regex=True
+            )
 
         # add entity and unit cols
         df_current_table["entity"] = entity
@@ -156,13 +164,15 @@ if __name__ == "__main__":
             to_drop = drop_data[i]
             if "cats" in to_drop.keys():
                 mask = df_current_table["category"].isin(to_drop["cats"])
-                df_current_table = df_current_table.drop(df_current_table[mask].index,
-                                                         axis=0)
+                df_current_table = df_current_table.drop(
+                    df_current_table[mask].index, axis=0
+                )
             if "years" in to_drop.keys():
                 df_current_table = df_current_table.drop(columns=to_drop["years"])
 
         df_current_table["category"] = df_current_table["category"].fillna(
-            value=df_current_table["orig_cat_name"])
+            value=df_current_table["orig_cat_name"]
+        )
 
         df_current_table = df_current_table.drop(columns="orig_cat_name")
 
@@ -191,7 +201,7 @@ if __name__ == "__main__":
     # ###
 
     # convert to mass units from CO2eq
-    entities_to_convert = ['N2O', 'SF6', 'CH4']
+    entities_to_convert = ["N2O", "SF6", "CH4"]
     entities_to_convert = [f"{entity} ({gwp_to_use})" for entity in entities_to_convert]
 
     # for entity in entities_to_convert:
@@ -215,21 +225,28 @@ if __name__ == "__main__":
 
     # map categories
     data_if_2006 = data_if_2006.replace(
-        {f"category ({coords_terminologies['category']})": cat_mapping})
+        {f"category ({coords_terminologies['category']})": cat_mapping}
+    )
     data_if_2006[f"category ({coords_terminologies['category']})"].unique()
 
     # rename the category col
-    data_if_2006.rename(columns={
-        f"category ({coords_terminologies['category']})": 'category (IPCC2006_PRIMAP)'},
-                        inplace=True)
-    data_if_2006.attrs['attrs']['cat'] = 'category (IPCC2006_PRIMAP)'
-    data_if_2006.attrs['dimensions']['*'] = [
-        'category (IPCC2006_PRIMAP)' if item == f"category ({coords_terminologies['category']})"
-        else item for item in data_if_2006.attrs['dimensions']['*']]
+    data_if_2006 = data_if_2006.rename(
+        columns={
+            f"category ({coords_terminologies['category']})": "category (IPCC2006_PRIMAP)"
+        }
+    )
+    data_if_2006.attrs["attrs"]["cat"] = "category (IPCC2006_PRIMAP)"
+    data_if_2006.attrs["dimensions"]["*"] = [
+        "category (IPCC2006_PRIMAP)"
+        if item == f"category ({coords_terminologies['category']})"
+        else item
+        for item in data_if_2006.attrs["dimensions"]["*"]
+    ]
     # aggregate categories
     for cat_to_agg in aggregate_cats:
         mask = data_if_2006["category (IPCC2006_PRIMAP)"].isin(
-            aggregate_cats[cat_to_agg]["sources"])
+            aggregate_cats[cat_to_agg]["sources"]
+        )
         df_test = data_if_2006[mask]
         # print(df_test)
 
@@ -237,10 +254,10 @@ if __name__ == "__main__":
             print(f"Aggregating category {cat_to_agg}")
             df_combine = df_test.copy(deep=True)
 
-            time_format = '%Y'
+            time_format = "%Y"
             time_columns = [
                 col
-                for col in df_combine.columns.values
+                for col in df_combine.columns.to_numpy()
                 if matches_time_format(col, time_format)
             ]
 
@@ -248,8 +265,15 @@ if __name__ == "__main__":
                 df_combine[col] = pd.to_numeric(df_combine[col], errors="coerce")
 
             df_combine = df_combine.groupby(
-                by=['source', 'scenario (PRIMAP)', 'provenance', 'area (ISO3)', 'entity',
-                    'unit']).sum(min_count=1)
+                by=[
+                    "source",
+                    "scenario (PRIMAP)",
+                    "provenance",
+                    "area (ISO3)",
+                    "entity",
+                    "unit",
+                ]
+            ).sum(min_count=1)
 
             df_combine.insert(0, "category (IPCC2006_PRIMAP)", cat_to_agg)
             # df_combine.insert(1, "cat_name_translation", aggregate_cats[cat_to_agg]["name"])
@@ -257,7 +281,7 @@ if __name__ == "__main__":
 
             df_combine = df_combine.reset_index()
 
-            data_if_2006 = pd.concat([data_if_2006, df_combine], axis=0, join='outer')
+            data_if_2006 = pd.concat([data_if_2006, df_combine], axis=0, join="outer")
             data_if_2006 = data_if_2006.reset_index(drop=True)
         else:
             print(f"no data to aggregate category {cat_to_agg}")
@@ -268,7 +292,6 @@ if __name__ == "__main__":
     # convert back to IF to have units in the fixed format
     data_if_2006 = data_pm2_2006.pr.to_interchange_format()
 
-
     # ###
     # save data to IF and native format
     # ###
@@ -276,13 +299,22 @@ if __name__ == "__main__":
         output_folder.mkdir()
 
     # data in original categories
-    pm2.pm2io.write_interchange_format(output_folder / (output_filename + coords_terminologies["category"]), data_if)
+    pm2.pm2io.write_interchange_format(
+        output_folder / (output_filename + coords_terminologies["category"]), data_if
+    )
 
     encoding = {var: compression for var in data_all.data_vars}
-    data_all.pr.to_netcdf(output_folder / (output_filename + coords_terminologies["category"] + ".nc"), encoding=encoding)
+    data_all.pr.to_netcdf(
+        output_folder / (output_filename + coords_terminologies["category"] + ".nc"),
+        encoding=encoding,
+    )
 
     # data in 2006 categories
-    pm2.pm2io.write_interchange_format(output_folder / (output_filename + "IPCC2006_PRIMAP"), data_if_2006)
+    pm2.pm2io.write_interchange_format(
+        output_folder / (output_filename + "IPCC2006_PRIMAP"), data_if_2006
+    )
 
     encoding = {var: compression for var in data_pm2_2006.data_vars}
-    data_pm2_2006.pr.to_netcdf(output_folder / (output_filename + "IPCC2006_PRIMAP" + ".nc"), encoding=encoding)
+    data_pm2_2006.pr.to_netcdf(
+        output_folder / (output_filename + "IPCC2006_PRIMAP" + ".nc"), encoding=encoding
+    )

+ 30 - 0
src/unfccc_ghg_data/unfccc_reader/Morocco/__init__.py

@@ -0,0 +1,30 @@
+"""Read Morocco's BURs, NIRs, NCs
+
+Scripts and configurations to read Morocco's submissions to the UNFCCC.
+Currently, the following submissions and datasets are available (all datasets
+including DI (read using the DI-reader) and legacy BUR/NIR (no code)):
+
+.. exec_code::
+    :hide_code:
+
+    from unfccc_ghg_data.helper.functions import (get_country_datasets,
+                                                  get_country_submissions)
+    country = 'MAR'
+    # print available submissions
+    print("="*15 + " Available submissions " + "="*15)
+    get_country_submissions(country, True)
+    print("")
+
+    #print available datasets
+    print("="*15 + " Available datasets " + "="*15)
+    get_country_datasets(country, True)
+
+You can also obtain this information running
+
+.. code-block:: bash
+
+    poetry run doit country_info country=MAR
+
+See below for a listing of scripts for BUR/NIR reading including links.
+
+"""

+ 187 - 108
src/unfccc_ghg_data/unfccc_reader/Morocco/config_mar_bur3.py

@@ -1,57 +1,98 @@
+"""Config for Morocco's BUR3
+
+Partial configuration for camelot and data aggregation. PRIMAP2 conversion
+config and metadata are defined in the reading script.
+
+"""
+
 # define which raw tables to combine
 table_defs = {
     2010: {
-        'Energy': [0, 1],
-        'Agriculture': [10],
-        'IPPU': [15, 16, 17],
-        'LULUCF': [30],
-        'Waste': [35],
+        "Energy": [0, 1],
+        "Agriculture": [10],
+        "IPPU": [15, 16, 17],
+        "LULUCF": [30],
+        "Waste": [35],
     },
     2012: {
-        'Energy': [2, 3],
-        'Agriculture': [11],
-        'IPPU': [18, 19, 20],
-        'LULUCF': [31],
-        'Waste': [36],
+        "Energy": [2, 3],
+        "Agriculture": [11],
+        "IPPU": [18, 19, 20],
+        "LULUCF": [31],
+        "Waste": [36],
     },
     2014: {
-        'Energy': [4, 5],
-        'Agriculture': [10],
-        'IPPU': [21, 22, 23],
-        'LULUCF': [32],
-        'Waste': [37],
+        "Energy": [4, 5],
+        "Agriculture": [10],
+        "IPPU": [21, 22, 23],
+        "LULUCF": [32],
+        "Waste": [37],
     },
     2016: {
-        'Energy': [6, 7],
-        'Agriculture': [10],
-        'IPPU': [24, 25, 26],
-        'LULUCF': [33],
-        'Waste': [38],
+        "Energy": [6, 7],
+        "Agriculture": [10],
+        "IPPU": [24, 25, 26],
+        "LULUCF": [33],
+        "Waste": [38],
     },
     2018: {
-        'Energy': [8, 9],
-        'Agriculture': [14],
-        'IPPU': [27, 28, 29],
-        'LULUCF': [34],
-        'Waste': [39],
+        "Energy": [8, 9],
+        "Agriculture": [14],
+        "IPPU": [27, 28, 29],
+        "LULUCF": [34],
+        "Waste": [39],
     },
 }
 
 header_defs = {
-    'Energy': [['Catégories', 'CO2', 'CH4', 'N2O', 'NOx', 'CO', 'COVNM', 'SO2'],
-        ['', 'Gg', 'Gg', 'Gg', 'Gg', 'Gg', 'Gg', 'Gg']],
-    'Agriculture': [['Catégories', 'CO2', 'CH4', 'N2O', 'NOx', 'CO', 'COVNM', 'SO2'],
-        ['', 'Gg', 'GgCO2eq', 'GgCO2eq', 'Gg', 'Gg', 'Gg', 'Gg']], # units are wrong
+    "Energy": [
+        ["Catégories", "CO2", "CH4", "N2O", "NOx", "CO", "COVNM", "SO2"],
+        ["", "Gg", "Gg", "Gg", "Gg", "Gg", "Gg", "Gg"],
+    ],
+    "Agriculture": [
+        ["Catégories", "CO2", "CH4", "N2O", "NOx", "CO", "COVNM", "SO2"],
+        ["", "Gg", "GgCO2eq", "GgCO2eq", "Gg", "Gg", "Gg", "Gg"],
+    ],  # units are wrong
     # in BUR pdf
-    'IPPU': [['Catégories', 'CO2', 'CH4', 'N2O', 'HFCs', 'PFCs', 'SF6', 'NOx', 'CO', 'COVNM', 'SO2'],
-        ['', 'GgCO2eq', 'GgCO2eq', 'GgCO2eq', 'GgCO2eq', 'GgCO2eq', 'GgCO2eq', 'Gg', 'Gg', 'Gg', 'Gg']],
-    'LULUCF': [['Catégories', 'CO2', 'CH4', 'N2O', 'NOx', 'CO', 'COVNM', 'SO2'],
-        ['', 'GgCO2eq', 'GgCO2eq', 'GgCO2eq', 'Gg', 'Gg', 'Gg', 'Gg']],
-    'Waste': [['Catégories', 'CO2', 'CH4', 'N2O', 'NOx', 'CO', 'COVNM', 'SO2'],
-        ['', 'GgCO2eq', 'GgCO2eq', 'GgCO2eq', 'Gg', 'Gg', 'Gg', 'Gg']],
+    "IPPU": [
+        [
+            "Catégories",
+            "CO2",
+            "CH4",
+            "N2O",
+            "HFCs",
+            "PFCs",
+            "SF6",
+            "NOx",
+            "CO",
+            "COVNM",
+            "SO2",
+        ],
+        [
+            "",
+            "GgCO2eq",
+            "GgCO2eq",
+            "GgCO2eq",
+            "GgCO2eq",
+            "GgCO2eq",
+            "GgCO2eq",
+            "Gg",
+            "Gg",
+            "Gg",
+            "Gg",
+        ],
+    ],
+    "LULUCF": [
+        ["Catégories", "CO2", "CH4", "N2O", "NOx", "CO", "COVNM", "SO2"],
+        ["", "GgCO2eq", "GgCO2eq", "GgCO2eq", "Gg", "Gg", "Gg", "Gg"],
+    ],
+    "Waste": [
+        ["Catégories", "CO2", "CH4", "N2O", "NOx", "CO", "COVNM", "SO2"],
+        ["", "GgCO2eq", "GgCO2eq", "GgCO2eq", "Gg", "Gg", "Gg", "Gg"],
+    ],
 }
 
-remove_cats = ['3.A.4', '3.B', '3.B.4', '1.B.2.a', '1.B.2.b', '1.B.2.c']
+remove_cats = ["3.A.4", "3.B", "3.B.4", "1.B.2.a", "1.B.2.b", "1.B.2.c"]
 
 cat_mapping = {
     "1.B.2.a.4": "1.B.2.a.iii.4",
@@ -61,81 +102,119 @@ cat_mapping = {
     "1.B.2.b.4": "1.B.2.b.iii.4",
     "1.B.2.b.5": "1.B.2.b.iii.5",
     "1.B.2.b.6": "1.B.2.b.iii.6",
-    "1.B.2.c.1": "1.B.2.b.i", # simplification, split to oil and gas ("1.B.2.X.i")
-    "1.B.2.c.2": "1.B.2.b.ii", # simplification, split to oil and gas ("1.B.2.X.ii")
-    '1.A.2.g': '1.A.2.m', # other industry
-    '3.A': '3.A.1', # enteric fermentation
-    '3.A.1': '3.A.1.a', # cattle
-    '3.A.1.a': '3.A.1.a.i',
-    '3.A.1.b': '3.A.1.a.ii',
-    '3.A.2': '3.A.1.c',
-    '3.A.3': '3.A.1.h', # Swine
-    '3.A.4.a': '3.A.1.d', # goats
-    '3.A.4.b': '3.A.1.e', # camels
-    '3.A.4.c': '3.A.1.f', # horses
-    '3.A.4.d': '3.A.1.g', # Mules and asses
-    '3.A.4.e': '3.A.1.i', # poultry
-#    '3.B': '3.A.2', # Manure Management
-    '3.B.1': '3.A.2.a', # cattle
-    '3.B.1.a': '3.A.2.a.i',
-    '3.B.1.b': '3.A.2.a.ii',
-    '3.B.2': '3.A.2.c', # Sheep
-    '3.B.3': '3.A.2.h', # Swine
-    '3.B.4.a': '3.A.2.d', # Goats
-    '3.B.4.b': '3.A.2.e', # Camels
-    '3.B.4.c': '3.A.2.f', # Horses
-    '3.B.4.d': '3.A.2.g', # Mules and Asses
-    '3.B.4.e': '3.A.2.i', # Poultry
-    '3.B.5': '3.C.6', # indirect N2O from manure management
-    '3.C': '3.C.7', # rice
-    '3.D': 'M.3.C.45AG', # Agricultural soils
-    '3.D.a': '3.C.4', #direct N2O from agri soils
-    '3.D.a.1': '3.C.4.a', # inorganic fertilizers
-    '3.D.a.2': '3.C.4.b', # organic fertilizers
-    '3.D.a.3': '3.C.4.c', # urine and dung by grazing animals
-    '3.D.a.4': '3.C.4.d', # N in crop residues
-    '3.D.b': '3.C.5', # indirect N2O from managed soils
-    '3.D.b.1': '3.C.5.a', # Atmospheric deposition
-    '3.D.b.2': '3.C.5.b', # nitrogen leeching and runoff
-    '3.H': '3.C.3', # urea application
-    'LU.3.B.1': '3.B.1', # forest
-    'LU.3.B.2': '3.B.2', # cropland
-    'LU.3.B.3': '3.B.3', # grassland
-    'LU.3.B.4': '3.B.4', # wetland
-    'LU.3.B.5': '3.B.5', # Settlements
-    'LU.3.B.6': '3.B.6', # other land
+    "1.B.2.c.1": "1.B.2.b.i",  # simplification, split to oil and gas ("1.B.2.X.i")
+    "1.B.2.c.2": "1.B.2.b.ii",  # simplification, split to oil and gas ("1.B.2.X.ii")
+    "1.A.2.g": "1.A.2.m",  # other industry
+    "3.A": "3.A.1",  # enteric fermentation
+    "3.A.1": "3.A.1.a",  # cattle
+    "3.A.1.a": "3.A.1.a.i",
+    "3.A.1.b": "3.A.1.a.ii",
+    "3.A.2": "3.A.1.c",
+    "3.A.3": "3.A.1.h",  # Swine
+    "3.A.4.a": "3.A.1.d",  # goats
+    "3.A.4.b": "3.A.1.e",  # camels
+    "3.A.4.c": "3.A.1.f",  # horses
+    "3.A.4.d": "3.A.1.g",  # Mules and asses
+    "3.A.4.e": "3.A.1.i",  # poultry
+    #    '3.B': '3.A.2', # Manure Management
+    "3.B.1": "3.A.2.a",  # cattle
+    "3.B.1.a": "3.A.2.a.i",
+    "3.B.1.b": "3.A.2.a.ii",
+    "3.B.2": "3.A.2.c",  # Sheep
+    "3.B.3": "3.A.2.h",  # Swine
+    "3.B.4.a": "3.A.2.d",  # Goats
+    "3.B.4.b": "3.A.2.e",  # Camels
+    "3.B.4.c": "3.A.2.f",  # Horses
+    "3.B.4.d": "3.A.2.g",  # Mules and Asses
+    "3.B.4.e": "3.A.2.i",  # Poultry
+    "3.B.5": "3.C.6",  # indirect N2O from manure management
+    "3.C": "3.C.7",  # rice
+    "3.D": "M.3.C.45AG",  # Agricultural soils
+    "3.D.a": "3.C.4",  # direct N2O from agri soils
+    "3.D.a.1": "3.C.4.a",  # inorganic fertilizers
+    "3.D.a.2": "3.C.4.b",  # organic fertilizers
+    "3.D.a.3": "3.C.4.c",  # urine and dung by grazing animals
+    "3.D.a.4": "3.C.4.d",  # N in crop residues
+    "3.D.b": "3.C.5",  # indirect N2O from managed soils
+    "3.D.b.1": "3.C.5.a",  # Atmospheric deposition
+    "3.D.b.2": "3.C.5.b",  # nitrogen leeching and runoff
+    "3.H": "3.C.3",  # urea application
+    "LU.3.B.1": "3.B.1",  # forest
+    "LU.3.B.2": "3.B.2",  # cropland
+    "LU.3.B.3": "3.B.3",  # grassland
+    "LU.3.B.4": "3.B.4",  # wetland
+    "LU.3.B.5": "3.B.5",  # Settlements
+    "LU.3.B.6": "3.B.6",  # other land
 }
 
 aggregate_cats = {
-    '1.B.2.a.iii': {'sources': ['1.B.2.a.iii.4', '1.B.2.a.iii.5', '1.B.2.a.iii.6'],
-                    'name': 'All Other'},
-    '1.B.2.b.iii': {'sources': ['1.B.2.b.iii.2', '1.B.2.b.iii.4', '1.B.2.b.iii.5',
-                                '1.B.2.b.iii.6',],
-                    'name': 'All Other'},
-    '1.B.2.a': {'sources': ['1.B.2.a.iii'], 'name': 'Oil'},
-    '1.B.2.b': {'sources': ['1.B.2.b.i', '1.B.2.b.ii', '1.B.2.b.iii'],
-                'name': 'Natural Gas'},
-    '2.D':  {'sources': ['2.D.4'], 'name': 'Non-Energy Products from Fuels and Solvent Use'},
-    '2.F.1':  {'sources': ['2.F.1.a', '2.F.1.b'], 'name': 'Refrigeration and Air Conditioning'},
-    '2.F':  {'sources': ["2.F.1", "2.F.2", "2.F.3", "2.F.4", "2.F.5", "2.F.6"],
-             'name': 'Product uses as Substitutes for Ozone Depleting Substances'},
-    '2.H':  {'sources': ["2.H.1", "2.H.2", "2.H.3"], 'name': 'Other'},
-    '3.A.2': {'sources': ['3.A.2.a', '3.A.2.c', '3.A.2.d', '3.A.2.e', '3.A.2.f',
-                          '3.A.2.g', '3.A.2.h', '3.A.2.i'],
-              'name': 'Manure Management'},
-    '3.A': {'sources': ['3.A.1', '3.A.2'], 'name': 'Livestock'},
-    '3.B': {'sources': ['3.B.1', '3.B.2', '3.B.3', '3.B.4', '3.B.5', '3.B.6'], 'name': 'Land'},
-    '3.C': {'sources': ['3.C.3', '3.C.4', '3.C.5', '3.C.6', '3.C.7'],
-            'name': 'Aggregate sources and non-CO2 emissions sources on land'},
-    'M.3.C.AG': {'sources': ['3.C.3', '3.C.4', '3.C.5', '3.C.6', '3.C.7'],
-            'name': 'Aggregate sources and non-CO2 emissions sources on land (Agriculture)'},
-    'M.AG': {'sources': ['3.A', 'M.3.C.AG'], 'name': 'Agriculture'},
-    '3': {'sources': ['M.AG', 'M.LULUCF'], 'name': 'AFOLU'},
-    'M.AG.ELV': {'sources': ['M.3.C.AG'], 'name': 'Agriculture excluding livestock emissions'},
-    '4': {'sources': ['4.A', '4.D'], 'name': 'Waste'},
-    '0': {'sources': ['1', '2', '3', '4']},
-    'M.0.EL': {'sources': ['1', '2', 'M.AG', '4']},
+    "1.B.2.a.iii": {
+        "sources": ["1.B.2.a.iii.4", "1.B.2.a.iii.5", "1.B.2.a.iii.6"],
+        "name": "All Other",
+    },
+    "1.B.2.b.iii": {
+        "sources": [
+            "1.B.2.b.iii.2",
+            "1.B.2.b.iii.4",
+            "1.B.2.b.iii.5",
+            "1.B.2.b.iii.6",
+        ],
+        "name": "All Other",
+    },
+    "1.B.2.a": {"sources": ["1.B.2.a.iii"], "name": "Oil"},
+    "1.B.2.b": {
+        "sources": ["1.B.2.b.i", "1.B.2.b.ii", "1.B.2.b.iii"],
+        "name": "Natural Gas",
+    },
+    "2.D": {
+        "sources": ["2.D.4"],
+        "name": "Non-Energy Products from Fuels and Solvent Use",
+    },
+    "2.F.1": {
+        "sources": ["2.F.1.a", "2.F.1.b"],
+        "name": "Refrigeration and Air Conditioning",
+    },
+    "2.F": {
+        "sources": ["2.F.1", "2.F.2", "2.F.3", "2.F.4", "2.F.5", "2.F.6"],
+        "name": "Product uses as Substitutes for Ozone Depleting Substances",
+    },
+    "2.H": {"sources": ["2.H.1", "2.H.2", "2.H.3"], "name": "Other"},
+    "3.A.2": {
+        "sources": [
+            "3.A.2.a",
+            "3.A.2.c",
+            "3.A.2.d",
+            "3.A.2.e",
+            "3.A.2.f",
+            "3.A.2.g",
+            "3.A.2.h",
+            "3.A.2.i",
+        ],
+        "name": "Manure Management",
+    },
+    "3.A": {"sources": ["3.A.1", "3.A.2"], "name": "Livestock"},
+    "3.B": {
+        "sources": ["3.B.1", "3.B.2", "3.B.3", "3.B.4", "3.B.5", "3.B.6"],
+        "name": "Land",
+    },
+    "3.C": {
+        "sources": ["3.C.3", "3.C.4", "3.C.5", "3.C.6", "3.C.7"],
+        "name": "Aggregate sources and non-CO2 emissions sources on land",
+    },
+    "M.3.C.AG": {
+        "sources": ["3.C.3", "3.C.4", "3.C.5", "3.C.6", "3.C.7"],
+        "name": "Aggregate sources and non-CO2 emissions sources on land (Agriculture)",
+    },
+    "M.AG": {"sources": ["3.A", "M.3.C.AG"], "name": "Agriculture"},
+    "3": {"sources": ["M.AG", "M.LULUCF"], "name": "AFOLU"},
+    "M.AG.ELV": {
+        "sources": ["M.3.C.AG"],
+        "name": "Agriculture excluding livestock emissions",
+    },
+    "4": {"sources": ["4.A", "4.D"], "name": "Waste"},
+    "0": {"sources": ["1", "2", "3", "4"]},
+    "M.0.EL": {"sources": ["1", "2", "M.AG", "4"]},
 }
 
-zero_cats = ['1.B.2.a.i', '1.B.2.a.ii'] # venting and flaring with 0 for oil as
+zero_cats = ["1.B.2.a.i", "1.B.2.a.ii"]  # venting and flaring with 0 for oil as
 # all mapped to natural gas

+ 122 - 88
src/unfccc_ghg_data/unfccc_reader/Morocco/read_MAR_BUR3_from_pdf.py

@@ -1,13 +1,23 @@
-# this script reads data from Morocco's BUR3
-# Data is read from pdf
+"""
+Read Morocco's BUR3 from pdf
 
+This script reads data from Morocco's BUR3
+Data are read from pdf using camelot
+
+"""
 import copy
 
 import camelot
 import pandas as pd
 import primap2 as pm2
-from .config_mar_bur3 import (aggregate_cats, cat_mapping, header_defs, remove_cats,
-                              table_defs, zero_cats)
+from config_mar_bur3 import (
+    aggregate_cats,
+    cat_mapping,
+    header_defs,
+    remove_cats,
+    table_defs,
+    zero_cats,
+)
 from primap2.pm2io._data_reading import filter_data, matches_time_format
 
 from unfccc_ghg_data.helper import downloaded_data_path, extracted_data_path
@@ -16,11 +26,11 @@ if __name__ == "__main__":
     # ###
     # configuration
     # ###
-    input_folder = downloaded_data_path / 'UNFCCC' / 'Morocco' / 'BUR3'
-    output_folder = extracted_data_path / 'UNFCCC' / 'Morocco'
-    output_filename = 'MAR_BUR3_2022_'
-    inventory_file = 'Morocco_BUR3_Fr.pdf'
-    gwp_to_use = 'AR4GWP100'
+    input_folder = downloaded_data_path / "UNFCCC" / "Morocco" / "BUR3"
+    output_folder = extracted_data_path / "UNFCCC" / "Morocco"
+    output_filename = "MAR_BUR3_2022_"
+    inventory_file = "Morocco_BUR3_Fr.pdf"
+    gwp_to_use = "AR4GWP100"
 
     # years to read
     years = [2010, 2012, 2014, 2016, 2018]
@@ -31,30 +41,28 @@ if __name__ == "__main__":
     # special header as category code and name in one column
     header_long = ["orig_cat_name", "entity", "unit", "time", "data"]
 
-    index_cols = ['Catégories']
+    index_cols = ["Catégories"]
 
     # rows to remove
-    cats_remove = [
-        'Agriculture' # always empty
-    ]
+    cats_remove = ["Agriculture"]  # always empty
 
     # manual category codes
     cat_codes_manual = {
-        '1.A.2.e -Industries agro-alimentaires et du tabac': '1.A.2.e',
-        '1.A.2.f -Industries des minéraux non- métalliques': '1.A.2.f',
+        "1.A.2.e -Industries agro-alimentaires et du tabac": "1.A.2.e",
+        "1.A.2.f -Industries des minéraux non- métalliques": "1.A.2.f",
         #'Agriculture': 'M.AG',
-        '2. PIUP': '2',
-        'UTCATF': 'M.LULUCF',
-        '3.B.1 Terres forestières': 'LU.3.B.1',
-        '3.B.2 Terres cultivées': 'LU.3.B.2',
-        '3.B.3 Prairies': 'LU.3.B.3',
-        '3.B.4 Terres humides': 'LU.3.B.4',
-        '3.B.5 Etablissements': 'LU.3.B.5',
-        '3.B.6 Autres terres': 'LU.3.B.6',
-        '1.B.1.a.i.1 -Exploitation minière': '1.A.1.a.i.1',
+        "2. PIUP": "2",
+        "UTCATF": "M.LULUCF",
+        "3.B.1 Terres forestières": "LU.3.B.1",
+        "3.B.2 Terres cultivées": "LU.3.B.2",
+        "3.B.3 Prairies": "LU.3.B.3",
+        "3.B.4 Terres humides": "LU.3.B.4",
+        "3.B.5 Etablissements": "LU.3.B.5",
+        "3.B.6 Autres terres": "LU.3.B.6",
+        "1.B.1.a.i.1 -Exploitation minière": "1.A.1.a.i.1",
     }
 
-    cat_code_regexp = r'(?P<code>^[a-zA-Z0-9\.]{1,14})\s-\s.*'
+    cat_code_regexp = r"(?P<code>^[a-zA-Z0-9\.]{1,14})\s-\s.*"
 
     coords_terminologies = {
         "area": "ISO3",
@@ -66,32 +74,29 @@ if __name__ == "__main__":
         "source": "MAR-GHG-inventory ",
         "provenance": "measured",
         "area": "MAR",
-        "scenario": "BUR3"
+        "scenario": "BUR3",
     }
 
     coords_value_mapping = {
         "unit": "PRIMAP1",
         "entity": {
-            'HFCs (AR4GWP100)': 'HFCS (AR4GWP100)',
-            'PFCs (AR4GWP100)': 'PFCS (AR4GWP100)',
-            'COVNM': 'NMVOC',
-        }
+            "HFCs (AR4GWP100)": "HFCS (AR4GWP100)",
+            "PFCs (AR4GWP100)": "PFCS (AR4GWP100)",
+            "COVNM": "NMVOC",
+        },
     }
 
+    coords_cols = {"category": "category", "entity": "entity", "unit": "unit"}
 
-    coords_cols = {
-        "category": "category",
-        "entity": "entity",
-        "unit": "unit"
-    }
-
-    #add_coords_cols = {
+    # add_coords_cols = {
     #    "orig_cat_name": ["orig_cat_name", "category"],
-    #}
+    # }
 
     filter_remove = {
         "f1": {
-            "entity": ['Other halogenated gases without CO2 equivalent conversion factors (2)'],
+            "entity": [
+                "Other halogenated gases without CO2 equivalent conversion factors (2)"
+            ],
         },
     }
 
@@ -107,8 +112,9 @@ if __name__ == "__main__":
     ##### read the raw data from pdf #####
     tables = camelot.read_pdf(
         str(input_folder / inventory_file),
-        pages=','.join([str(page) for page in pages_to_read]),
-        flavor='lattice')
+        pages=",".join([str(page) for page in pages_to_read]),
+        flavor="lattice",
+    )
 
     ##### combine tables and convert to long format #####
     df_all = None
@@ -120,8 +126,9 @@ if __name__ == "__main__":
             df_first = tables[sector_tables[0]].df
             if len(sector_tables) > 1:
                 for table in sector_tables[1:]:
-                    df_this_table = pd.concat([df_first, tables[table].df], axis=0,
-                                              join='outer')
+                    df_this_table = pd.concat(
+                        [df_first, tables[table].df], axis=0, join="outer"
+                    )
             else:
                 df_this_table = df_first
 
@@ -130,11 +137,11 @@ if __name__ == "__main__":
             df_this_table.columns = header_defs[sector]
 
             # fix 2018 agri table
-            if (year == 2018) & (sector == "Agriculture"):
+            if (year == 2018) & (sector == "Agriculture"):  # noqa: PLR2004
                 last_shift_row = 25
-                df_temp = df_this_table.iloc[0: last_shift_row, 1:].copy()
-                df_this_table.iloc[0, 1:] = ''
-                df_this_table.iloc[1: last_shift_row + 1, 1:] = df_temp
+                df_temp = df_this_table.iloc[0:last_shift_row, 1:].copy()
+                df_this_table.iloc[0, 1:] = ""
+                df_this_table.iloc[1 : last_shift_row + 1, 1:] = df_temp
 
             # replace line breaks, long hyphens, double, and triple spaces in category names
             df_this_table.iloc[:, 0] = df_this_table.iloc[:, 0].str.replace("\n", " ")
@@ -144,14 +151,15 @@ if __name__ == "__main__":
 
             # set index and convert to long format
             df_this_table = df_this_table.set_index(index_cols)
-            df_this_table_long = pm2.pm2io.nir_convert_df_to_long(df_this_table, year,
-                                                                  header_long)
+            df_this_table_long = pm2.pm2io.nir_convert_df_to_long(
+                df_this_table, year, header_long
+            )
 
             # print(df_this_table_long.head())
             if df_all is None:
                 df_all = df_this_table_long
             else:
-                df_all = pd.concat([df_all, df_this_table_long], axis=0, join='outer')
+                df_all = pd.concat([df_all, df_this_table_long], axis=0, join="outer")
 
     df_all = df_all.reset_index(drop=True)
 
@@ -166,24 +174,32 @@ if __name__ == "__main__":
     # replace cat names by codes in col "category"
     # first the manual replacements
     df_all["category"] = df_all["category"].replace(cat_codes_manual)
+
     # then the regex replacements
-    def repl(m):
-        return m.group('code')
-    df_all["category"] = df_all["category"].str.replace(cat_code_regexp, repl, regex=True)
+    def repl(m):  # noqa: D103
+        return m.group("code")
+
+    df_all["category"] = df_all["category"].str.replace(
+        cat_code_regexp, repl, regex=True
+    )
     df_all = df_all.reset_index(drop=True)
 
     # prepare numbers for pd.to_numeric
-    df_all.loc[:, "data"] = df_all.loc[:, "data"].str.replace(' ', '')
-    def repl(m):
-        return m.group('part1') + '.' + m.group('part2')
-    df_all.loc[:, 'data'] = df_all.loc[:, 'data'].str.replace(
-        '(?P<part1>[0-9]+),(?P<part2>[0-9\\.]+)$', repl, regex=True)
-    df_all['data'][df_all['data'].isnull()] = 'NaN'
+    df_all.loc[:, "data"] = df_all.loc[:, "data"].str.replace(" ", "")
+
+    def repl(m):  # noqa: D103
+        return m.group("part1") + "." + m.group("part2")
+
+    df_all.loc[:, "data"] = df_all.loc[:, "data"].str.replace(
+        "(?P<part1>[0-9]+),(?P<part2>[0-9\\.]+)$", repl, regex=True
+    )
+    df_all["data"][df_all["data"].isna()] = "NaN"
 
     # add GWP information to entity
     for entity in df_all["entity"].unique():
-        df_all["entity"][(df_all["entity"] == entity) & (
-                    df_all["unit"] == "GgCO2eq")] = f"{entity} ({gwp_to_use})"
+        df_all["entity"][
+            (df_all["entity"] == entity) & (df_all["unit"] == "GgCO2eq")
+        ] = f"{entity} ({gwp_to_use})"
 
     # drop "original_cat_name" as it has non-unique values per category
     df_all = df_all.drop(columns="orig_cat_name")
@@ -196,7 +212,8 @@ if __name__ == "__main__":
         coords_value_mapping=coords_value_mapping,
         filter_remove=filter_remove,
         meta_data=meta_data,
-        convert_str=True
+        convert_str=True,
+        time_format="%Y",
     )
 
     # make sure all col headers are str
@@ -205,7 +222,9 @@ if __name__ == "__main__":
     # conversion to PRIMAP2 native format
     data_pm2 = pm2.pm2io.from_interchange_format(data_if)
 
-    entities_to_convert = ['CO2'] #['N2O', 'SF6', 'CO2', 'CH4'] # CO2 is not converted on
+    entities_to_convert = [
+        "CO2"
+    ]  # ['N2O', 'SF6', 'CO2', 'CH4'] # CO2 is not converted on
     # conversion to IF as data with and without GWP exists. needs to be fixed in primap2
     entities_to_convert = [f"{entity} (AR4GWP100)" for entity in entities_to_convert]
 
@@ -230,38 +249,42 @@ if __name__ == "__main__":
     data_if_2006.attrs = copy.deepcopy(data_if.attrs)
 
     filter_remove_cats = {
-        "cat": {
-            f"category ({coords_terminologies['category']})":
-        remove_cats
-        },
+        "cat": {f"category ({coords_terminologies['category']})": remove_cats},
     }
 
     filter_data(data_if_2006, filter_remove=filter_remove_cats)
 
     # map categories
     data_if_2006 = data_if_2006.replace(
-        {f"category ({coords_terminologies['category']})": cat_mapping})
+        {f"category ({coords_terminologies['category']})": cat_mapping}
+    )
     data_if_2006[f"category ({coords_terminologies['category']})"].unique()
 
     # rename the category col
-    data_if_2006.rename(columns={
-        f"category ({coords_terminologies['category']})": 'category (IPCC2006_PRIMAP)'},
-                        inplace=True)
-    data_if_2006.attrs['attrs']['cat'] = 'category (IPCC2006_PRIMAP)'
-    data_if_2006.attrs['dimensions']['*'] = [
-        'category (IPCC2006_PRIMAP)' if item == f"category ({coords_terminologies['category']})"
-        else item for item in data_if_2006.attrs['dimensions']['*']]
+    data_if_2006 = data_if_2006.rename(
+        columns={
+            f"category ({coords_terminologies['category']})": "category (IPCC2006_PRIMAP)"
+        }
+    )
+    data_if_2006.attrs["attrs"]["cat"] = "category (IPCC2006_PRIMAP)"
+    data_if_2006.attrs["dimensions"]["*"] = [
+        "category (IPCC2006_PRIMAP)"
+        if item == f"category ({coords_terminologies['category']})"
+        else item
+        for item in data_if_2006.attrs["dimensions"]["*"]
+    ]
     # aggregate categories
-    time_format = '%Y'
+    time_format = "%Y"
     time_columns = [
         col
-        for col in data_if_2006.columns.values
+        for col in data_if_2006.columns.to_numpy()
         if matches_time_format(col, time_format)
     ]
 
     for cat_to_agg in aggregate_cats:
         mask = data_if_2006["category (IPCC2006_PRIMAP)"].isin(
-            aggregate_cats[cat_to_agg]["sources"])
+            aggregate_cats[cat_to_agg]["sources"]
+        )
         df_test = data_if_2006[mask]
         # print(df_test)
 
@@ -273,8 +296,15 @@ if __name__ == "__main__":
                 df_combine[col] = pd.to_numeric(df_combine[col], errors="coerce")
 
             df_combine = df_combine.groupby(
-                by=['source', 'scenario (PRIMAP)', 'provenance', 'area (ISO3)', 'entity',
-                    'unit']).sum(min_count=1)
+                by=[
+                    "source",
+                    "scenario (PRIMAP)",
+                    "provenance",
+                    "area (ISO3)",
+                    "entity",
+                    "unit",
+                ]
+            ).sum(min_count=1)
 
             df_combine.insert(0, "category (IPCC2006_PRIMAP)", cat_to_agg)
             # df_combine.insert(1, "cat_name_translation", aggregate_cats[cat_to_agg]["name"])
@@ -282,15 +312,16 @@ if __name__ == "__main__":
 
             df_combine = df_combine.reset_index()
 
-            data_if_2006 = pd.concat([data_if_2006, df_combine], axis=0, join='outer')
+            data_if_2006 = pd.concat([data_if_2006, df_combine], axis=0, join="outer")
             data_if_2006 = data_if_2006.reset_index(drop=True)
         else:
             print(f"no data to aggregate category {cat_to_agg}")
 
     for cat in zero_cats:
         entities = data_if_2006["entity"].unique()
-        data_zero = data_if_2006[data_if_2006["category (IPCC2006_PRIMAP)"]=="1"].copy(
-            deep=True)
+        data_zero = data_if_2006[
+            data_if_2006["category (IPCC2006_PRIMAP)"] == "1"
+        ].copy(deep=True)
         data_zero["category (IPCC2006_PRIMAP)"] = cat
         for col in time_columns:
             data_zero[col] = 0
@@ -303,7 +334,6 @@ if __name__ == "__main__":
     # convert back to IF to have units in the fixed format
     data_if_2006 = data_pm2_2006.pr.to_interchange_format()
 
-
     # ###
     # save data to IF and native format
     # ###
@@ -312,17 +342,21 @@ if __name__ == "__main__":
 
     # data in original categories
     pm2.pm2io.write_interchange_format(
-        output_folder / (output_filename + coords_terminologies["category"]), data_if)
+        output_folder / (output_filename + coords_terminologies["category"]), data_if
+    )
 
     encoding = {var: compression for var in data_pm2.data_vars}
     data_pm2.pr.to_netcdf(
         output_folder / (output_filename + coords_terminologies["category"] + ".nc"),
-        encoding=encoding)
+        encoding=encoding,
+    )
 
     # data in 2006 categories
     pm2.pm2io.write_interchange_format(
-        output_folder / (output_filename + "IPCC2006_PRIMAP"), data_if_2006)
+        output_folder / (output_filename + "IPCC2006_PRIMAP"), data_if_2006
+    )
 
     encoding = {var: compression for var in data_pm2_2006.data_vars}
     data_pm2_2006.pr.to_netcdf(
-        output_folder / (output_filename + "IPCC2006_PRIMAP" + ".nc"), encoding=encoding)
+        output_folder / (output_filename + "IPCC2006_PRIMAP" + ".nc"), encoding=encoding
+    )

+ 30 - 0
src/unfccc_ghg_data/unfccc_reader/Nigeria/__init__.py

@@ -0,0 +1,30 @@
+"""Read Nigeria's BURs, NIRs, NCs
+
+Scripts and configurations to read Nigeria's submissions to the UNFCCC.
+Currently, the following submissions and datasets are available (all datasets
+including DI (read using the DI-reader) and legacy BUR/NIR (no code)):
+
+.. exec_code::
+    :hide_code:
+
+    from unfccc_ghg_data.helper.functions import (get_country_datasets,
+                                                  get_country_submissions)
+    country = 'NGA'
+    # print available submissions
+    print("="*15 + " Available submissions " + "="*15)
+    get_country_submissions(country, True)
+    print("")
+
+    #print available datasets
+    print("="*15 + " Available datasets " + "="*15)
+    get_country_datasets(country, True)
+
+You can also obtain this information running
+
+.. code-block:: bash
+
+    poetry run doit country_info country=NGA
+
+See below for a listing of scripts for BUR/NIR reading including links.
+
+"""

+ 294 - 272
src/unfccc_ghg_data/unfccc_reader/Nigeria/config_nga_bur2.py

@@ -1,274 +1,280 @@
-gwp_to_use = 'AR5GWP100'
+"""Config for Nigeria's BUR3
+
+Full configuration including PRIMAP2 conversion config and metadata
+
+"""
+
+gwp_to_use = "AR5GWP100"
 
 tables_trends = {
-    '70': { # GHG by main sector
-        'page': '70',
-        'area': ['177,430,450,142'],
-        'cols': ['208,260,311,355,406'],
-        'coords_defaults': {
-            'unit': 'GgCO2eq',
-        },
-        'coords_cols': {
+    "70": {  # GHG by main sector
+        "page": "70",
+        "area": ["177,430,450,142"],
+        "cols": ["208,260,311,355,406"],
+        "coords_defaults": {
+            "unit": "GgCO2eq",
+        },
+        "coords_cols": {
             "category": "Year",
             "entity": "entity",
         },
-        'copy_cols': {
+        "copy_cols": {
             # to: from
-            'entity': 'Year',
+            "entity": "Year",
         },
-        'coords_value_mapping': {
+        "coords_value_mapping": {
             "unit": "PRIMAP1",
-            'category': {
-                'Total emissions': '0',
-                'Energy': '1',
-                'IPPU': '2',
-                'AFOLU': '3',
-                'Waste': '4',
+            "category": {
+                "Total emissions": "0",
+                "Energy": "1",
+                "IPPU": "2",
+                "AFOLU": "3",
+                "Waste": "4",
             },
-            'entity': {
-                'Total emissions': f'KYOTOGHG emissions ({gwp_to_use})',
-                'Energy': f'KYOTOGHG ({gwp_to_use})',
-                'IPPU': f'KYOTOGHG ({gwp_to_use})',
-                'AFOLU': f'KYOTOGHG emissions ({gwp_to_use})',
-                'Waste': f'KYOTOGHG ({gwp_to_use})',
+            "entity": {
+                "Total emissions": f"KYOTOGHG emissions ({gwp_to_use})",
+                "Energy": f"KYOTOGHG ({gwp_to_use})",
+                "IPPU": f"KYOTOGHG ({gwp_to_use})",
+                "AFOLU": f"KYOTOGHG emissions ({gwp_to_use})",
+                "Waste": f"KYOTOGHG ({gwp_to_use})",
             },
         },
-        'label_rows': [0, 1, 2],
+        "label_rows": [0, 1, 2],
     },
-    '71': { # main gases by sector
-    'page': '71',
-        'area': ['82,760,509,454'],
-        'cols': ['124,186,249,326,388,454'],
-        'coords_defaults': {
-            'category': '0',
-            'unit': 'GgCO2eq',
-        },
-        'coords_cols': {
+    "71": {  # main gases by sector
+        "page": "71",
+        "area": ["82,760,509,454"],
+        "cols": ["124,186,249,326,388,454"],
+        "coords_defaults": {
+            "category": "0",
+            "unit": "GgCO2eq",
+        },
+        "coords_cols": {
             "entity": "Year",
         },
-        'remove_cols': [],
-        'coords_value_mapping': {
+        "remove_cols": [],
+        "coords_value_mapping": {
             "unit": "PRIMAP1",
-            'entity': {
-                'Total GHG emissions (CO₂-eq)': f'KYOTOGHG emissions ({gwp_to_use})',
-                'Removals (CO₂) (CO₂-eq)': 'CO2 removals',
-                'Net emissions (CO₂-eq)': f'KYOTOGHG ({gwp_to_use})',
-                'CO₂ (Gg)': 'CO2 emissions',
-                'CH₄ (CO₂-eq)': f'CH4 ({gwp_to_use})',
-                'N₂O (CO₂-eq)': f'N2O ({gwp_to_use})',
+            "entity": {
+                "Total GHG emissions (CO₂-eq)": f"KYOTOGHG emissions ({gwp_to_use})",
+                "Removals (CO₂) (CO₂-eq)": "CO2 removals",
+                "Net emissions (CO₂-eq)": f"KYOTOGHG ({gwp_to_use})",
+                "CO₂ (Gg)": "CO2 emissions",
+                "CH₄ (CO₂-eq)": f"CH4 ({gwp_to_use})",
+                "N₂O (CO₂-eq)": f"N2O ({gwp_to_use})",
             },
         },
-        'label_rows':  [0, 1, 2, 3, 4],
+        "label_rows": [0, 1, 2, 3, 4],
     },
-    '72_1': { # CO2 by main sector
-    'page': '72',
-        'area': ['122,760,496,472'],
-        'cols': ['159,212,265,311,355,406,456'],
-        'coords_defaults': {
+    "72_1": {  # CO2 by main sector
+        "page": "72",
+        "area": ["122,760,496,472"],
+        "cols": ["159,212,265,311,355,406,456"],
+        "coords_defaults": {
             #'entity': 'CO2',
-            'unit': 'Gg',
+            "unit": "Gg",
         },
-        'coords_cols': {
+        "coords_cols": {
             "category": "Year",
-            'entity': 'entity',
+            "entity": "entity",
         },
-        'remove_cols': ['Total emissions'],
-        'copy_cols': {
+        "remove_cols": ["Total emissions"],
+        "copy_cols": {
             # to: from
-            'entity': 'Year',
+            "entity": "Year",
         },
-        'coords_value_mapping': {
+        "coords_value_mapping": {
             "unit": "PRIMAP1",
-            'category': {
-                'Total net emissions': '0',
-                'Energy': '1',
-                'IPPU': '2',
-                'AFOLU - emissions': '3',
-                'AFOLU - removals': '3',
-                'Waste': '4',
+            "category": {
+                "Total net emissions": "0",
+                "Energy": "1",
+                "IPPU": "2",
+                "AFOLU - emissions": "3",
+                "AFOLU - removals": "3",
+                "Waste": "4",
             },
-            'entity': {
-                'Total net emissions': 'CO2',
-                'Energy': 'CO2',
-                'IPPU': 'CO2',
-                'AFOLU - emissions': 'CO2 emissions',
-                'AFOLU - removals': 'CO2 removals',
-                'Waste': 'CO2',
+            "entity": {
+                "Total net emissions": "CO2",
+                "Energy": "CO2",
+                "IPPU": "CO2",
+                "AFOLU - emissions": "CO2 emissions",
+                "AFOLU - removals": "CO2 removals",
+                "Waste": "CO2",
             },
         },
-        'label_rows':  [0, 1, 2],
+        "label_rows": [0, 1, 2],
     },
-    '72_2': { # CH4 by sector
-    'page': '72',
-        'area': ['133,333,483,41'],
-        'cols': ['172,230,280,333,384,439'],
-        'coords_defaults': {
-            'entity': 'CH4',
-            'unit': 'Gg',
-        },
-        'coords_cols': {
+    "72_2": {  # CH4 by sector
+        "page": "72",
+        "area": ["133,333,483,41"],
+        "cols": ["172,230,280,333,384,439"],
+        "coords_defaults": {
+            "entity": "CH4",
+            "unit": "Gg",
+        },
+        "coords_cols": {
             "category": "Year",
         },
-        'remove_cols': ['Total (Gg CO₂-eq)'],
-        'coords_value_mapping': {
+        "remove_cols": ["Total (Gg CO₂-eq)"],
+        "coords_value_mapping": {
             "unit": "PRIMAP1",
-            'category': {
-                'Total': '0',
-                'Energy': '1',
-                'IPPU': '2',
-                'AFOLU - emissions': '3',
-                'Waste': '4',
+            "category": {
+                "Total": "0",
+                "Energy": "1",
+                "IPPU": "2",
+                "AFOLU - emissions": "3",
+                "Waste": "4",
             },
         },
-        'label_rows':  [0, 1, 2],
+        "label_rows": [0, 1, 2],
     },
-    '73': { # N2O by sector
-    'page': '73',
-        'area': ['155,666,643,364'],
-        'cols': ['194,265,309,366,419'],
-        'coords_defaults': {
-            'entity': 'N2O',
-            'unit': 'Gg',
-        },
-        'coords_cols': {
+    "73": {  # N2O by sector
+        "page": "73",
+        "area": ["155,666,643,364"],
+        "cols": ["194,265,309,366,419"],
+        "coords_defaults": {
+            "entity": "N2O",
+            "unit": "Gg",
+        },
+        "coords_cols": {
             "category": "Year",
         },
-        'remove_cols': ['Total emissions (Gg CO₂-eq)'],
-        'coords_value_mapping': {
+        "remove_cols": ["Total emissions (Gg CO₂-eq)"],
+        "coords_value_mapping": {
             "unit": "PRIMAP1",
-            'category': {
-                'Total': '0',
-                'Energy': '1',
-                'AFOLU': '3',
-                'Waste': '4',
+            "category": {
+                "Total": "0",
+                "Energy": "1",
+                "AFOLU": "3",
+                "Waste": "4",
             },
         },
-        'label_rows':  [0, 1, 2],
+        "label_rows": [0, 1, 2],
     },
-    '74': { # NOx by sector
-    'page': '74',
-        'area': ['148,457,467,166'],
-        'cols': ['190,254,304,359,421'],
-        'coords_defaults': {
-            'entity': 'NOX',
-            'unit': 'Gg',
-        },
-        'coords_cols': {
+    "74": {  # NOx by sector
+        "page": "74",
+        "area": ["148,457,467,166"],
+        "cols": ["190,254,304,359,421"],
+        "coords_defaults": {
+            "entity": "NOX",
+            "unit": "Gg",
+        },
+        "coords_cols": {
             "category": "Year",
         },
         #'remove_cols': [],
-        'coords_value_mapping': {
+        "coords_value_mapping": {
             "unit": "PRIMAP1",
-            'category': {
-                'Total emissions': '0',
-                'Energy': '1',
-                'IPPU': '2',
-                'AFOLU': '3',
-                'Waste': '4',
+            "category": {
+                "Total emissions": "0",
+                "Energy": "1",
+                "IPPU": "2",
+                "AFOLU": "3",
+                "Waste": "4",
             },
         },
-        'label_rows':  [0, 1, 2],
+        "label_rows": [0, 1, 2],
     },
-    '75': { # CO by sector
-    'page': '75',
-        'area': ['161,763,456,472'],
-        'cols': ['199,256,307,359,410'],
-        'coords_defaults': {
-            'entity': 'CO',
-            'unit': 'Gg',
-        },
-        'coords_cols': {
+    "75": {  # CO by sector
+        "page": "75",
+        "area": ["161,763,456,472"],
+        "cols": ["199,256,307,359,410"],
+        "coords_defaults": {
+            "entity": "CO",
+            "unit": "Gg",
+        },
+        "coords_cols": {
             "category": "Year",
         },
         #'remove_cols': ['Total emissions (Gg CO2-eq)'],
-        'coords_value_mapping': {
+        "coords_value_mapping": {
             "unit": "PRIMAP1",
-            'category': {
-                'Total emissions': '0',
-                'Energy': '1',
-                'IPPU': '2',
-                'AFOLU': '3',
-                'Waste': '4',
+            "category": {
+                "Total emissions": "0",
+                "Energy": "1",
+                "IPPU": "2",
+                "AFOLU": "3",
+                "Waste": "4",
             },
         },
-        'label_rows':  [0, 1, 2],
+        "label_rows": [0, 1, 2],
     },
-    '75_2': { # NMVOC by sector
-    'page': '75',
-        'area': ['177,325,441,50'],
-        'cols': ['219,287,340,395'],
-        'coords_defaults': {
-            'entity': 'NMVOC',
-            'unit': 'Gg',
-        },
-        'coords_cols': {
+    "75_2": {  # NMVOC by sector
+        "page": "75",
+        "area": ["177,325,441,50"],
+        "cols": ["219,287,340,395"],
+        "coords_defaults": {
+            "entity": "NMVOC",
+            "unit": "Gg",
+        },
+        "coords_cols": {
             "category": "Year",
         },
         #'remove_cols': ['Total emissions (Gg CO2-eq)'],
-        'coords_value_mapping': {
+        "coords_value_mapping": {
             "unit": "PRIMAP1",
-            'category': {
-                'Total emissions': '0',
-                'Energy': '1',
-                'IPPU': '2',
-                'Waste': '4',
+            "category": {
+                "Total emissions": "0",
+                "Energy": "1",
+                "IPPU": "2",
+                "Waste": "4",
             },
         },
-        'label_rows':  [0, 1, 2],
+        "label_rows": [0, 1, 2],
     },
-    '76_1': { # NMVOC by sector
-    'page': '76',
-        'area': ['175,782,448,675'],
-        'cols': ['216,282,340,390'],
-        'coords_defaults': {
-            'entity': 'NMVOC',
-            'unit': 'Gg',
-        },
-        'coords_cols': {
+    "76_1": {  # NMVOC by sector
+        "page": "76",
+        "area": ["175,782,448,675"],
+        "cols": ["216,282,340,390"],
+        "coords_defaults": {
+            "entity": "NMVOC",
+            "unit": "Gg",
+        },
+        "coords_cols": {
             "category": "Year",
         },
         #'remove_cols': ['Total emissions (Gg CO2-eq)'],
-        'coords_value_mapping': {
+        "coords_value_mapping": {
             "unit": "PRIMAP1",
-            'category': {
-                'Total emissions': '0',
-                'Energy': '1',
-                'IPPU': '2',
-                'Waste': '4',
+            "category": {
+                "Total emissions": "0",
+                "Energy": "1",
+                "IPPU": "2",
+                "Waste": "4",
             },
         },
-        'label_rows':  [0, 1, 2],
+        "label_rows": [0, 1, 2],
     },
-    '76_2': { # SO2 by sector
-    'page': '76',
-        'area': ['197,562,421,226'],
-        'cols': ['243,331,381'],
-        'coords_defaults': {
-            'entity': 'SO2',
-            'unit': 'Gg',
-        },
-        'coords_cols': {
+    "76_2": {  # SO2 by sector
+        "page": "76",
+        "area": ["197,562,421,226"],
+        "cols": ["243,331,381"],
+        "coords_defaults": {
+            "entity": "SO2",
+            "unit": "Gg",
+        },
+        "coords_cols": {
             "category": "Year",
         },
         #'remove_cols': ['Total emissions (Gg CO2-eq)'],
-        'coords_value_mapping': {
+        "coords_value_mapping": {
             "unit": "PRIMAP1",
-            'category': {
-                'Total emissions': '0',
-                'Energy': '1',
-                'Waste': '4',
+            "category": {
+                "Total emissions": "0",
+                "Energy": "1",
+                "Waste": "4",
             },
         },
-        'label_rows':  [0],
+        "label_rows": [0],
     },
 }
 
 pages_inventory = {
-    '78': 1,
-    '79': 0,
-    '80': 0,
-    '81': 0,
-    '82': 0,
+    "78": 1,
+    "79": 0,
+    "80": 0,
+    "81": 0,
+    "82": 0,
 }
 
 year_inventory = 2017
@@ -279,8 +285,8 @@ unit_row = 0
 ###
 index_cols = "Categories"
 units_inv = {
-    'Emissions (Gg)': 'Gg',
-    'Emissions CO2 Equivalents (Gg)': 'GgCO2eq',
+    "Emissions (Gg)": "Gg",
+    "Emissions CO2 Equivalents (Gg)": "GgCO2eq",
 }
 # special header as category UNFCCC_GHG_data and name in one column
 header_long = ["category", "entity", "unit", "time", "data"]
@@ -288,11 +294,11 @@ header_long = ["category", "entity", "unit", "time", "data"]
 
 # manual category codes
 cat_codes_manual = {
-    'Total National Emissions and Removals': '0',
-    'International Bunkers': 'M.BK',
+    "Total National Emissions and Removals": "0",
+    "International Bunkers": "M.BK",
 }
 
-cat_code_regexp = r'(?P<code>^[a-zA-Z0-9\.]{1,9})\s.*'
+cat_code_regexp = r"(?P<code>^[a-zA-Z0-9\.]{1,9})\s.*"
 
 coords_cols = {
     "category": "category",
@@ -321,29 +327,24 @@ coords_value_mapping = {
     "unit": "PRIMAP1",
     "category": "PRIMAP1",
     "entity": {
-        'Net CO2 (1)(2)': 'CO2',
-        'CH4': "CH4",
-        'N2O': "N2O",
-        'HFCs': f"HFCS ({gwp_to_use})",
-        'PFCs': f"PFCS ({gwp_to_use})",
-        'SF6': f"SF6 ({gwp_to_use})",
+        "Net CO2 (1)(2)": "CO2",
+        "CH4": "CH4",
+        "N2O": "N2O",
+        "HFCs": f"HFCS ({gwp_to_use})",
+        "PFCs": f"PFCS ({gwp_to_use})",
+        "SF6": f"SF6 ({gwp_to_use})",
         #'NOx': 'NOX',
-        'CO': 'CO', # no mapping, just added for completeness here
-        'NMVOCs': 'NMVOC',
-        'SO2': 'SO2', # no mapping, just added for completeness here
-        'Other halogenated gases with CO2 eq conversion factors (3)':
-            f"UnspMixOfHFCs ({gwp_to_use})",
+        "CO": "CO",  # no mapping, just added for completeness here
+        "NMVOCs": "NMVOC",
+        "SO2": "SO2",  # no mapping, just added for completeness here
+        "Other halogenated gases with CO2 eq conversion factors (3)": f"UnspMixOfHFCs ({gwp_to_use})",
     },
 }
 
 
 filter_remove = {
-    'f1': {
-        'entity': ['Other halogenated gases without CO2 eq conversion factors (4)']
-    },
-    'f2': {
-        'category': 'Memo'
-    },
+    "f1": {"entity": ["Other halogenated gases without CO2 eq conversion factors (4)"]},
+    "f2": {"category": "Memo"},
 }
 
 filter_keep = {}
@@ -353,73 +354,90 @@ meta_data = {
     "rights": "",
     "contact": "mail@johannes-guestchow.de",
     "title": "Nigeria. Second Biennial Update Report (BUR2) to the United Nations "
-             "Framework Convention on Climate Change",
+    "Framework Convention on Climate Change",
     "comment": "Read fom pdf by Johannes Gütschow",
     "institution": "UNFCCC",
 }
 
 # convert to mass units where possible
-entities_to_convert_to_mass = [
-    'CH4', 'N2O', 'SF6'
-]
+entities_to_convert_to_mass = ["CH4", "N2O", "SF6"]
 
-# CO2 equivalents don't make sense for these substances, so unit has to be Gg instead of Gg CO2 equivalents as indicated in the table
-entities_to_fix_unit = [
-    'NOx', 'CO', 'NMVOCs', 'SO2'
-]
+# CO2 equivalents don't make sense for these substances, so unit has to be Gg instead
+# of Gg CO2 equivalents as indicated in the table
+entities_to_fix_unit = ["NOx", "CO", "NMVOCs", "SO2"]
 
 ### processing
 
 processing_info_step1 = {
-    'aggregate_cats': {
-        '2.F': {'sources': ['2.F.2', '2.F.6'], # all 0, but for completeness
-              'name': 'Product uses as Substitutes for Ozone Depleting Substances'},
-        '2': {'sources': ['2.A', '2.B', '2.C', '2.D', '2.E', '2.F', '2.G'],
-              'name': 'IPPU'}, # for HFCs, PFCs, SO2, SF6, N2O (all 0)
+    "aggregate_cats": {
+        "2.F": {
+            "sources": ["2.F.2", "2.F.6"],  # all 0, but for completeness
+            "name": "Product uses as Substitutes for Ozone Depleting Substances",
+        },
+        "2": {
+            "sources": ["2.A", "2.B", "2.C", "2.D", "2.E", "2.F", "2.G"],
+            "name": "IPPU",
+        },  # for HFCs, PFCs, SO2, SF6, N2O (all 0)
     },
 }
 
-processing_info_step2 =  {
-    'aggregate_cats': {
-        'M.AG.ELV': {'sources': ['3.C'], 'name': 'Agriculture excluding livestock emissions'},
-        'M.AG': {'sources': ['M.AG.ELV', '3.A'], 'name': 'Agriculture'},
-        'M.LULUCF': {'sources': ['3.B', '3.D'],
-                     'name': 'Land Use, Land Use Change, and Forestry'},
-        'M.0.EL': {'sources': ['1', '2', 'M.AG', '4', '5'], 'name': 'National Total Excluding LULUCF'},
-        '0': {'sources': ['1', '2', '3', '4', '5'], 'name': 'National Total'},
+processing_info_step2 = {
+    "aggregate_cats": {
+        "M.AG.ELV": {
+            "sources": ["3.C"],
+            "name": "Agriculture excluding livestock emissions",
+        },
+        "M.AG": {"sources": ["M.AG.ELV", "3.A"], "name": "Agriculture"},
+        "M.LULUCF": {
+            "sources": ["3.B", "3.D"],
+            "name": "Land Use, Land Use Change, and Forestry",
+        },
+        "M.0.EL": {
+            "sources": ["1", "2", "M.AG", "4", "5"],
+            "name": "National Total Excluding LULUCF",
+        },
+        "0": {"sources": ["1", "2", "3", "4", "5"], "name": "National Total"},
     },
-    'downscale': {
-        'sectors': {
-            '1': {
-                'basket': '1',
-                'basket_contents': ['1.A', '1.B', '1.C'],
-                'entities': ['CO2', 'N2O', 'CH4'],
-                'dim': 'category (IPCC2006_PRIMAP)',
+    "downscale": {
+        "sectors": {
+            "1": {
+                "basket": "1",
+                "basket_contents": ["1.A", "1.B", "1.C"],
+                "entities": ["CO2", "N2O", "CH4"],
+                "dim": "category (IPCC2006_PRIMAP)",
             },
-            '1.A': {
-                'basket': '1.A',
-                'basket_contents': ['1.A.1', '1.A.2', '1.A.3', '1.A.4'],
-                'entities': ['CO2', 'N2O', 'CH4'],
-                'dim': 'category (IPCC2006_PRIMAP)',
+            "1.A": {
+                "basket": "1.A",
+                "basket_contents": ["1.A.1", "1.A.2", "1.A.3", "1.A.4"],
+                "entities": ["CO2", "N2O", "CH4"],
+                "dim": "category (IPCC2006_PRIMAP)",
             },
-            '1.B': {
-                'basket': '1.B',
-                'basket_contents': ['1.B.1', '1.B.2', '1.B.3'],
-                'entities': ['CO2', 'N2O', 'CH4'],
-                'dim': 'category (IPCC2006_PRIMAP)',
+            "1.B": {
+                "basket": "1.B",
+                "basket_contents": ["1.B.1", "1.B.2", "1.B.3"],
+                "entities": ["CO2", "N2O", "CH4"],
+                "dim": "category (IPCC2006_PRIMAP)",
             },
-            'IPPU': {
-                'basket': '2',
-                'basket_contents': ['2.A', '2.B', '2.C', '2.D', '2.E',
-                                    '2.F', '2.G', '2.H'],
-                'entities': ['CO2', 'N2O', 'CH4'],
-                'dim': 'category (IPCC2006_PRIMAP)',
+            "IPPU": {
+                "basket": "2",
+                "basket_contents": [
+                    "2.A",
+                    "2.B",
+                    "2.C",
+                    "2.D",
+                    "2.E",
+                    "2.F",
+                    "2.G",
+                    "2.H",
+                ],
+                "entities": ["CO2", "N2O", "CH4"],
+                "dim": "category (IPCC2006_PRIMAP)",
             },
-            '3': {
-                'basket': '3',
-                'basket_contents': ['3.A', '3.B', '3.C', '3.D'],
-                'entities': ['CO2', 'CH4', 'N2O'],
-                'dim': 'category (IPCC2006_PRIMAP)',
+            "3": {
+                "basket": "3",
+                "basket_contents": ["3.A", "3.B", "3.C", "3.D"],
+                "entities": ["CO2", "CH4", "N2O"],
+                "dim": "category (IPCC2006_PRIMAP)",
             },
             # '3A': {
             #     'basket': '3.A',
@@ -442,17 +460,21 @@ processing_info_step2 =  {
             # },
         },
     },
-    'remove_ts': {
-        'fgases': { # unnecessary and complicates aggregation for
+    "remove_ts": {
+        "fgases": {  # unnecessary and complicates aggregation for
             # other gases
-            'category': ['5'],
-            'entities': [f'HFCS ({gwp_to_use})', f'PFCS ({gwp_to_use})', 'SF6',
-                         f'UnspMixOfHFCs ({gwp_to_use})'],
+            "category": ["5"],
+            "entities": [
+                f"HFCS ({gwp_to_use})",
+                f"PFCS ({gwp_to_use})",
+                "SF6",
+                f"UnspMixOfHFCs ({gwp_to_use})",
+            ],
         },
     },
-    'basket_copy': {
-        'GWPs_to_add': ["SARGWP100", "AR4GWP100", "AR6GWP100"],
-        'entities': ["HFCS", "PFCS", "UnspMixOfHFCs"],
-        'source_GWP': gwp_to_use,
+    "basket_copy": {
+        "GWPs_to_add": ["SARGWP100", "AR4GWP100", "AR6GWP100"],
+        "entities": ["HFCS", "PFCS", "UnspMixOfHFCs"],
+        "source_GWP": gwp_to_use,
     },
 }

+ 137 - 103
src/unfccc_ghg_data/unfccc_reader/Nigeria/read_NGA_BUR2_from_pdf.py

@@ -1,5 +1,10 @@
-# this script reads data from Nigeria's BUR2
-# Data is read from the pdf file
+"""
+Read Nigeria's BUR2 from pdf
+
+This script reads data from Nigeria's BUR2
+Data are read from pdf using camelot
+
+"""
 
 import locale
 from copy import deepcopy
@@ -9,32 +14,32 @@ import numpy as np
 import pandas as pd
 import primap2 as pm2
 import xarray as xr
-from .config_nga_bur2 import (
-   cat_code_regexp,
-   cat_codes_manual,
-   coords_cols,
-   coords_defaults,
-   coords_terminologies,
-   coords_value_mapping,  #, add_coords_cols
-   entity_row,
-   filter_remove,
-   header_long,
-   index_cols,
-   meta_data,
-   pages_inventory,
-   processing_info_step1,
-   processing_info_step2,
-   tables_trends,
-   unit_row,
-   units_inv,
-   year_inventory,
+from config_nga_bur2 import (
+    cat_code_regexp,
+    cat_codes_manual,
+    coords_cols,
+    coords_defaults,
+    coords_terminologies,
+    coords_value_mapping,  # , add_coords_cols
+    entity_row,
+    filter_remove,
+    header_long,
+    index_cols,
+    meta_data,
+    pages_inventory,
+    processing_info_step1,
+    processing_info_step2,
+    tables_trends,
+    unit_row,
+    units_inv,
+    year_inventory,
 )
 
 from unfccc_ghg_data.helper import (
-   downloaded_data_path,
-   extracted_data_path,
-   gas_baskets,
-   process_data_for_country,
+    downloaded_data_path,
+    extracted_data_path,
+    gas_baskets,
+    process_data_for_country,
 )
 
 if __name__ == "__main__":
@@ -42,61 +47,74 @@ if __name__ == "__main__":
     # configuration
     # ###
     # define locale to use for str to float conversion
-    locale_to_use = 'en_NG.UTF-8'
+    locale_to_use = "en_NG.UTF-8"
     locale.setlocale(locale.LC_NUMERIC, locale_to_use)
 
-    input_folder = downloaded_data_path / 'UNFCCC' / 'Nigeria' / 'BUR2'
-    output_folder = extracted_data_path / 'UNFCCC' / 'Nigeria'
+    input_folder = downloaded_data_path / "UNFCCC" / "Nigeria" / "BUR2"
+    output_folder = extracted_data_path / "UNFCCC" / "Nigeria"
     if not output_folder.exists():
-       output_folder.mkdir()
+        output_folder.mkdir()
 
-    output_filename = 'NGA_BUR2_2021_'
+    output_filename = "NGA_BUR2_2021_"
     compression = dict(zlib=True, complevel=9)
-    inventory_file = 'NIGERIA_BUR_2_-_Second_Biennial_Update_Report_%28BUR2%29.pdf'
+    inventory_file = "NIGERIA_BUR_2_-_Second_Biennial_Update_Report_%28BUR2%29.pdf"
 
     ## read 2019 inventory
     df_inventory = None
     for page in pages_inventory.keys():
-        tables = camelot.read_pdf(str(input_folder / inventory_file), pages=str(page),
-                                  flavor='lattice')
+        tables = camelot.read_pdf(
+            str(input_folder / inventory_file), pages=str(page), flavor="lattice"
+        )
         df_this_table = tables[pages_inventory[page]].df
         # replace line breaks, double, and triple spaces in category names
         df_this_table.iloc[:, 0] = df_this_table.iloc[:, 0].str.replace("\n", " ")
         df_this_table.iloc[:, 0] = df_this_table.iloc[:, 0].str.replace("   ", " ")
         df_this_table.iloc[:, 0] = df_this_table.iloc[:, 0].str.replace("  ", " ")
         # replace line breaks in units and entities
-        df_this_table.iloc[entity_row] = df_this_table.iloc[entity_row].str.replace('\n',
-                                                                                    '')
-        df_this_table.iloc[unit_row] = df_this_table.iloc[unit_row].str.replace('\n', '')
+        df_this_table.iloc[entity_row] = df_this_table.iloc[entity_row].str.replace(
+            "\n", ""
+        )
+        df_this_table.iloc[unit_row] = df_this_table.iloc[unit_row].str.replace(
+            "\n", ""
+        )
 
         # fillna in unit row
-        df_this_table.iloc[unit_row][df_this_table.iloc[unit_row]==""] = np.nan
-        df_this_table.iloc[unit_row] = df_this_table.iloc[unit_row].fillna(
-            method='ffill')
-        df_this_table = pm2.pm2io.nir_add_unit_information(df_this_table, unit_row=unit_row,
-                                                           entity_row=entity_row,
-                                                           regexp_entity=".*",
-                                                           manual_repl_unit=units_inv,
-                                                           default_unit="")
+        df_this_table.iloc[unit_row][df_this_table.iloc[unit_row] == ""] = np.nan
+        df_this_table.iloc[unit_row] = df_this_table.iloc[unit_row].ffill()
+        df_this_table = pm2.pm2io.nir_add_unit_information(
+            df_this_table,
+            unit_row=unit_row,
+            entity_row=entity_row,
+            regexp_entity=".*",
+            manual_repl_unit=units_inv,
+            default_unit="",
+        )
 
         # set index and convert to long format
         df_this_table = df_this_table.set_index(index_cols)
-        df_this_table_long = pm2.pm2io.nir_convert_df_to_long(df_this_table, year_inventory,
-                                                              header_long)
+        df_this_table_long = pm2.pm2io.nir_convert_df_to_long(
+            df_this_table, year_inventory, header_long
+        )
 
         # combine with tables for other sectors (merge not append)
         if df_inventory is None:
             df_inventory = df_this_table_long
         else:
-            df_inventory = pd.concat([df_inventory, df_this_table_long], axis=0, join='outer')
+            df_inventory = pd.concat(
+                [df_inventory, df_this_table_long], axis=0, join="outer"
+            )
 
     # replace cat names by codes in col "category"
     # first the manual replacements
     df_inventory["category"] = df_inventory["category"].replace(cat_codes_manual)
+
     # then the regex replacements
-    def repl(m):
-       return m.group('code')
-    df_inventory["category"] = df_inventory["category"].str.replace(cat_code_regexp, repl, regex=True)
+    def repl(m):  # noqa: D103
+        return m.group("code")
+
+    df_inventory["category"] = df_inventory["category"].str.replace(
+        cat_code_regexp, repl, regex=True
+    )
     df_inventory = df_inventory.reset_index(drop=True)
 
     # ###
@@ -105,15 +123,15 @@ if __name__ == "__main__":
     data_inv_if = pm2.pm2io.convert_long_dataframe_if(
         df_inventory,
         coords_cols=coords_cols,
-        #add_coords_cols=add_coords_cols,
+        # add_coords_cols=add_coords_cols,
         coords_defaults=coords_defaults,
         coords_terminologies=coords_terminologies,
         coords_value_mapping=coords_value_mapping,
         filter_remove=filter_remove,
         meta_data=meta_data,
         convert_str=True,
-        time_format='%Y',
-        )
+        time_format="%Y",
+    )
 
     data_inv_pm2 = pm2.pm2io.from_interchange_format(data_inv_if)
 
@@ -122,19 +140,21 @@ if __name__ == "__main__":
     for table in tables_trends.keys():
         print(table)
         current_table = deepcopy(tables_trends[table])
-        tables = camelot.read_pdf(str(input_folder / inventory_file),
-                                  pages=current_table["page"],
-                                  table_areas=current_table["area"],
-                                  columns=current_table["cols"],
-                                  flavor='stream',
-                                  split_text=True)
+        tables = camelot.read_pdf(
+            str(input_folder / inventory_file),
+            pages=current_table["page"],
+            table_areas=current_table["area"],
+            columns=current_table["cols"],
+            flavor="stream",
+            split_text=True,
+        )
         df_this_table = tables[0].df
 
         # merge rows for entity and unit
         rows_to_merge = df_this_table.iloc[current_table["label_rows"]]
         indices_to_merge = rows_to_merge.index
         # join the three rows
-        new_row = rows_to_merge.agg(' '.join)
+        new_row = rows_to_merge.agg(" ".join)
         df_this_table.loc[indices_to_merge[0]] = new_row
         df_this_table = df_this_table.drop(indices_to_merge)
         new_row = new_row.str.replace("  ", " ")
@@ -144,7 +164,7 @@ if __name__ == "__main__":
         df_this_table.columns = new_row
 
         # remove columns not needed
-        if 'remove_cols' in current_table.keys():
+        if "remove_cols" in current_table.keys():
             df_this_table = df_this_table.drop(columns=current_table["remove_cols"])
 
         df_this_table = df_this_table.set_index("Year")
@@ -155,12 +175,14 @@ if __name__ == "__main__":
         # remove "," (thousand sep) from data
         for col in df_this_table.columns:
             df_this_table.loc[:, col] = df_this_table.loc[:, col].str.strip()
-            def repl(m):
-               return m.group('part1') + m.group('part2')
-            df_this_table.loc[:, col] = df_this_table.loc[:, col].str.replace(
-                '(?P<part1>[0-9]+),(?P<part2>[0-9\\.]+)$', repl, regex=True)
-            df_this_table[col][df_this_table[col].isnull()] = 'NaN'
 
+            def repl(m):  # noqa: D103
+                return m.group("part1") + m.group("part2")
+
+            df_this_table.loc[:, col] = df_this_table.loc[:, col].str.replace(
+                "(?P<part1>[0-9]+),(?P<part2>[0-9\\.]+)$", repl, regex=True
+            )
+            df_this_table[col][df_this_table[col].isna()] = "NaN"
 
         # metadata in first col instead of index
         df_this_table = df_this_table.reset_index()
@@ -170,7 +192,7 @@ if __name__ == "__main__":
         df_this_table.columns = df_this_table.columns.map(str)
 
         # make copy of columns if a column is used twice for metadata
-        if 'copy_cols' in current_table.keys():
+        if "copy_cols" in current_table.keys():
             for col in current_table["copy_cols"]:
                 df_this_table[col] = df_this_table[current_table["copy_cols"][col]]
 
@@ -184,7 +206,7 @@ if __name__ == "__main__":
             coords_value_mapping=current_table["coords_value_mapping"],
             meta_data=meta_data,
             convert_str=True,
-            time_format='%Y',
+            time_format="%Y",
         )
 
         data_current_pm2 = pm2.pm2io.from_interchange_format(data_current_if)
@@ -193,7 +215,7 @@ if __name__ == "__main__":
         else:
             data_trend_pm2 = data_trend_pm2.pr.merge(data_current_pm2)
 
-    data_pm2 = data_inv_pm2.pr.merge(data_trend_pm2, tolerance=0.02) # some rounding in
+    data_pm2 = data_inv_pm2.pr.merge(data_trend_pm2, tolerance=0.02)  # some rounding in
     # trends needs higher tolerance
 
     data_if = data_pm2.pr.to_interchange_format()
@@ -205,48 +227,59 @@ if __name__ == "__main__":
         output_folder.mkdir()
     pm2.pm2io.write_interchange_format(
         output_folder / (output_filename + coords_terminologies["category"] + "_raw"),
-        data_if)
+        data_if,
+    )
 
     encoding = {var: compression for var in data_pm2.data_vars}
     data_pm2.pr.to_netcdf(
-        output_folder / (output_filename + coords_terminologies["category"] +
-                         "_raw.nc"),
-        encoding=encoding)
-
+        output_folder
+        / (output_filename + coords_terminologies["category"] + "_raw.nc"),
+        encoding=encoding,
+    )
 
     #### processing
     data_proc_pm2 = data_pm2
     terminology_proc = coords_terminologies["category"]
 
     # combine CO2 emissions and removals
-    temp_CO2 = data_proc_pm2[["CO2 emissions", "CO2 removals"]].pr.sum\
-        (dim="entity", skipna=True, min_count=1)
+    temp_CO2 = data_proc_pm2[["CO2 emissions", "CO2 removals"]].pr.sum(
+        dim="entity", skipna=True, min_count=1
+    )
     data_proc_pm2["CO2"] = data_proc_pm2["CO2"].fillna(temp_CO2)
 
     # create net KYOTOGHG for 0 and 3
-    data_proc_pm2["KYOTOGHG removals (AR5GWP100)"] \
-        = xr.full_like(data_proc_pm2["CO2 removals"],
-                       np.nan).pr.quantify(units="Gg CO2 / year")
-
-    data_proc_pm2["KYOTOGHG removals (AR5GWP100)"].attrs = {"entity": "KYOTOGHG",
-                                                            "gwp_context": "AR5GWP100"}
-    data_proc_pm2["KYOTOGHG removals (AR5GWP100)"] \
-        = data_proc_pm2.pr.gas_basket_contents_sum(
-        basket="KYOTOGHG removals (AR5GWP100)", basket_contents=['CO2 removals'],
-        skipna=True, min_count=1)
-    temp_KYOTOGHG = data_proc_pm2[["KYOTOGHG emissions (AR5GWP100)",
-                                   "KYOTOGHG removals (AR5GWP100)"]].pr.sum\
-        (dim="entity", skipna=True, min_count=1)
-    data_proc_pm2["KYOTOGHG (AR5GWP100)"] \
-        = data_proc_pm2["KYOTOGHG (AR5GWP100)"].fillna(temp_KYOTOGHG)
-
+    data_proc_pm2["KYOTOGHG removals (AR5GWP100)"] = xr.full_like(
+        data_proc_pm2["CO2 removals"], np.nan
+    ).pr.quantify(units="Gg CO2 / year")
+
+    data_proc_pm2["KYOTOGHG removals (AR5GWP100)"].attrs = {
+        "entity": "KYOTOGHG",
+        "gwp_context": "AR5GWP100",
+    }
+    data_proc_pm2[
+        "KYOTOGHG removals (AR5GWP100)"
+    ] = data_proc_pm2.pr.gas_basket_contents_sum(
+        basket="KYOTOGHG removals (AR5GWP100)",
+        basket_contents=["CO2 removals"],
+        skipna=True,
+        min_count=1,
+    )
+    temp_KYOTOGHG = data_proc_pm2[
+        ["KYOTOGHG emissions (AR5GWP100)", "KYOTOGHG removals (AR5GWP100)"]
+    ].pr.sum(dim="entity", skipna=True, min_count=1)
+    data_proc_pm2["KYOTOGHG (AR5GWP100)"] = data_proc_pm2[
+        "KYOTOGHG (AR5GWP100)"
+    ].fillna(temp_KYOTOGHG)
 
     # actual processing
     data_proc_pm2 = process_data_for_country(
         data_proc_pm2,
-        entities_to_ignore=['CO2 emissions', 'CO2 removals',
-                            'KYOTOGHG emissions (AR5GWP100)',
-                            'KYOTOGHG removals (AR5GWP100)'],
+        entities_to_ignore=[
+            "CO2 emissions",
+            "CO2 removals",
+            "KYOTOGHG emissions (AR5GWP100)",
+            "KYOTOGHG removals (AR5GWP100)",
+        ],
         gas_baskets={},
         processing_info_country=processing_info_step1,
     )
@@ -256,16 +289,16 @@ if __name__ == "__main__":
         entities_to_ignore=[],
         gas_baskets=gas_baskets,
         processing_info_country=processing_info_step2,
-        cat_terminology_out = terminology_proc,
-        #category_conversion = None,
-        #sectors_out = None,
+        cat_terminology_out=terminology_proc,
+        # category_conversion = None,
+        # sectors_out = None,
     )
 
     # adapt source and metadata
     # TODO: processing info is present twice
-    current_source = data_proc_pm2.coords["source"].values[0]
+    current_source = data_proc_pm2.coords["source"].to_numpy()[0]
     data_temp = data_proc_pm2.pr.loc[{"source": current_source}]
-    data_proc_pm2 = data_proc_pm2.pr.set("source", 'BUR_NIR', data_temp)
+    data_proc_pm2 = data_proc_pm2.pr.set("source", "BUR_NIR", data_temp)
 
     # ###
     # save data to IF and native format
@@ -274,9 +307,10 @@ if __name__ == "__main__":
     if not output_folder.exists():
         output_folder.mkdir()
     pm2.pm2io.write_interchange_format(
-        output_folder / (output_filename + terminology_proc), data_proc_if)
+        output_folder / (output_filename + terminology_proc), data_proc_if
+    )
 
     encoding = {var: compression for var in data_proc_pm2.data_vars}
     data_proc_pm2.pr.to_netcdf(
-        output_folder / (output_filename + terminology_proc + ".nc"),
-        encoding=encoding)
+        output_folder / (output_filename + terminology_proc + ".nc"), encoding=encoding
+    )

+ 30 - 0
src/unfccc_ghg_data/unfccc_reader/Peru/__init__.py

@@ -0,0 +1,30 @@
+"""Read Peru's BURs, NIRs, NCs
+
+Scripts and configurations to read Peru's submissions to the UNFCCC.
+Currently, the following submissions and datasets are available (all datasets
+including DI (read using the DI-reader) and legacy BUR/NIR (no code)):
+
+.. exec_code::
+    :hide_code:
+
+    from unfccc_ghg_data.helper.functions import (get_country_datasets,
+                                                  get_country_submissions)
+    country = 'PER'
+    # print available submissions
+    print("="*15 + " Available submissions " + "="*15)
+    get_country_submissions(country, True)
+    print("")
+
+    #print available datasets
+    print("="*15 + " Available datasets " + "="*15)
+    get_country_datasets(country, True)
+
+You can also obtain this information running
+
+.. code-block:: bash
+
+    poetry run doit country_info country=PER
+
+See below for a listing of scripts for BUR/NIR reading including links.
+
+"""

+ 77 - 66
src/unfccc_ghg_data/unfccc_reader/Peru/config_per_bur3.py

@@ -1,3 +1,9 @@
+"""Config for Peru's BUR3
+
+Full configuration including PRIMAP2 conversion config and metadata
+
+"""
+
 table_def_templates = {
     "300": {  # 300
         "area": ["69,457,727,78"],
@@ -486,75 +492,80 @@ meta_data = {
 
 ## processing
 cat_conversion = {
-    'mapping': {
-        '0': '0',
-        '1': '1',
-        '1.A': '1.A',
-        '1.A.1': '1.A.1',
-        '1.A.2': '1.A.2',
-        '1.A.3': '1.A.3',
-        '1.A.4': '1.A.4',
-        '1.A.5': '1.A.5',
-        '1.B': '1.B',
-        '1.B.1': '1.B.1',
-        '1.B.2': '1.B.2',
-        '2': '2',
-        '2.A': '2.A',
-        '2.B': '2.B',
-        '2.C': '2.C',
-        '2.D': '2.D',
-        '2.E': '2.E',
-        '2.F': '2.F',
-        '2.G': '2.G',
-        '2.H': '2.H',
-        '3': 'M.AG',
-        '3.A': '3.A',
-        '3.A.1': '3.A.1',
-        '3.A.2': '3.A.2',
-        '3.C': '3.C',
-        '3.C.1': '3.C.1',
-        '3.C.2': '3.C.2',
-        '3.C.3': '3.C.3',
-        '3.C.4': '3.C.4',
-        '3.C.5': '3.C.5',
-        '3.C.6': '3.C.6',
-        '3.C.7': '3.C.7',
-        '4': 'M.LULUCF',
-        'M.2006.3.B': '3.B',
-        '4.A': '3.B.1',
-        '4.B': '3.B.2',
-        '4.C': '3.B.3',
-        '4.D': '3.B.4',
-        '4.E': '3.B.5',
-        '4.F': '3.B.6',
-        '4.G': '3.D.1',
-        '5': '4',
-        '5.A': '4.A',
-        '5.B': '4.B',
-        '5.C': '4.C',
-        '5.D': '4.D',
-        'M.BK': 'M.BK',
-        'M.BK.A': 'M.BK.A',
-        'M.BK.M': 'M.BM.M',
-        'M.BIO': 'M.BIO',
-    },
-    'aggregate': {
-        '2': {'sources': ['2.A', '2.B', '2.C', '2.D', '2.E', '2.F', '2.G', '2.H'],
-              'name': 'IPPU'},
-        'M.3.C.AG': {
-            'sources': ['3.C'],
-            'name': 'Aggregate sources and non-CO2 emissions sources on land (Agriculture)'},
-        'M.AG.ELV': {'sources': ['M.3.C.AG'],
-                     'name': 'Agriculture excluding livestock emissions'},
-        '3.D': {'sources': ['3.D.1'], 'name': 'Other'},
-        '3': {'sources': ['M.AG', 'M.LULUCF'], 'name': 'AFOLU'},
+    "mapping": {
+        "0": "0",
+        "1": "1",
+        "1.A": "1.A",
+        "1.A.1": "1.A.1",
+        "1.A.2": "1.A.2",
+        "1.A.3": "1.A.3",
+        "1.A.4": "1.A.4",
+        "1.A.5": "1.A.5",
+        "1.B": "1.B",
+        "1.B.1": "1.B.1",
+        "1.B.2": "1.B.2",
+        "2": "2",
+        "2.A": "2.A",
+        "2.B": "2.B",
+        "2.C": "2.C",
+        "2.D": "2.D",
+        "2.E": "2.E",
+        "2.F": "2.F",
+        "2.G": "2.G",
+        "2.H": "2.H",
+        "3": "M.AG",
+        "3.A": "3.A",
+        "3.A.1": "3.A.1",
+        "3.A.2": "3.A.2",
+        "3.C": "3.C",
+        "3.C.1": "3.C.1",
+        "3.C.2": "3.C.2",
+        "3.C.3": "3.C.3",
+        "3.C.4": "3.C.4",
+        "3.C.5": "3.C.5",
+        "3.C.6": "3.C.6",
+        "3.C.7": "3.C.7",
+        "4": "M.LULUCF",
+        "M.2006.3.B": "3.B",
+        "4.A": "3.B.1",
+        "4.B": "3.B.2",
+        "4.C": "3.B.3",
+        "4.D": "3.B.4",
+        "4.E": "3.B.5",
+        "4.F": "3.B.6",
+        "4.G": "3.D.1",
+        "5": "4",
+        "5.A": "4.A",
+        "5.B": "4.B",
+        "5.C": "4.C",
+        "5.D": "4.D",
+        "M.BK": "M.BK",
+        "M.BK.A": "M.BK.A",
+        "M.BK.M": "M.BM.M",
+        "M.BIO": "M.BIO",
+    },
+    "aggregate": {
+        "2": {
+            "sources": ["2.A", "2.B", "2.C", "2.D", "2.E", "2.F", "2.G", "2.H"],
+            "name": "IPPU",
+        },
+        "M.3.C.AG": {
+            "sources": ["3.C"],
+            "name": "Aggregate sources and non-CO2 emissions sources on land (Agriculture)",
+        },
+        "M.AG.ELV": {
+            "sources": ["M.3.C.AG"],
+            "name": "Agriculture excluding livestock emissions",
+        },
+        "3.D": {"sources": ["3.D.1"], "name": "Other"},
+        "3": {"sources": ["M.AG", "M.LULUCF"], "name": "AFOLU"},
     },
 }
 
 processing_info = {
-    'basket_copy': {
-        'GWPs_to_add': ["SARGWP100", "AR4GWP100", "AR6GWP100"],
-        'entities': ["HFCS", "PFCS"],
-        'source_GWP': gwp_to_use,
+    "basket_copy": {
+        "GWPs_to_add": ["SARGWP100", "AR4GWP100", "AR6GWP100"],
+        "entities": ["HFCS", "PFCS"],
+        "source_GWP": gwp_to_use,
     },
 }

+ 33 - 20
src/unfccc_ghg_data/unfccc_reader/Peru/read_PER_BUR3_from_pdf.py

@@ -1,12 +1,17 @@
-# read Singapore fifth BUR from pdf
+"""
+Read Peru's BUR3 from pdf
 
+This script reads data from Peru's BUR3
+Data are read from pdf using camelot
+
+"""
 
 import locale
 
 import camelot
 import pandas as pd
 import primap2 as pm2
-from .config_per_bur3 import (
+from config_per_bur3 import (
     cat_code_regexp,
     cat_codes_manual,
     cat_conversion,
@@ -103,20 +108,22 @@ if __name__ == "__main__":
 
             # drop cols if necessary
             if "drop_cols" in table_defs[page].keys():
-                # print(df_current.columns.values)
+                # print(df_current.columns.to_numpy())
                 df_current = df_current.drop(columns=table_defs[page]["drop_cols"])
             elif "drop_cols" in table_def_templates[table_on_page].keys():
                 df_current = df_current.drop(columns=table_defs[page]["drop_cols"])
 
             # rename category column
-            df_current.rename(
-                columns={table_defs[page]["category_col"]: index_cols[0]}, inplace=True
+            df_current = df_current.rename(
+                columns={table_defs[page]["category_col"]: index_cols[0]}
             )
 
             # replace double \n
             df_current[index_cols[0]] = df_current[index_cols[0]].str.replace("\n", " ")
             # replace double and triple spaces
-            df_current[index_cols[0]] = df_current[index_cols[0]].str.replace("   ", " ")
+            df_current[index_cols[0]] = df_current[index_cols[0]].str.replace(
+                "   ", " "
+            )
             df_current[index_cols[0]] = df_current[index_cols[0]].str.replace("  ", " ")
 
             # fix the split rows
@@ -137,7 +144,7 @@ if __name__ == "__main__":
             # set index
             # df_current = df_current.set_index(index_cols)
             # strip trailing and leading  and remove "^"
-            for col in df_current.columns.values:
+            for col in df_current.columns.to_numpy():
                 df_current[col] = df_current[col].str.strip()
                 df_current[col] = df_current[col].str.replace("^", "")
 
@@ -147,9 +154,9 @@ if __name__ == "__main__":
                 df_this_page = df_current.copy(deep=True)
             else:
                 # find intersecting cols
-                cols_this_page = df_this_page.columns.values
+                cols_this_page = df_this_page.columns.to_numpy()
                 # print(f"cols this page: {cols_this_page}")
-                cols_current = df_current.columns.values
+                cols_current = df_current.columns.to_numpy()
                 # print(f"cols current: {cols_current}")
                 cols_both = list(set(cols_this_page).intersection(set(cols_current)))
                 # print(f"cols both: {cols_both}")
@@ -179,7 +186,9 @@ if __name__ == "__main__":
         # drop the rows with memo items etc
         for cat in cats_remove:
             df_this_page_long = df_this_page_long.drop(
-                df_this_page_long.loc[df_this_page_long.loc[:, index_cols[0]] == cat].index
+                df_this_page_long.loc[
+                    df_this_page_long.loc[:, index_cols[0]] == cat
+                ].index
             )
 
         # make a copy of the categories row
@@ -187,12 +196,14 @@ if __name__ == "__main__":
 
         # replace cat names by codes in col "Categories"
         # first the manual replacements
-        df_this_page_long.loc[:, "category"] = df_this_page_long.loc[:, "category"].replace(
-            cat_codes_manual
-        )
+        df_this_page_long.loc[:, "category"] = df_this_page_long.loc[
+            :, "category"
+        ].replace(cat_codes_manual)
+
         # then the regex replacements
-        def repl(m):
+        def repl(m):  # noqa: D103
             return convert_ipcc_code_primap_to_primap2("IPC" + m.group("code"))
+
         df_this_page_long.loc[:, "category"] = df_this_page_long.loc[
             :, "category"
         ].str.replace(cat_code_regexp, repl, regex=True)
@@ -211,8 +222,10 @@ if __name__ == "__main__":
             ".", ""
         )
         pat = r"^(?P<first>[0-9\.,]*),(?P<last>[0-9\.,]*)$"
-        def repl(m):
+
+        def repl(m):  # noqa: D103
             return f"{m.group('first')}.{m.group('last')}"
+
         df_this_page_long.loc[:, "data"] = df_this_page_long.loc[:, "data"].str.replace(
             pat, repl, regex=True
         )
@@ -265,12 +278,11 @@ if __name__ == "__main__":
 
     encoding = {var: compression for var in data_pm2.data_vars}
     data_pm2.pr.to_netcdf(
-        output_folder / (output_filename + coords_terminologies["category"] + "_raw.nc"),
+        output_folder
+        / (output_filename + coords_terminologies["category"] + "_raw.nc"),
         encoding=encoding,
     )
 
-    #### continue here
-
     # ###
     # ## process the data
     # ###
@@ -288,7 +300,7 @@ if __name__ == "__main__":
     )
 
     # adapt source and metadata
-    current_source = data_proc_pm2.coords["source"].values[0]
+    current_source = data_proc_pm2.coords["source"].to_numpy()[0]
     data_temp = data_proc_pm2.pr.loc[{"source": current_source}]
     data_proc_pm2 = data_proc_pm2.pr.set("source", "BUR_NIR", data_temp)
 
@@ -305,6 +317,7 @@ if __name__ == "__main__":
 
     encoding = {var: compression for var in data_proc_pm2.data_vars}
     data_proc_pm2.pr.to_netcdf(
-        output_folder / (output_filename + coords_terminologies_2006["category"] + ".nc"),
+        output_folder
+        / (output_filename + coords_terminologies_2006["category"] + ".nc"),
         encoding=encoding,
     )

+ 30 - 0
src/unfccc_ghg_data/unfccc_reader/Republic_of_Korea/__init__.py

@@ -0,0 +1,30 @@
+"""Read South Korea's BURs, NIRs, NCs
+
+Scripts and configurations to read South Korea's submissions to the UNFCCC.
+Currently, the following submissions and datasets are available (all datasets
+including DI (read using the DI-reader) and legacy BUR/NIR (no code)):
+
+.. exec_code::
+    :hide_code:
+
+    from unfccc_ghg_data.helper.functions import (get_country_datasets,
+                                                  get_country_submissions)
+    country = 'KOR'
+    # print available submissions
+    print("="*15 + " Available submissions " + "="*15)
+    get_country_submissions(country, True)
+    print("")
+
+    #print available datasets
+    print("="*15 + " Available datasets " + "="*15)
+    get_country_datasets(country, True)
+
+You can also obtain this information running
+
+.. code-block:: bash
+
+    poetry run doit country_info country=KOR
+
+See below for a listing of scripts for BUR/NIR reading including links.
+
+"""

+ 511 - 403
src/unfccc_ghg_data/unfccc_reader/Republic_of_Korea/config_kor_bur4.py

@@ -1,413 +1,513 @@
+"""Config for South Korea's 2021 and 2022 inventories and BUR4
+
+Partial configuration for camelot and data aggregation. PRIMAP2 conversion
+config and metadata are defined in the reading script
+
+"""
+
 original_names = [
-    '총배출량',
-    '순배출량',
-    '에너지',
-    'A. 연료연소',
-    '1. 에너지산업',
-    'a. 공공전기 및 열 생산',
-    'b. 석유정제',
-    'c. 고체연료 제조 및 기타 에너지 산업',
-    '2. 제조업 및 건설업',
-    'a. 철강',
-    'b. 비철금속',
-    'c. 화학',
-    'd. 펄프, 제지 및 인쇄',
-    'e. 식음료품 가공 및 담배 제조',
-    'f. 기타',
-    '  1. 비금속',
-    '  2. 조립금속',
-    '  3. 나무 및 목재',
-    '  4. 건설',
-    '  5. 섬유 및 가죽',
-    '  6. 기타제조',
-    '3. 수송',
-    'a. 민간항공',
-    'b. 도로수송',
-    'c. 철도',
-    'd. 해운',
-    'e. 기타수송',
-    '4. 기타',
-    'a. 상업/공공',
-    'b. 가정',
-    'c. 농업/임업/어업',
-    '5. 미분류',
-    'B. 탈루',
-    '1. 고체연료',
-    '2.  석유 및 천연가스',
-    'a.  석유',
-    'b. 천연가스',
-    '산업공정',
-    'A. 광물산업',
-    '1. 시멘트생산',
-    '2. 석회생산',
-    '3. 석회석 및 백운석 소비',
-    '4. 소다회 생산 및 소비',
-    '5. 아스팔트 루핑',
-    '6. 아스팔트 도로포장',
-    'B. 화학산업',
-    'C. 금속산업',
-    '1. 철강생산',
-    '2. 합금철 생산',
-    '3. 알루미늄 생산',
-    '4. 마그네슘 생산의 SF6 소비',
-    'D. 기타산업',
-    'E. 할로카본 및 육불화황 생산',
-    '1. 부산물 배출',
-    '2. 탈루 배출',
-    'F. 할로카본 및 육불화황 소비',
-    '1.  냉장 및 냉방',
-    '2.  발포제',
-    '3.  소화기',
-    '4.  에어로졸',
-    '5.  용매',
-    '6.  기타 용도의 ODS 대체물질 사용',
-    '7.  반도체 제조',
-    '8.  중전기기',
-    '9.  기타(잠재배출량)',
-    '농업',
-    'A.  장내발효',
-    '1. 소',
-    '2. 물소',
-    '3. 양(면양)',
-    '4. 양(산양)',
-    '5. 낙타 및 라마',
-    '6. 말',
-    '7. 노새 및 당나귀',
-    '8. 돼지',
-    '9. 가금류',
-    '10. 기타 가축(사슴)',
-    'B.  가축분뇨처리',
-    '1. 소',
-    '2. 물소',
-    '3. 양(면양)',
-    '4. 양(산양)',
-    '5. 낙타 및 라마',
-    '6. 말',
-    '7. 노새 및 당나귀',
-    '8. 돼지',
-    '9. 가금류',
-    '10. 기타 가축(사슴)',
-    'C.  벼재배',
-    '1. 관개',
-    '2. 천수답',
-    'D. 농경지토양',
-    '1. 직접배출',
-    '2. 목장, 방목구역, 분료(거름)',
-    '3. 간접배출',
-    'E. 사바나 소각',
-    'F. 작물잔사소각',
-    '1. 곡류',
-    '2. 두류(콩)',
-    '3. 근채류',
-    '4. 사탕수수',
-    '5. 기타',
-    'LULUCF',
-    'A. 산림지',
-    '1. 산림지로 유지된 산림지',
-    '2. 타토지에서 전용된 산림지',
-    '3. 산림지에서 질소 시비로 인한 N2O 배출',
-    '4. 산림지에서 배수로 인한 Non-CO2 배출',
-    '5. 산림지에서 바이오매스 연소에 의한 배출',
-    'B. 농경지',
-    '1. 농경지로 유지된 농경지',
-    '2. 타토지에서 전용된 농경지',
-    '3. 농경지로의 전용에 따른 N2O 배출',
-    '4. 농경지에서 농업용 석회시용으로 인한 CO2 배출',
-    '5. 농경지에서 바이오매스 연소에 의한 배출',
-    'C. 초지',
-    '1. 초지로 유지된 초지',
-    '2. 타토지에서 전용된 초지',
-    '3. 초지에서 농업용 석회시용으로 인한 CO2 배출',
-    '4. 초지에서 바이오매스 연소에 의한 배출',
-    'D. 습지',
-    '1. 습지로 유지된 습지',
-    '2. 타토지에서 전용된 습지',
-    '3. 습지에서 배수로 인한 Non-CO2 배출',
-    '4. 습지에서 바이오매스 연소에 의한 배출',
-    'E. 정주지',
-    'F. 기타토지',
-    '폐기물',
-    'A. 폐기물매립',
-    '1. 관리형 매립',
-    '2. 비관리형 매립',
-    'B. 하폐수처리',
-    '1. 폐수처리',
-    '2. 하수처리',
-    'C. 폐기물소각',
-    'D. 기타',
-    '별도항목(Memo Item)',
-    '분야·부문/연도',
-    'C. 국제벙커링 및 다국적 작전',
-    '1. 벙커링',
-    'a. 국제 항공',
-    'b. 국제 해운',
-    '2. 다국적 작전',
-    '* 참고 : NO = 배출활동 및 공정이 없는 경우, NE = 산정하지 아니하는 경우, NA = 자연적, 이론적으로 발생하지 않는 활동 및 공정의 경우, IE = 다른 항목에 포함하여 보고하는 경우, C = 기밀정보인 경우',
-    '3. 타토지로 전용된 농경지', # start of new codes in 2021 inventory
-    '4. 농경지로의 전용에 따른 N2O 배출',
-    '5. 농경지에서 농업용 석회시용으로 인한 CO2 배출',
-    '6. 농경지에서 바이오매스 연소에 의한 배출',
-    'G. 기타',
+    "총배출량",
+    "순배출량",
+    "에너지",
+    "A. 연료연소",
+    "1. 에너지산업",
+    "a. 공공전기 및 열 생산",
+    "b. 석유정제",
+    "c. 고체연료 제조 및 기타 에너지 산업",
+    "2. 제조업 및 건설업",
+    "a. 철강",
+    "b. 비철금속",
+    "c. 화학",
+    "d. 펄프, 제지 및 인쇄",
+    "e. 식음료품 가공 및 담배 제조",
+    "f. 기타",
+    "  1. 비금속",
+    "  2. 조립금속",
+    "  3. 나무 및 목재",
+    "  4. 건설",
+    "  5. 섬유 및 가죽",
+    "  6. 기타제조",
+    "3. 수송",
+    "a. 민간항공",
+    "b. 도로수송",
+    "c. 철도",
+    "d. 해운",
+    "e. 기타수송",
+    "4. 기타",
+    "a. 상업/공공",
+    "b. 가정",
+    "c. 농업/임업/어업",
+    "5. 미분류",
+    "B. 탈루",
+    "1. 고체연료",
+    "2.  석유 및 천연가스",
+    "a.  석유",
+    "b. 천연가스",
+    "산업공정",
+    "A. 광물산업",
+    "1. 시멘트생산",
+    "2. 석회생산",
+    "3. 석회석 및 백운석 소비",
+    "4. 소다회 생산 및 소비",
+    "5. 아스팔트 루핑",
+    "6. 아스팔트 도로포장",
+    "B. 화학산업",
+    "C. 금속산업",
+    "1. 철강생산",
+    "2. 합금철 생산",
+    "3. 알루미늄 생산",
+    "4. 마그네슘 생산의 SF6 소비",
+    "D. 기타산업",
+    "E. 할로카본 및 육불화황 생산",
+    "1. 부산물 배출",
+    "2. 탈루 배출",
+    "F. 할로카본 및 육불화황 소비",
+    "1.  냉장 및 냉방",
+    "2.  발포제",
+    "3.  소화기",
+    "4.  에어로졸",
+    "5.  용매",
+    "6.  기타 용도의 ODS 대체물질 사용",
+    "7.  반도체 제조",
+    "8.  중전기기",
+    "9.  기타(잠재배출량)",
+    "농업",
+    "A.  장내발효",
+    "1. 소",
+    "2. 물소",
+    "3. 양(면양)",
+    "4. 양(산양)",
+    "5. 낙타 및 라마",
+    "6. 말",
+    "7. 노새 및 당나귀",
+    "8. 돼지",
+    "9. 가금류",
+    "10. 기타 가축(사슴)",
+    "B.  가축분뇨처리",
+    "1. 소",
+    "2. 물소",
+    "3. 양(면양)",
+    "4. 양(산양)",
+    "5. 낙타 및 라마",
+    "6. 말",
+    "7. 노새 및 당나귀",
+    "8. 돼지",
+    "9. 가금류",
+    "10. 기타 가축(사슴)",
+    "C.  벼재배",
+    "1. 관개",
+    "2. 천수답",
+    "D. 농경지토양",
+    "1. 직접배출",
+    "2. 목장, 방목구역, 분료(거름)",
+    "3. 간접배출",
+    "E. 사바나 소각",
+    "F. 작물잔사소각",
+    "1. 곡류",
+    "2. 두류(콩)",
+    "3. 근채류",
+    "4. 사탕수수",
+    "5. 기타",
+    "LULUCF",
+    "A. 산림지",
+    "1. 산림지로 유지된 산림지",
+    "2. 타토지에서 전용된 산림지",
+    "3. 산림지에서 질소 시비로 인한 N2O 배출",
+    "4. 산림지에서 배수로 인한 Non-CO2 배출",
+    "5. 산림지에서 바이오매스 연소에 의한 배출",
+    "B. 농경지",
+    "1. 농경지로 유지된 농경지",
+    "2. 타토지에서 전용된 농경지",
+    "3. 농경지로의 전용에 따른 N2O 배출",
+    "4. 농경지에서 농업용 석회시용으로 인한 CO2 배출",
+    "5. 농경지에서 바이오매스 연소에 의한 배출",
+    "C. 초지",
+    "1. 초지로 유지된 초지",
+    "2. 타토지에서 전용된 초지",
+    "3. 초지에서 농업용 석회시용으로 인한 CO2 배출",
+    "4. 초지에서 바이오매스 연소에 의한 배출",
+    "D. 습지",
+    "1. 습지로 유지된 습지",
+    "2. 타토지에서 전용된 습지",
+    "3. 습지에서 배수로 인한 Non-CO2 배출",
+    "4. 습지에서 바이오매스 연소에 의한 배출",
+    "E. 정주지",
+    "F. 기타토지",
+    "폐기물",
+    "A. 폐기물매립",
+    "1. 관리형 매립",
+    "2. 비관리형 매립",
+    "B. 하폐수처리",
+    "1. 폐수처리",
+    "2. 하수처리",
+    "C. 폐기물소각",
+    "D. 기타",
+    "별도항목(Memo Item)",
+    "분야·부문/연도",
+    "C. 국제벙커링 및 다국적 작전",
+    "1. 벙커링",
+    "a. 국제 항공",
+    "b. 국제 해운",
+    "2. 다국적 작전",
+    "* 참고 : NO = 배출활동 및 공정이 없는 경우, NE = 산정하지 아니하는 경우, NA = 자연적, "
+    "이론적으로 발생하지 않는 활동 및 공정의 경우, IE = 다른 항목에 포함하여 보고하는 경우, "
+    "C = 기밀정보인 경우",
+    "3. 타토지로 전용된 농경지",  # start of new codes in 2021 inventory
+    "4. 농경지로의 전용에 따른 N2O 배출",
+    "5. 농경지에서 농업용 석회시용으로 인한 CO2 배출",
+    "6. 농경지에서 바이오매스 연소에 의한 배출",
+    "G. 기타",
 ]
 translations = [
-    ['Total emissions', 'M.0.EL'],
-    ['Net emissions', '0'],
-    ['energy', '1'],
-    ['A. Fuel combustion', '1.A'],
-    ['1. Energy industry', '1.A.1'],
-    ['a. Public electricity and heat production', '1.A.1.a'],
-    ['b. Oil refining', '1.A.1.b'],
-    ['c. Solid fuel manufacturing and other energy industries', '1.A.1.c'],
-    ['2. Manufacturing and construction', '1.A.2'],
-    ['a. steel', '1.A.2.a'],
-    ['b. Non-ferrous metal', '1.A.2.b'],
-    ['c. chemistry', '1.A.2.c'],
-    ['d. Pulp, paper and printing', '1.A.2.d'],
-    ['e. Food and beverage processing and tobacco manufacturing', '1.A.2.e'],
-    ['f. Etc', '1.A.2.f'],
-    ['  1. Non-metal', '1.A.2.f.1'],
-    ['  2. Assembly metal', '1.A.2.f.2'],
-    ['  3. Wood and timber', '1.A.2.f.3'],
-    ['  4. Construction', '1.A.2.f.4'],
-    ['  5. Textile and leather', '1.A.2.f.5'],
-    ['  6. Other manufacturing', '1.A.2.f.6'],
-    ['3. Transportation', '1.A.3'],
-    ['a. Civil aviation', '1.A.3.a.2'],
-    ['b. Road transport', '1.A.3.b'],
-    ['c. railroad', '1.A.3.c'],
-    ['d. shipping', '1.A.3.d.2'],
-    ['e. Other transport', '1.A.3.e'],
-    ['4. Other', '1.A.4'],
-    ['a. Commercial/Public', '1.A.4.a'],
-    ['b. home', '1.A.4.b'],
-    ['c. Agriculture/Forestry/Fishing', '1.A.4.c'],
-    ['5. Uncategorized', '1.A.5'],
-    ['B. Talu', '1.B'],
-    ['1. Solid fuel', '1.B.1'],
-    ['2. Oil and natural gas', '1.B.2'],
-    ['a. oil', '1.B.2.a'],
-    ['b. Natural gas', '1.B.2.b'],
-    ['Industrial process', '2'],
-    ['A. Mineral industry', '2.A'],
-    ['1. Cement production', '2.A.1'],
-    ['2. Lime production', '2.A.2'],
-    ['3. Limestone and Dolomite Consumption', '2.A.3'],
-    ['4. Soda ash production and consumption', '2.A.4'],
-    ['5. Asphalt roofing', '2.A.5'],
-    ['6. Asphalt road pavement', '2.A.6'],
-    ['B. Chemical industry', '2.B'],
-    ['C. Metal Industry', '2.C'],
-    ['1. Steel production', '2.C.1'],
-    ['2. Ferroalloy production', '2.C.2'],
-    ['3. Aluminum production', '2.C.3'],
-    ['4. SF6 consumption in magnesium production', '2.C.4'],
-    ['D. Other industries', '2.D'],
-    ['E. Production of halocarbons and sulfur hexafluoride', '2.E'],
-    ['1. Emission of by-products', '2.E.1'],
-    ['2. Fugitive discharge', '2.E.2'],
-    ['F. Consumption of halocarbons and sulfur hexafluoride', '2.F'],
-    ['1. Refrigeration and cooling', '2.F.1'],
-    ['2. Foaming agent', '2.F.2'],
-    ['3. Fire extinguisher', '2.F.3'],
-    ['4. Aerosol', '2.F.4'],
-    ['5. Solvent', '2.F.5'],
-    ['6. Use of ODS substitutes for other purposes', '2.F.6'],
-    ['7. Semiconductor manufacturing', '2.F.7'],
-    ['8. Heavy electric machine', '2.F.8'],
-    ['9. Others (potential emissions)', '2.F.9'],
-    ['Agriculture', '4'],
-    ['A. Intestinal fermentation', '4.A'],
-    ['1. cow', '4.A.1'],
-    ['2. Water buffalo', '4.A.2'],
-    ['3. Sheep (Cotton Sheep)', '4.A.3'],
-    ['4. Sheep (Goat)', '4.A.4'],
-    ['5. Camel and Llama', '4.A.5'],
-    ['6. Horse', '4.A.6'],
-    ['7. Mules and Donkeys', '4.A.7'],
-    ['8. Pig', '4.A.8'],
-    ['9. Poultry', '4.A.9'],
-    ['10. Other livestock (deer)', '4.A.10'],
-    ['B. Livestock manure treatment', '4.B'],
-    ['1. cow', '4.B.1'],
-    ['2. Water buffalo', '4.B.2'],
-    ['3. Sheep (Cotton Sheep)', '4.B.3'],
-    ['4. Sheep (Goat)', '4.B.4'],
-    ['5. Camel and Llama', '4.B.5'],
-    ['6. Horse', '4.B.6'],
-    ['7. Mules and Donkeys', '4.B.7'],
-    ['8. Pig', '4.B.8'],
-    ['9. Poultry', '4.B.9'],
-    ['10. Other livestock (deer)', '4.B.10'],
-    ['C. Rice cultivation', '4.C'],
-    ['1. irrigation', '4.C.1'],
-    ['2. Thousand answers', '4.C.4'],
-    ['D. Cropland soil', '4.D'],
-    ['1. Direct discharge', '4.D.1'],
-    ['2. Ranch, grazing area, manure (manure)', '4.D.2'],
-    ['3. Indirect emissions', '4.D.3'],
-    ['E. Savannah incineration', '4.E'],
-    ['F. Crop residue incineration', '4.F'],
-    ['1. Grains', '4.F.1'],
-    ['2. Beans (beans)', '4.F.2'],
-    ['3. Root vegetables', '4.F.3'],
-    ['4. Sugar cane', '4.F.4'],
-    ['5. Other', '4.F.5'],
-    ['LULUCF', '5'],
-    ['A. Forest land', '5.A'],
-    ['1. Forest land maintained as a forest land', '5.A.1'],  # categories differ from IPCC1996
-    ['2. Forest land converted from other lands', '5.A.2'],  # categories differ from IPCC1996
-    ['3. N2O emissions from nitrogen fertilization in forest areas', '5.A.3'],  # categories differ from IPCC1996
-    ['4. Non-CO2 emission due to drainage in forest areas', '5.A.4'],  # categories differ from IPCC1996
-    ['5. Emissions from biomass combustion in forest areas', '5.A.5'],  # categories differ from IPCC1996
-    ['B. Cropland', '5.B'],
-    ['1. Agricultural land maintained as agricultural land', '5.B.1'],  # categories differ from IPCC1996
-    ['2. Cropland converted from other lands', '5.B.2'],  # categories differ from IPCC1996
-    ['3. N2O emission due to conversion to agricultural land', '5.B.3'],  # categories differ from IPCC1996
-    ['4. CO2 emission from agricultural lime application in agricultural land', '5.B.4'],  # categories differ from IPCC1996
-    ['5. Emissions from biomass combustion in agricultural land', '5.B.5'],  # categories differ from IPCC1996
-    ['C. Grassland', '5.C'],
-    ['1. Grassland maintained as grassland', '5.C.1'],  # categories differ from IPCC1996
-    ['2. Grassland dedicated to Tatoji', '5.C.2'],  # categories differ from IPCC1996
-    ['3. CO2 emission from agricultural lime application in grassland', '5.C.3'],  # categories differ from IPCC1996
-    ['4. Emissions from biomass combustion in grassland', '5.C.4'],  # categories differ from IPCC1996
-    ['D. Wetlands', '5.D'],
-    ['1. Wetlands maintained as wetlands', '5.D.1'],  # categories differ from IPCC1996
-    ['2. Wetlands converted from Tatoji', '5.D.2'],  # categories differ from IPCC1996
-    ['3. Non-CO2 emission due to drainage in wetlands', '5.D.3'],  # categories differ from IPCC1996
-    ['4. Emissions from biomass combustion in wetlands', '5.D.4'],  # categories differ from IPCC1996
-    ['E. Jeongju-ji', '5.E'],
-    ['F. Other land', '5.F'],
-    ['waste', '6'],
-    ['A. Landfill of waste', '6.A'],
-    ['1. Managed landfill', '6.A.1'],
-    ['2. Unmanaged landfill', '6.A.2'],
-    ['B. Sewage water treatment', '6.B'],
-    ['1. Wastewater treatment', '6.B.1'],  # categories differ from IPCC1996
-    ['2. Sewage treatment', '6.B.2'],  # categories differ from IPCC1996
-    ['C. Waste incineration', '6.C'],
-    ['D. Other', '6.D'],
-    ['Memo Item', '\\IGNORE'],
-    ['Field·Sector/Year', '\\IGNORE'],
-    ['C. International bunkering and multinational operations', '\\IGNORE'],
-    ['1. Bunkering', 'M.1'],
-    ['a. International aviation', 'M.1.A'],
-    ['b. International shipping', 'M.1.B'],
-    ['2. Multinational operations', 'M.2'],
-    ['', '\\IGNORE'],
-    ['3. Farmland converted to Tato land', '5.B.3'],  # new codes in 2021 inventory start here
-    ['4. N2O emission due to conversion to agricultural land', '5.B.4'],
-    ['5. CO2 emission from agricultural lime application in agricultural land', '5.B.5'],
-    ['6. Emissions from burning biomass on agricultural land', '5.B.6'],
-    ['G. Others', '5.G'],
+    ["Total emissions", "M.0.EL"],
+    ["Net emissions", "0"],
+    ["energy", "1"],
+    ["A. Fuel combustion", "1.A"],
+    ["1. Energy industry", "1.A.1"],
+    ["a. Public electricity and heat production", "1.A.1.a"],
+    ["b. Oil refining", "1.A.1.b"],
+    ["c. Solid fuel manufacturing and other energy industries", "1.A.1.c"],
+    ["2. Manufacturing and construction", "1.A.2"],
+    ["a. steel", "1.A.2.a"],
+    ["b. Non-ferrous metal", "1.A.2.b"],
+    ["c. chemistry", "1.A.2.c"],
+    ["d. Pulp, paper and printing", "1.A.2.d"],
+    ["e. Food and beverage processing and tobacco manufacturing", "1.A.2.e"],
+    ["f. Etc", "1.A.2.f"],
+    ["  1. Non-metal", "1.A.2.f.1"],
+    ["  2. Assembly metal", "1.A.2.f.2"],
+    ["  3. Wood and timber", "1.A.2.f.3"],
+    ["  4. Construction", "1.A.2.f.4"],
+    ["  5. Textile and leather", "1.A.2.f.5"],
+    ["  6. Other manufacturing", "1.A.2.f.6"],
+    ["3. Transportation", "1.A.3"],
+    ["a. Civil aviation", "1.A.3.a.2"],
+    ["b. Road transport", "1.A.3.b"],
+    ["c. railroad", "1.A.3.c"],
+    ["d. shipping", "1.A.3.d.2"],
+    ["e. Other transport", "1.A.3.e"],
+    ["4. Other", "1.A.4"],
+    ["a. Commercial/Public", "1.A.4.a"],
+    ["b. home", "1.A.4.b"],
+    ["c. Agriculture/Forestry/Fishing", "1.A.4.c"],
+    ["5. Uncategorized", "1.A.5"],
+    ["B. Talu", "1.B"],
+    ["1. Solid fuel", "1.B.1"],
+    ["2. Oil and natural gas", "1.B.2"],
+    ["a. oil", "1.B.2.a"],
+    ["b. Natural gas", "1.B.2.b"],
+    ["Industrial process", "2"],
+    ["A. Mineral industry", "2.A"],
+    ["1. Cement production", "2.A.1"],
+    ["2. Lime production", "2.A.2"],
+    ["3. Limestone and Dolomite Consumption", "2.A.3"],
+    ["4. Soda ash production and consumption", "2.A.4"],
+    ["5. Asphalt roofing", "2.A.5"],
+    ["6. Asphalt road pavement", "2.A.6"],
+    ["B. Chemical industry", "2.B"],
+    ["C. Metal Industry", "2.C"],
+    ["1. Steel production", "2.C.1"],
+    ["2. Ferroalloy production", "2.C.2"],
+    ["3. Aluminum production", "2.C.3"],
+    ["4. SF6 consumption in magnesium production", "2.C.4"],
+    ["D. Other industries", "2.D"],
+    ["E. Production of halocarbons and sulfur hexafluoride", "2.E"],
+    ["1. Emission of by-products", "2.E.1"],
+    ["2. Fugitive discharge", "2.E.2"],
+    ["F. Consumption of halocarbons and sulfur hexafluoride", "2.F"],
+    ["1. Refrigeration and cooling", "2.F.1"],
+    ["2. Foaming agent", "2.F.2"],
+    ["3. Fire extinguisher", "2.F.3"],
+    ["4. Aerosol", "2.F.4"],
+    ["5. Solvent", "2.F.5"],
+    ["6. Use of ODS substitutes for other purposes", "2.F.6"],
+    ["7. Semiconductor manufacturing", "2.F.7"],
+    ["8. Heavy electric machine", "2.F.8"],
+    ["9. Others (potential emissions)", "2.F.9"],
+    ["Agriculture", "4"],
+    ["A. Intestinal fermentation", "4.A"],
+    ["1. cow", "4.A.1"],
+    ["2. Water buffalo", "4.A.2"],
+    ["3. Sheep (Cotton Sheep)", "4.A.3"],
+    ["4. Sheep (Goat)", "4.A.4"],
+    ["5. Camel and Llama", "4.A.5"],
+    ["6. Horse", "4.A.6"],
+    ["7. Mules and Donkeys", "4.A.7"],
+    ["8. Pig", "4.A.8"],
+    ["9. Poultry", "4.A.9"],
+    ["10. Other livestock (deer)", "4.A.10"],
+    ["B. Livestock manure treatment", "4.B"],
+    ["1. cow", "4.B.1"],
+    ["2. Water buffalo", "4.B.2"],
+    ["3. Sheep (Cotton Sheep)", "4.B.3"],
+    ["4. Sheep (Goat)", "4.B.4"],
+    ["5. Camel and Llama", "4.B.5"],
+    ["6. Horse", "4.B.6"],
+    ["7. Mules and Donkeys", "4.B.7"],
+    ["8. Pig", "4.B.8"],
+    ["9. Poultry", "4.B.9"],
+    ["10. Other livestock (deer)", "4.B.10"],
+    ["C. Rice cultivation", "4.C"],
+    ["1. irrigation", "4.C.1"],
+    ["2. Thousand answers", "4.C.4"],
+    ["D. Cropland soil", "4.D"],
+    ["1. Direct discharge", "4.D.1"],
+    ["2. Ranch, grazing area, manure (manure)", "4.D.2"],
+    ["3. Indirect emissions", "4.D.3"],
+    ["E. Savannah incineration", "4.E"],
+    ["F. Crop residue incineration", "4.F"],
+    ["1. Grains", "4.F.1"],
+    ["2. Beans (beans)", "4.F.2"],
+    ["3. Root vegetables", "4.F.3"],
+    ["4. Sugar cane", "4.F.4"],
+    ["5. Other", "4.F.5"],
+    ["LULUCF", "5"],
+    ["A. Forest land", "5.A"],
+    [
+        "1. Forest land maintained as a forest land",
+        "5.A.1",
+    ],  # categories differ from IPCC1996
+    [
+        "2. Forest land converted from other lands",
+        "5.A.2",
+    ],  # categories differ from IPCC1996
+    [
+        "3. N2O emissions from nitrogen fertilization in forest areas",
+        "5.A.3",
+    ],  # categories differ from IPCC1996
+    [
+        "4. Non-CO2 emission due to drainage in forest areas",
+        "5.A.4",
+    ],  # categories differ from IPCC1996
+    [
+        "5. Emissions from biomass combustion in forest areas",
+        "5.A.5",
+    ],  # categories differ from IPCC1996
+    ["B. Cropland", "5.B"],
+    [
+        "1. Agricultural land maintained as agricultural land",
+        "5.B.1",
+    ],  # categories differ from IPCC1996
+    [
+        "2. Cropland converted from other lands",
+        "5.B.2",
+    ],  # categories differ from IPCC1996
+    [
+        "3. N2O emission due to conversion to agricultural land",
+        "5.B.3",
+    ],  # categories differ from IPCC1996
+    [
+        "4. CO2 emission from agricultural lime application in agricultural land",
+        "5.B.4",
+    ],  # categories differ from IPCC1996
+    [
+        "5. Emissions from biomass combustion in agricultural land",
+        "5.B.5",
+    ],  # categories differ from IPCC1996
+    ["C. Grassland", "5.C"],
+    [
+        "1. Grassland maintained as grassland",
+        "5.C.1",
+    ],  # categories differ from IPCC1996
+    ["2. Grassland dedicated to Tatoji", "5.C.2"],  # categories differ from IPCC1996
+    [
+        "3. CO2 emission from agricultural lime application in grassland",
+        "5.C.3",
+    ],  # categories differ from IPCC1996
+    [
+        "4. Emissions from biomass combustion in grassland",
+        "5.C.4",
+    ],  # categories differ from IPCC1996
+    ["D. Wetlands", "5.D"],
+    ["1. Wetlands maintained as wetlands", "5.D.1"],  # categories differ from IPCC1996
+    ["2. Wetlands converted from Tatoji", "5.D.2"],  # categories differ from IPCC1996
+    [
+        "3. Non-CO2 emission due to drainage in wetlands",
+        "5.D.3",
+    ],  # categories differ from IPCC1996
+    [
+        "4. Emissions from biomass combustion in wetlands",
+        "5.D.4",
+    ],  # categories differ from IPCC1996
+    ["E. Jeongju-ji", "5.E"],
+    ["F. Other land", "5.F"],
+    ["waste", "6"],
+    ["A. Landfill of waste", "6.A"],
+    ["1. Managed landfill", "6.A.1"],
+    ["2. Unmanaged landfill", "6.A.2"],
+    ["B. Sewage water treatment", "6.B"],
+    ["1. Wastewater treatment", "6.B.1"],  # categories differ from IPCC1996
+    ["2. Sewage treatment", "6.B.2"],  # categories differ from IPCC1996
+    ["C. Waste incineration", "6.C"],
+    ["D. Other", "6.D"],
+    ["Memo Item", "\\IGNORE"],
+    ["Field·Sector/Year", "\\IGNORE"],
+    ["C. International bunkering and multinational operations", "\\IGNORE"],
+    ["1. Bunkering", "M.1"],
+    ["a. International aviation", "M.1.A"],
+    ["b. International shipping", "M.1.B"],
+    ["2. Multinational operations", "M.2"],
+    ["", "\\IGNORE"],
+    [
+        "3. Farmland converted to Tato land",
+        "5.B.3",
+    ],  # new codes in 2021 inventory start here
+    ["4. N2O emission due to conversion to agricultural land", "5.B.4"],
+    [
+        "5. CO2 emission from agricultural lime application in agricultural land",
+        "5.B.5",
+    ],
+    ["6. Emissions from burning biomass on agricultural land", "5.B.6"],
+    ["G. Others", "5.G"],
 ]
 cat_name_translations = dict(zip(original_names, [cat[0] for cat in translations]))
 cat_codes = dict(zip(original_names, [cat[1] for cat in translations]))
 
 remove_cats = [
-    '1.A.1.a', '1.A.1.b', '1.A.1.c', '1.A.2.f',
-    '2.A', '2.D',
-    '2.F', '2.G',
-    '4.C.1', '4.C.4',
-    '4.D',
-    '4.F.1', '4.F.2', '4.F.3', '4.F.4', '4.F.5',  # detail not in 2006 categories
-    '5.A', '5.A.1', '5.A.2', '5.A.3', '5.A.4', '5.A.5',  # don't not match IPCC
+    "1.A.1.a",
+    "1.A.1.b",
+    "1.A.1.c",
+    "1.A.2.f",
+    "2.A",
+    "2.D",
+    "2.F",
+    "2.G",
+    "4.C.1",
+    "4.C.4",
+    "4.D",
+    "4.F.1",
+    "4.F.2",
+    "4.F.3",
+    "4.F.4",
+    "4.F.5",  # detail not in 2006 categories
+    "5.A",
+    "5.A.1",
+    "5.A.2",
+    "5.A.3",
+    "5.A.4",
+    "5.A.5",  # do not match IPCC
     # categories
-    '5.B', '5.B.1', '5.B.2', '5.B.3', '5.B.4', '5.B.5',
-    '5.C', '5.C.1', '5.C.2', '5.C.3', '5.C.4',
-    '5.D', '5.D.1', '5.D.2', '5.D.3', '5.D.4',
-    '5.E', '5.F',
-    '5.G', '5.B.6', # for 2021 NIR
+    "5.B",
+    "5.B.1",
+    "5.B.2",
+    "5.B.3",
+    "5.B.4",
+    "5.B.5",
+    "5.C",
+    "5.C.1",
+    "5.C.2",
+    "5.C.3",
+    "5.C.4",
+    "5.D",
+    "5.D.1",
+    "5.D.2",
+    "5.D.3",
+    "5.D.4",
+    "5.E",
+    "5.F",
+    "5.G",
+    "5.B.6",  # for 2021 NIR
 ]
 
 aggregate_before_mapping = {
-    '2006.2.D.4': {'sources': ['2.A.5', '2.A.6'], 'name': 'Other'},
-    '2006.3.C.4': {'sources': ['4.D.1', '4.D.2'],
-                   'name': 'Direct N2O Emissions from Managed Soils'},
-    '2006.M.3C1AG': {'sources': ['4.E', '4.F'], 'name': 'Biomass burning Agriculture'},
-    '2006.1.A.2.m': {'sources': ['1.A.2.f.2', '1.A.2.f.6'], 'name': 'Other'},
+    "2006.2.D.4": {"sources": ["2.A.5", "2.A.6"], "name": "Other"},
+    "2006.3.C.4": {
+        "sources": ["4.D.1", "4.D.2"],
+        "name": "Direct N2O Emissions from Managed Soils",
+    },
+    "2006.M.3C1AG": {"sources": ["4.E", "4.F"], "name": "Biomass burning Agriculture"},
+    "2006.1.A.2.m": {"sources": ["1.A.2.f.2", "1.A.2.f.6"], "name": "Other"},
 }
 
 cat_mapping = {
-    '1.A.2.f.1': '1.A.2.f',
-    '1.A.2.f.3': '1.A.2.j',
-    '1.A.2.f.4': '1.A.2.k',
-    '1.A.2.f.5': '1.A.2.l',
-    '2006.1.A.2.m': '1.A.2.m',
-    '2.A.4': '2.B.7',  # add to 2.B
-    '2.A.3': '2.A.4',
-    '2.D': '2.H',
-    '2006.2.D.4': '2.D.4',
-    '2.E': '2.B.9',  # add to 2.B
-    '2.E.1': '2.B.9.a',
-    '2.E.2': '2.B.9.b',
+    "1.A.2.f.1": "1.A.2.f",
+    "1.A.2.f.3": "1.A.2.j",
+    "1.A.2.f.4": "1.A.2.k",
+    "1.A.2.f.5": "1.A.2.l",
+    "2006.1.A.2.m": "1.A.2.m",
+    "2.A.4": "2.B.7",  # add to 2.B
+    "2.A.3": "2.A.4",
+    "2.D": "2.H",
+    "2006.2.D.4": "2.D.4",
+    "2.E": "2.B.9",  # add to 2.B
+    "2.E.1": "2.B.9.a",
+    "2.E.2": "2.B.9.b",
     #    '2.F', # remove?
-    '2.F.1': '2.F.1',  # just added here to avoid confusion
+    "2.F.1": "2.F.1",  # just added here to avoid confusion
     #    '2.F.2', '2.F.3', '2.F.4', '2.F.5',
-    '2.F.6': '2.E_1',
-    '2.F.7': '2.E_2',
-    '2.F.8': '2.G.1',
-    '2.F.9': '2.G.2',
-    '4': 'M.AG',
-    '4.A': '3.A.1',
-    '4.A.1': '3.A.1.a',
-    '4.A.2': '3.A.1.b',
-    '4.A.3': '3.A.1.c',
-    '4.A.4': '3.A.1.d',
-    '4.A.5': '3.A.1.e',
-    '4.A.6': '3.A.1.f',
-    '4.A.7': '3.A.1.g',
-    '4.A.8': '3.A.1.h',
-    '4.A.9': '3.A.1.i',
-    '4.A.10': '3.A.1.j',
-    '4.B': '3.A.2',
-    '4.B.1': '3.A.2.a',
-    '4.B.2': '3.A.2.b',
-    '4.B.3': '3.A.2.c',
-    '4.B.4': '3.A.2.d',
-    '4.B.5': '3.A.2.e',
-    '4.B.6': '3.A.2.f',
-    '4.B.7': '3.A.2.g',
-    '4.B.8': '3.A.2.h',
-    '4.B.9': '3.A.2.i',
-    '4.B.10': '3.A.2.j',
-    '4.C': '3.C.7',
-    '2006.3.C.4': '3.C.4',
-    '4.D.3': '3.C.5',
-    '2006.M.3C1AG': 'M.3.C.1.AG',
-    '5': 'M.LULUCF',
-    '6': '4',
-    '6.A': '4.A',
-    '6.A.1': '4.A.1',
-    '6.A.2': '4.A.2',
-    '6.B': '4.D',
-    '6.B.1': '4.D.1',
-    '6.B.2': '4.D.2',
-    '6.C': '4.C.1',
-    '6.D': '4.E',
-    'M.1': 'M.BK',
-    'M.1.A': 'M.BK.A',
-    'M.1.B': 'M.BK.M',
+    "2.F.6": "2.E_1",
+    "2.F.7": "2.E_2",
+    "2.F.8": "2.G.1",
+    "2.F.9": "2.G.2",
+    "4": "M.AG",
+    "4.A": "3.A.1",
+    "4.A.1": "3.A.1.a",
+    "4.A.2": "3.A.1.b",
+    "4.A.3": "3.A.1.c",
+    "4.A.4": "3.A.1.d",
+    "4.A.5": "3.A.1.e",
+    "4.A.6": "3.A.1.f",
+    "4.A.7": "3.A.1.g",
+    "4.A.8": "3.A.1.h",
+    "4.A.9": "3.A.1.i",
+    "4.A.10": "3.A.1.j",
+    "4.B": "3.A.2",
+    "4.B.1": "3.A.2.a",
+    "4.B.2": "3.A.2.b",
+    "4.B.3": "3.A.2.c",
+    "4.B.4": "3.A.2.d",
+    "4.B.5": "3.A.2.e",
+    "4.B.6": "3.A.2.f",
+    "4.B.7": "3.A.2.g",
+    "4.B.8": "3.A.2.h",
+    "4.B.9": "3.A.2.i",
+    "4.B.10": "3.A.2.j",
+    "4.C": "3.C.7",
+    "2006.3.C.4": "3.C.4",
+    "4.D.3": "3.C.5",
+    "2006.M.3C1AG": "M.3.C.1.AG",
+    "5": "M.LULUCF",
+    "6": "4",
+    "6.A": "4.A",
+    "6.A.1": "4.A.1",
+    "6.A.2": "4.A.2",
+    "6.B": "4.D",
+    "6.B.1": "4.D.1",
+    "6.B.2": "4.D.2",
+    "6.C": "4.C.1",
+    "6.D": "4.E",
+    "M.1": "M.BK",
+    "M.1.A": "M.BK.A",
+    "M.1.B": "M.BK.M",
 }
 
 aggregate_after_mapping = {
-    '1.A.3.a': {'sources': ['1.A.3.a.2'], 'name': 'Civil Aviation'},  # aviation
-    '1.A.3.d': {'sources': ['1.A.3.d.2'], 'name': 'Water-borne Navigation'},  # shipping
-    '2.A': {'sources': ['2.A.1', '2.A.2', '2.A.4', '2.A.5', '2.A.6'],
-            'name': 'Mineral Industry'},
-    '2.B': {'sources': ['2.B', '2.B.7', '2.B.9'], 'name': 'Chemical Industry'},
-    '2.D': {'sources': ['2.D.4'], 'name': 'Other'},
-    '2.E': {'sources': ['2.E_1', '2.E_2'], 'name': 'Electronics Industry'},
-    '2.F': {'sources': ['2.F.1', '2.F.2', '2.F.3', '2.F.4', '2.F.5'],
-            'name': 'Product uses as Substitutes for Ozone Depleting Substances'},
-    '2.G': {'sources': ['2.G.1', '2.G.2'], 'name': 'Other Product Manufacture and Use'},
-    '3.A': {'sources': ['3.A.1', '3.A.2'], 'name': 'Livestock'},
-    '3.C': {'sources': ['3.C.4', '3.C.5', '3.C.7'],
-                 'name': 'Aggregate sources and non-CO2 emissions sources on land'},
-    'M.3.C.AG': {'sources': ['3.C.4', '3.C.5', '3.C.7'],
-                 'name': 'Aggregate sources and non-CO2 emissions sources on land ('
-                         'Agriculture)'},
-    'M.AG.ELV': {'sources': ['M.3.C.AG'], 'name': 'Agriculture excluding livestock'},
-    '4.C': {'sources': ['4.C.1'], 'name': 'Incineration and Open Burning of Waste'},
+    "1.A.3.a": {"sources": ["1.A.3.a.2"], "name": "Civil Aviation"},  # aviation
+    "1.A.3.d": {"sources": ["1.A.3.d.2"], "name": "Water-borne Navigation"},  # shipping
+    "2.A": {
+        "sources": ["2.A.1", "2.A.2", "2.A.4", "2.A.5", "2.A.6"],
+        "name": "Mineral Industry",
+    },
+    "2.B": {"sources": ["2.B", "2.B.7", "2.B.9"], "name": "Chemical Industry"},
+    "2.D": {"sources": ["2.D.4"], "name": "Other"},
+    "2.E": {"sources": ["2.E_1", "2.E_2"], "name": "Electronics Industry"},
+    "2.F": {
+        "sources": ["2.F.1", "2.F.2", "2.F.3", "2.F.4", "2.F.5"],
+        "name": "Product uses as Substitutes for Ozone Depleting Substances",
+    },
+    "2.G": {"sources": ["2.G.1", "2.G.2"], "name": "Other Product Manufacture and Use"},
+    "3.A": {"sources": ["3.A.1", "3.A.2"], "name": "Livestock"},
+    "3.C": {
+        "sources": ["3.C.4", "3.C.5", "3.C.7"],
+        "name": "Aggregate sources and non-CO2 emissions sources on land",
+    },
+    "M.3.C.AG": {
+        "sources": ["3.C.4", "3.C.5", "3.C.7"],
+        "name": "Aggregate sources and non-CO2 emissions sources on land ("
+        "Agriculture)",
+    },
+    "M.AG.ELV": {"sources": ["M.3.C.AG"], "name": "Agriculture excluding livestock"},
+    "4.C": {"sources": ["4.C.1"], "name": "Incineration and Open Burning of Waste"},
 }
 
 coords_terminologies_2006 = {
@@ -422,27 +522,35 @@ filter_remove_2006 = {
     },
     "livestock": {  # temp until double cat name problem is solved
         "category (IPCC2006_PRIMAP)": [
-            '4.B.1', '4.B.10', '4.B.2', '4.B.3', '4.B.4',
-            '4.B.5', '4.B.6', '4.B.7', '4.B.8', '4.B.9',
+            "4.B.1",
+            "4.B.10",
+            "4.B.2",
+            "4.B.3",
+            "4.B.4",
+            "4.B.5",
+            "4.B.6",
+            "4.B.7",
+            "4.B.8",
+            "4.B.9",
         ]
     },
-    "fmap": {
-        "category (IPCC2006_PRIMAP)": remove_cats
-    },
+    "fmap": {"category (IPCC2006_PRIMAP)": remove_cats},
     "f_bef_map": {
         "category (IPCC2006_PRIMAP)": [
-            '2.A.5', '2.A.6',  # combined to 2006.2.D.4
-            '4.D.1', '4.D.2',  # combined to 2006.3.C.4
-            '4.E', '4.F',  # 2006.M.3.C.1.AG
-            '1.A.2.f.2', '1.A.2.f.6',  # 2006.1.A.2.m
+            "2.A.5",
+            "2.A.6",  # combined to 2006.2.D.4
+            "4.D.1",
+            "4.D.2",  # combined to 2006.3.C.4
+            "4.E",
+            "4.F",  # 2006.M.3.C.1.AG
+            "1.A.2.f.2",
+            "1.A.2.f.6",  # 2006.1.A.2.m
         ]
-    }
+    },
 }
 
 filter_remove_after_agg = {
     "tempCats": {
-        "category (IPCC2006_PRIMAP)": [
-            "2.E_1", "2.E_2"
-        ],
+        "category (IPCC2006_PRIMAP)": ["2.E_1", "2.E_2"],
     },
 }

+ 125 - 76
src/unfccc_ghg_data/unfccc_reader/Republic_of_Korea/read_KOR_2021_Inventory_from_xlsx.py

@@ -1,12 +1,18 @@
-# this script reads data from Korea's 2021 national inventory which is underlying BUR4
-# Data is read from the xlsx file
+"""
+Read Korea's 2021 inventory from xlsx
+
+This script reads data from Korea's 2021 national inventory
+Data are read from the xlsx file
+
+"""
+
 
 import os
 import sys
 
 import pandas as pd
 import primap2 as pm2
-from .config_kor_bur4 import (
+from config_kor_bur4 import (
     aggregate_after_mapping,
     aggregate_before_mapping,
     cat_codes,
@@ -24,42 +30,43 @@ if __name__ == "__main__":
     # ###
     # configuration
     # ###
-    input_folder = downloaded_data_path / 'non-UNFCCC' / 'Republic_of_Korea' / \
-                   '2021-Inventory'
-    output_folder = extracted_data_path / 'non-UNFCCC' / 'Republic_of_Korea'
+    input_folder = (
+        downloaded_data_path / "non-UNFCCC" / "Republic_of_Korea" / "2021-Inventory"
+    )
+    output_folder = extracted_data_path / "non-UNFCCC" / "Republic_of_Korea"
     if not output_folder.exists():
         output_folder.mkdir()
 
-    output_filename = 'KOR_2021-Inventory_2021_'
+    output_filename = "KOR_2021-Inventory_2021_"
 
-    inventory_file = 'Republic_of_Korea_National_GHG_Inventory_(1990_2019).xlsx'
+    inventory_file = "Republic_of_Korea_National_GHG_Inventory_(1990_2019).xlsx"
     years_to_read = range(1990, 2019 + 1)
 
-    sheets_to_read = ['온실가스', 'CO2', 'CH4', 'N2O', 'HFCs', 'PFCs', 'SF6']
+    sheets_to_read = ["온실가스", "CO2", "CH4", "N2O", "HFCs", "PFCs", "SF6"]
     cols_to_read = range(1, 2019 - 1990 + 3)
 
     # columns for category code and original category name
-    index_cols = ['분야·부문/연도']
+    index_cols = ["분야·부문/연도"]
 
     sheet_metadata = {
-        'entity': {
-            '온실가스': 'KYOTOGHG (SARGWP100)',
-            'CO2': 'CO2',
-            'CH4': 'CH4 (SARGWP100)',
-            'N2O': 'N2O (SARGWP100)',
-            'HFCs': 'HFCS (SARGWP100)',
-            'PFCs': 'PFCS (SARGWP100)',
-            'SF6': 'SF6 (SARGWP100)',
+        "entity": {
+            "온실가스": "KYOTOGHG (SARGWP100)",
+            "CO2": "CO2",
+            "CH4": "CH4 (SARGWP100)",
+            "N2O": "N2O (SARGWP100)",
+            "HFCs": "HFCS (SARGWP100)",
+            "PFCs": "PFCS (SARGWP100)",
+            "SF6": "SF6 (SARGWP100)",
+        },
+        "unit": {
+            "온실가스": "Gg CO2 / yr",
+            "CO2": "Gg CO2 / yr",
+            "CH4": "Gg CO2 / yr",
+            "N2O": "Gg CO2 / yr",
+            "HFCs": "Gg CO2 / yr",
+            "PFCs": "Gg CO2 / yr",
+            "SF6": "Gg CO2 / yr",
         },
-        'unit': {
-            '온실가스': 'Gg CO2 / yr',
-            'CO2': 'Gg CO2 / yr',
-            'CH4': 'Gg CO2 / yr',
-            'N2O': 'Gg CO2 / yr',
-            'HFCs': 'Gg CO2 / yr',
-            'PFCs': 'Gg CO2 / yr',
-            'SF6': 'Gg CO2 / yr',
-        }
     }
 
     # definitions for conversion to interchange format
@@ -73,7 +80,7 @@ if __name__ == "__main__":
 
     add_coords_cols = {
         "orig_cat_name": ["orig_cat_name", "category"],
-        "cat_name_translation": ["cat_name_translation", "category"]
+        "cat_name_translation": ["cat_name_translation", "category"],
     }
 
     coords_terminologies = {
@@ -99,12 +106,20 @@ if __name__ == "__main__":
         "f1": {
             "category (IPCC1996_KOR_INV)": "\\IGNORE",
         },
-        "livestock": { # temp until double cat name problem is solved
+        "livestock": {  # temp until double cat name problem is solved
             "category (IPCC1996_KOR_INV)": [
-                '4.B.1', '4.B.10', '4.B.2', '4.B.3', '4.B.4',
-                '4.B.5', '4.B.6', '4.B.7', '4.B.8', '4.B.9',
+                "4.B.1",
+                "4.B.10",
+                "4.B.2",
+                "4.B.3",
+                "4.B.4",
+                "4.B.5",
+                "4.B.6",
+                "4.B.7",
+                "4.B.8",
+                "4.B.9",
             ]
-        }
+        },
     }
 
     filter_keep = {}
@@ -115,7 +130,8 @@ if __name__ == "__main__":
         "contact": "mail@johannes-guetschow.de",
         "title": "Republic of Korea: National Greenhouse Gas Inventory Report 2021",
         "comment": "Read fom xlsx file by Johannes Gütschow",
-        "institution": "Republic of Korea, Ministry of Environment, Greenhouse Gas Inventory and Research Center",
+        "institution": "Republic of Korea, Ministry of Environment, "
+        "Greenhouse Gas Inventory and Research Center",
     }
 
     cols_for_space_stripping = []
@@ -135,11 +151,17 @@ if __name__ == "__main__":
 
     for sheet in sheets_to_read:
         # read current sheet (one sheet per gas)
-        df_current = pd.read_excel(input_folder / inventory_file, sheet_name=sheet, skiprows=3, nrows=146, usecols=cols_to_read,
-                                   engine="openpyxl")
+        df_current = pd.read_excel(
+            input_folder / inventory_file,
+            sheet_name=sheet,
+            skiprows=3,
+            nrows=146,
+            usecols=cols_to_read,
+            engine="openpyxl",
+        )
         # drop all rows where the index cols (category code and name) are both NaN
         # as without one of them there is no category information
-        df_current.dropna(axis=0, how='all', subset=index_cols, inplace=True)
+        df_current = df_current.dropna(axis=0, how="all", subset=index_cols)
         # set index. necessary for the stack operation in the conversion to long format
         # df_current = df_current.set_index(index_cols)
         # add columns
@@ -153,7 +175,7 @@ if __name__ == "__main__":
 
     df_all = df_all.reset_index(drop=True)
     # rename category col because filtering produces problems with korean col names
-    df_all.rename(columns={"분야·부문/연도": "category"}, inplace=True)
+    df_all = df_all.rename(columns={"분야·부문/연도": "category"})
 
     # create copies of category col for further processing
     df_all["orig_cat_name"] = df_all["category"]
@@ -172,20 +194,22 @@ if __name__ == "__main__":
         coords_defaults=coords_defaults,
         coords_terminologies=coords_terminologies,
         coords_value_mapping=coords_value_mapping,
-        #coords_value_filling=coords_value_filling,
-        #filter_remove=filter_remove,
-        #filter_keep=filter_keep,
+        # coords_value_filling=coords_value_filling,
+        # filter_remove=filter_remove,
+        # filter_keep=filter_keep,
         meta_data=meta_data,
         convert_str=True,
-    copy_df=True, # we need the unchanged DF for the conversion step
-        )
+        copy_df=True,  # we need the unchanged DF for the conversion step
+    )
 
     filter_data(data_if, filter_remove=filter_remove)
 
-    #conversion to PRIMAP2 native format
+    # conversion to PRIMAP2 native format
     data_pm2 = pm2.pm2io.from_interchange_format(data_if)
     # convert back to IF to have units in the fixed format
-    data_pm2 = data_pm2.reset_coords(["orig_cat_name", "cat_name_translation"], drop=True)
+    data_pm2 = data_pm2.reset_coords(
+        ["orig_cat_name", "cat_name_translation"], drop=True
+    )
     data_if = data_pm2.pr.to_interchange_format()
 
     # ###
@@ -193,17 +217,20 @@ if __name__ == "__main__":
     # ###
     if not output_folder.exists():
         output_folder.mkdir()
-    #pm2.pm2io.write_interchange_format(output_folder / (output_filename + coords_terminologies["category"]), data_if)
+    pm2.pm2io.write_interchange_format(
+        output_folder / (output_filename + coords_terminologies["category"]), data_if
+    )
 
     data_pm2 = pm2.pm2io.from_interchange_format(data_if)
     encoding = {var: compression for var in data_pm2.data_vars}
-    #data_pm2.pr.to_netcdf(output_folder / (output_filename + coords_terminologies["category"] + ".nc"), encoding=encoding)
+    data_pm2.pr.to_netcdf(
+        output_folder / (output_filename + coords_terminologies["category"] + ".nc"),
+        encoding=encoding,
+    )
 
     # ###
     # conversion to ipcc 2006 categories
     # ###
-
-
     data_if_2006 = pm2.pm2io.convert_wide_dataframe_if(
         df_all,
         coords_cols=coords_cols,
@@ -216,21 +243,23 @@ if __name__ == "__main__":
         copy_df=True,  # don't mess up the dataframe when testing
     )
 
-    cat_label = 'category (' + coords_terminologies_2006["category"] + ')'
+    cat_label = "category (" + coords_terminologies_2006["category"] + ")"
     # agg before mapping
 
     for cat_to_agg in aggregate_before_mapping:
-        mask = data_if_2006[cat_label].isin(aggregate_before_mapping[cat_to_agg]["sources"])
+        mask = data_if_2006[cat_label].isin(
+            aggregate_before_mapping[cat_to_agg]["sources"]
+        )
         df_test = data_if_2006[mask]
 
         if len(df_test) > 0:
             print(f"Aggregating category {cat_to_agg}")
             df_combine = df_test.copy(deep=True)
 
-            time_format = '%Y'
+            time_format = "%Y"
             time_columns = [
                 col
-                for col in df_combine.columns.values
+                for col in df_combine.columns.to_numpy()
                 if matches_time_format(col, time_format)
             ]
 
@@ -238,20 +267,25 @@ if __name__ == "__main__":
                 df_combine[col] = pd.to_numeric(df_combine[col], errors="coerce")
 
             df_combine = df_combine.groupby(
-                by=['source', 'scenario (PRIMAP)', 'provenance', 'area (ISO3)', 'entity',
-                    'unit']).sum()
-
+                by=[
+                    "source",
+                    "scenario (PRIMAP)",
+                    "provenance",
+                    "area (ISO3)",
+                    "entity",
+                    "unit",
+                ]
+            ).sum()
 
             df_combine.insert(0, cat_label, cat_to_agg)
-            df_combine.insert(1, "orig_cat_name",
-                              aggregate_before_mapping[cat_to_agg]["name"])
+            df_combine.insert(
+                1, "orig_cat_name", aggregate_before_mapping[cat_to_agg]["name"]
+            )
 
             df_combine = df_combine.reset_index()
 
             if cat_to_agg in aggregate_before_mapping[cat_to_agg]["sources"]:
-                filter_this_cat = {
-                    "f": {cat_label: cat_to_agg}
-                }
+                filter_this_cat = {"f": {cat_label: cat_to_agg}}
                 filter_data(data_if_2006, filter_remove=filter_this_cat)
 
             data_if_2006 = pd.concat([data_if_2006, df_combine])
@@ -268,17 +302,19 @@ if __name__ == "__main__":
     # agg after mapping
 
     for cat_to_agg in aggregate_after_mapping:
-        mask = data_if_2006[cat_label].isin(aggregate_after_mapping[cat_to_agg]["sources"])
+        mask = data_if_2006[cat_label].isin(
+            aggregate_after_mapping[cat_to_agg]["sources"]
+        )
         df_test = data_if_2006[mask]
 
         if len(df_test) > 0:
             print(f"Aggregating category {cat_to_agg}")
             df_combine = df_test.copy(deep=True)
 
-            time_format = '%Y'
+            time_format = "%Y"
             time_columns = [
                 col
-                for col in df_combine.columns.values
+                for col in df_combine.columns.to_numpy()
                 if matches_time_format(col, time_format)
             ]
 
@@ -286,36 +322,49 @@ if __name__ == "__main__":
                 df_combine[col] = pd.to_numeric(df_combine[col], errors="coerce")
 
             df_combine = df_combine.groupby(
-                by=['source', 'scenario (PRIMAP)', 'provenance', 'area (ISO3)', 'entity',
-                    'unit']).sum()
+                by=[
+                    "source",
+                    "scenario (PRIMAP)",
+                    "provenance",
+                    "area (ISO3)",
+                    "entity",
+                    "unit",
+                ]
+            ).sum()
 
             df_combine.insert(0, cat_label, cat_to_agg)
-            df_combine.insert(1, "orig_cat_name",
-                              aggregate_after_mapping[cat_to_agg]["name"])
+            df_combine.insert(
+                1, "orig_cat_name", aggregate_after_mapping[cat_to_agg]["name"]
+            )
 
             df_combine = df_combine.reset_index()
 
             if cat_to_agg in aggregate_after_mapping[cat_to_agg]["sources"]:
-                filter_this_cat = {
-                    "f": {cat_label: cat_to_agg}
-                }
+                filter_this_cat = {"f": {cat_label: cat_to_agg}}
                 filter_data(data_if_2006, filter_remove=filter_this_cat)
 
             data_if_2006 = pd.concat([data_if_2006, df_combine])
         else:
             print(f"no data to aggregate category {cat_to_agg}")
 
-
-    #conversion to PRIMAP2 native format
+    # conversion to PRIMAP2 native format
     data_pm2_2006 = pm2.pm2io.from_interchange_format(data_if_2006)
     # convert back to IF to have units in the fixed format
-    data_pm2_2006 = data_pm2_2006.reset_coords(["orig_cat_name", "cat_name_translation"],
-                                           drop=True)
+    data_pm2_2006 = data_pm2_2006.reset_coords(
+        ["orig_cat_name", "cat_name_translation"], drop=True
+    )
     data_if_2006 = data_pm2_2006.pr.to_interchange_format()
     # save IPCC2006 data
 
     filter_data(data_if_2006, filter_remove=filter_remove_after_agg)
-    pm2.pm2io.write_interchange_format(output_folder / (output_filename + coords_terminologies_2006["category"]), data_if_2006)
+    pm2.pm2io.write_interchange_format(
+        output_folder / (output_filename + coords_terminologies_2006["category"]),
+        data_if_2006,
+    )
 
     encoding = {var: compression for var in data_pm2_2006.data_vars}
-    data_pm2_2006.pr.to_netcdf(output_folder / (output_filename + coords_terminologies_2006["category"] + ".nc"), encoding=encoding)
+    data_pm2_2006.pr.to_netcdf(
+        output_folder
+        / (output_filename + coords_terminologies_2006["category"] + ".nc"),
+        encoding=encoding,
+    )

+ 140 - 82
src/unfccc_ghg_data/unfccc_reader/Republic_of_Korea/read_KOR_2022_Inventory_from_xlsx.py

@@ -1,12 +1,17 @@
-# this script reads data from Korea's 2021 national inventory which is underlying BUR4
-# Data is read from the xlsx file
+"""
+Read Korea's 2021 inventory from xlsx
+
+This script reads data from Korea's 2022 national inventory
+Data are read from the xlsx file
+
+"""
 
 import os
 import sys
 
 import pandas as pd
 import primap2 as pm2
-from .config_kor_bur4 import (
+from config_kor_bur4 import (
     aggregate_after_mapping,
     aggregate_before_mapping,
     cat_codes,
@@ -24,42 +29,43 @@ if __name__ == "__main__":
     # ###
     # configuration
     # ###
-    input_folder = downloaded_data_path / 'non-UNFCCC' / 'Republic_of_Korea' / \
-                   '2022-Inventory'
-    output_folder = extracted_data_path / 'non-UNFCCC' / 'Republic_of_Korea'
+    input_folder = (
+        downloaded_data_path / "non-UNFCCC" / "Republic_of_Korea" / "2022-Inventory"
+    )
+    output_folder = extracted_data_path / "non-UNFCCC" / "Republic_of_Korea"
     if not output_folder.exists():
         output_folder.mkdir()
 
-    output_filename = 'KOR_2022-Inventory_2022_'
+    output_filename = "KOR_2022-Inventory_2022_"
 
-    inventory_file = 'Republic_of_Korea_National_GHG_Inventory_(1990_2020).xlsx'
+    inventory_file = "Republic_of_Korea_National_GHG_Inventory_(1990_2020).xlsx"
     years_to_read = range(1990, 2020 + 1)
 
-    sheets_to_read = ['온실가스', 'CO2', 'CH4', 'N2O', 'HFCs', 'PFCs', 'SF6']
+    sheets_to_read = ["온실가스", "CO2", "CH4", "N2O", "HFCs", "PFCs", "SF6"]
     cols_to_read = range(1, 2020 - 1990 + 3)
 
     # columns for category code and original category name
-    index_cols = ['분야·부문/연도']
+    index_cols = ["분야·부문/연도"]
 
     sheet_metadata = {
-        'entity': {
-            '온실가스': 'KYOTOGHG (SARGWP100)',
-            'CO2': 'CO2',
-            'CH4': 'CH4 (SARGWP100)',
-            'N2O': 'N2O (SARGWP100)',
-            'HFCs': 'HFCS (SARGWP100)',
-            'PFCs': 'PFCS (SARGWP100)',
-            'SF6': 'SF6 (SARGWP100)',
+        "entity": {
+            "온실가스": "KYOTOGHG (SARGWP100)",
+            "CO2": "CO2",
+            "CH4": "CH4 (SARGWP100)",
+            "N2O": "N2O (SARGWP100)",
+            "HFCs": "HFCS (SARGWP100)",
+            "PFCs": "PFCS (SARGWP100)",
+            "SF6": "SF6 (SARGWP100)",
+        },
+        "unit": {
+            "온실가스": "Gg CO2 / yr",
+            "CO2": "Gg CO2 / yr",
+            "CH4": "Gg CO2 / yr",
+            "N2O": "Gg CO2 / yr",
+            "HFCs": "Gg CO2 / yr",
+            "PFCs": "Gg CO2 / yr",
+            "SF6": "Gg CO2 / yr",
         },
-        'unit': {
-            '온실가스': 'Gg CO2 / yr',
-            'CO2': 'Gg CO2 / yr',
-            'CH4': 'Gg CO2 / yr',
-            'N2O': 'Gg CO2 / yr',
-            'HFCs': 'Gg CO2 / yr',
-            'PFCs': 'Gg CO2 / yr',
-            'SF6': 'Gg CO2 / yr',
-        }
     }
 
     # definitions for conversion to interchange format
@@ -73,7 +79,7 @@ if __name__ == "__main__":
 
     add_coords_cols = {
         "orig_cat_name": ["orig_cat_name", "category"],
-        "cat_name_translation": ["cat_name_translation", "category"]
+        "cat_name_translation": ["cat_name_translation", "category"],
     }
 
     coords_terminologies = {
@@ -99,12 +105,20 @@ if __name__ == "__main__":
         "f1": {
             "category (IPCC1996_KOR_INV)": "\\IGNORE",
         },
-        "livestock": { # temp until double cat name problem is solved
+        "livestock": {  # temp until double cat name problem is solved
             "category (IPCC1996_KOR_INV)": [
-                '4.B.1', '4.B.10', '4.B.2', '4.B.3', '4.B.4',
-                '4.B.5', '4.B.6', '4.B.7', '4.B.8', '4.B.9',
+                "4.B.1",
+                "4.B.10",
+                "4.B.2",
+                "4.B.3",
+                "4.B.4",
+                "4.B.5",
+                "4.B.6",
+                "4.B.7",
+                "4.B.8",
+                "4.B.9",
             ]
-        }
+        },
     }
 
     filter_keep = {}
@@ -115,11 +129,10 @@ if __name__ == "__main__":
         "contact": "mail@johannes-guetschow.de",
         "title": "Republic of Korea: National Greenhouse Gas Inventory Report 2022",
         "comment": "Read fom xlsx file by Johannes Gütschow",
-        "institution": "Republic of Korea, Ministry of Environment, Greenhouse Gas Inventory and Research Center",
+        "institution": "Republic of Korea, Ministry of Environment, "
+        "Greenhouse Gas Inventory and Research Center",
     }
 
-
-
     cols_for_space_stripping = []
 
     compression = dict(zlib=True, complevel=9)
@@ -137,11 +150,17 @@ if __name__ == "__main__":
 
     for sheet in sheets_to_read:
         # read current sheet (one sheet per gas)
-        df_current = pd.read_excel(input_folder / inventory_file, sheet_name=sheet, skiprows=3, nrows=146, usecols=cols_to_read,
-                                   engine="openpyxl")
+        df_current = pd.read_excel(
+            input_folder / inventory_file,
+            sheet_name=sheet,
+            skiprows=3,
+            nrows=146,
+            usecols=cols_to_read,
+            engine="openpyxl",
+        )
         # drop all rows where the index cols (category code and name) are both NaN
         # as without one of them there is no category information
-        df_current.dropna(axis=0, how='all', subset=index_cols, inplace=True)
+        df_current = df_current.dropna(axis=0, how="all", subset=index_cols)
         # set index. necessary for the stack operation in the conversion to long format
         # df_current = df_current.set_index(index_cols)
         # make sure all col headers are str
@@ -157,14 +176,12 @@ if __name__ == "__main__":
 
     df_all = df_all.reset_index(drop=True)
     # rename category col because filtering produces problems with korean col names
-    df_all.rename(columns={"분야·부문/연도": "category"}, inplace=True)
+    df_all = df_all.rename(columns={"분야·부문/연도": "category"})
 
     # create copies of category col for further processing
     df_all["orig_cat_name"] = df_all["category"]
     df_all["cat_name_translation"] = df_all["category"]
 
-
-
     # ###
     # convert to PRIMAP2 interchange format
     # ###
@@ -175,20 +192,22 @@ if __name__ == "__main__":
         coords_defaults=coords_defaults,
         coords_terminologies=coords_terminologies,
         coords_value_mapping=coords_value_mapping,
-        #coords_value_filling=coords_value_filling,
-        #filter_remove=filter_remove,
-        #filter_keep=filter_keep,
+        # coords_value_filling=coords_value_filling,
+        # filter_remove=filter_remove,
+        # filter_keep=filter_keep,
         meta_data=meta_data,
         convert_str=True,
-        copy_df=True, # we need the unchanged DF for the conversion step
-        )
+        copy_df=True,  # we need the unchanged DF for the conversion step
+    )
 
     filter_data(data_if, filter_remove=filter_remove)
 
-    #conversion to PRIMAP2 native format
+    # conversion to PRIMAP2 native format
     data_pm2 = pm2.pm2io.from_interchange_format(data_if)
     # convert back to IF to have units in the fixed format
-    data_pm2 = data_pm2.reset_coords(["orig_cat_name", "cat_name_translation"], drop=True)
+    data_pm2 = data_pm2.reset_coords(
+        ["orig_cat_name", "cat_name_translation"], drop=True
+    )
     data_if = data_pm2.pr.to_interchange_format()
 
     # ###
@@ -196,17 +215,21 @@ if __name__ == "__main__":
     # ###
     if not output_folder.exists():
         output_folder.mkdir()
-    pm2.pm2io.write_interchange_format(output_folder / (output_filename + coords_terminologies["category"]), data_if)
+    pm2.pm2io.write_interchange_format(
+        output_folder / (output_filename + coords_terminologies["category"]), data_if
+    )
 
     data_pm2 = pm2.pm2io.from_interchange_format(data_if)
     encoding = {var: compression for var in data_pm2.data_vars}
-    data_pm2.pr.to_netcdf(output_folder / (output_filename + coords_terminologies["category"] + ".nc"), encoding=encoding)
+    data_pm2.pr.to_netcdf(
+        output_folder / (output_filename + coords_terminologies["category"] + ".nc"),
+        encoding=encoding,
+    )
 
     # ###
     # conversion to ipcc 2006 categories
     # ###
 
-
     data_if_2006 = pm2.pm2io.convert_wide_dataframe_if(
         df_all,
         coords_cols=coords_cols,
@@ -219,21 +242,23 @@ if __name__ == "__main__":
         copy_df=True,  # don't mess up the dataframe when testing
     )
 
-    cat_label = 'category (' + coords_terminologies_2006["category"] + ')'
+    cat_label = "category (" + coords_terminologies_2006["category"] + ")"
     # agg before mapping
 
     for cat_to_agg in aggregate_before_mapping:
-        mask = data_if_2006[cat_label].isin(aggregate_before_mapping[cat_to_agg]["sources"])
+        mask = data_if_2006[cat_label].isin(
+            aggregate_before_mapping[cat_to_agg]["sources"]
+        )
         df_test = data_if_2006[mask]
 
         if len(df_test) > 0:
             print(f"Aggregating category {cat_to_agg}")
             df_combine = df_test.copy(deep=True)
 
-            time_format = '%Y'
+            time_format = "%Y"
             time_columns = [
                 col
-                for col in df_combine.columns.values
+                for col in df_combine.columns.to_numpy()
                 if matches_time_format(col, time_format)
             ]
 
@@ -241,20 +266,32 @@ if __name__ == "__main__":
                 df_combine[col] = pd.to_numeric(df_combine[col], errors="coerce")
 
             df_combine = df_combine.groupby(
-                by=['source', 'scenario (PRIMAP)', 'provenance', 'area (ISO3)', 'entity',
-                    'unit']).sum()
-
-            df_combine = df_combine.drop(columns=["category (IPCC2006_PRIMAP)", "orig_cat_name", "cat_name_translation"])
+                by=[
+                    "source",
+                    "scenario (PRIMAP)",
+                    "provenance",
+                    "area (ISO3)",
+                    "entity",
+                    "unit",
+                ]
+            ).sum()
+
+            df_combine = df_combine.drop(
+                columns=[
+                    "category (IPCC2006_PRIMAP)",
+                    "orig_cat_name",
+                    "cat_name_translation",
+                ]
+            )
             df_combine.insert(0, cat_label, cat_to_agg)
-            df_combine.insert(1, "orig_cat_name",
-                              aggregate_before_mapping[cat_to_agg]["name"])
+            df_combine.insert(
+                1, "orig_cat_name", aggregate_before_mapping[cat_to_agg]["name"]
+            )
 
             df_combine = df_combine.reset_index()
 
             if cat_to_agg in aggregate_before_mapping[cat_to_agg]["sources"]:
-                filter_this_cat = {
-                    "f": {cat_label: cat_to_agg}
-                }
+                filter_this_cat = {"f": {cat_label: cat_to_agg}}
                 filter_data(data_if_2006, filter_remove=filter_this_cat)
 
             data_if_2006 = pd.concat([data_if_2006, df_combine])
@@ -271,17 +308,19 @@ if __name__ == "__main__":
     # agg after mapping
 
     for cat_to_agg in aggregate_after_mapping:
-        mask = data_if_2006[cat_label].isin(aggregate_after_mapping[cat_to_agg]["sources"])
+        mask = data_if_2006[cat_label].isin(
+            aggregate_after_mapping[cat_to_agg]["sources"]
+        )
         df_test = data_if_2006[mask]
 
         if len(df_test) > 0:
             print(f"Aggregating category {cat_to_agg}")
             df_combine = df_test.copy(deep=True)
 
-            time_format = '%Y'
+            time_format = "%Y"
             time_columns = [
                 col
-                for col in df_combine.columns.values
+                for col in df_combine.columns.to_numpy()
                 if matches_time_format(col, time_format)
             ]
 
@@ -289,37 +328,56 @@ if __name__ == "__main__":
                 df_combine[col] = pd.to_numeric(df_combine[col], errors="coerce")
 
             df_combine = df_combine.groupby(
-                by=['source', 'scenario (PRIMAP)', 'provenance', 'area (ISO3)', 'entity',
-                    'unit']).sum()
-
-            df_combine = df_combine.drop(columns=["category (IPCC2006_PRIMAP)", "orig_cat_name", "cat_name_translation"])
+                by=[
+                    "source",
+                    "scenario (PRIMAP)",
+                    "provenance",
+                    "area (ISO3)",
+                    "entity",
+                    "unit",
+                ]
+            ).sum()
+
+            df_combine = df_combine.drop(
+                columns=[
+                    "category (IPCC2006_PRIMAP)",
+                    "orig_cat_name",
+                    "cat_name_translation",
+                ]
+            )
             df_combine.insert(0, cat_label, cat_to_agg)
-            df_combine.insert(1, "orig_cat_name",
-                              aggregate_after_mapping[cat_to_agg]["name"])
+            df_combine.insert(
+                1, "orig_cat_name", aggregate_after_mapping[cat_to_agg]["name"]
+            )
 
             df_combine = df_combine.reset_index()
 
             if cat_to_agg in aggregate_after_mapping[cat_to_agg]["sources"]:
-                filter_this_cat = {
-                    "f": {cat_label: cat_to_agg}
-                }
+                filter_this_cat = {"f": {cat_label: cat_to_agg}}
                 filter_data(data_if_2006, filter_remove=filter_this_cat)
 
             data_if_2006 = pd.concat([data_if_2006, df_combine])
         else:
             print(f"no data to aggregate category {cat_to_agg}")
 
-
-    #conversion to PRIMAP2 native format
+    # conversion to PRIMAP2 native format
     data_pm2_2006 = pm2.pm2io.from_interchange_format(data_if_2006)
     # convert back to IF to have units in the fixed format
-    data_pm2_2006 = data_pm2_2006.reset_coords(["orig_cat_name", "cat_name_translation"],
-                                           drop=True)
+    data_pm2_2006 = data_pm2_2006.reset_coords(
+        ["orig_cat_name", "cat_name_translation"], drop=True
+    )
     data_if_2006 = data_pm2_2006.pr.to_interchange_format()
     # save IPCC2006 data
 
     filter_data(data_if_2006, filter_remove=filter_remove_after_agg)
-    pm2.pm2io.write_interchange_format(output_folder / (output_filename + coords_terminologies_2006["category"]), data_if_2006)
+    pm2.pm2io.write_interchange_format(
+        output_folder / (output_filename + coords_terminologies_2006["category"]),
+        data_if_2006,
+    )
 
     encoding = {var: compression for var in data_pm2_2006.data_vars}
-    data_pm2_2006.pr.to_netcdf(output_folder / (output_filename + coords_terminologies_2006["category"] + ".nc"), encoding=encoding)
+    data_pm2_2006.pr.to_netcdf(
+        output_folder
+        / (output_filename + coords_terminologies_2006["category"] + ".nc"),
+        encoding=encoding,
+    )

+ 75 - 47
src/unfccc_ghg_data/unfccc_reader/Republic_of_Korea/read_KOR_BUR4_from_xlsx.py

@@ -1,12 +1,17 @@
-# this script reads data from Korea's BUR4
-# Data is read from the xlsx file
+"""
+Read Korea's BUR4 from xlsx
+
+This script reads data from Korea's 2020 national inventory which is underlying BUR4
+Data are read from the xlsx file
+
+"""
 
 import os
 import sys
 
 import pandas as pd
 import primap2 as pm2
-from .config_kor_bur4 import cat_codes, cat_name_translations
+from config_kor_bur4 import cat_codes, cat_name_translations
 from primap2.pm2io._data_reading import filter_data
 
 from unfccc_ghg_data.helper import downloaded_data_path, extracted_data_path
@@ -15,42 +20,43 @@ if __name__ == "__main__":
     # ###
     # configuration
     # ###
-    input_folder = downloaded_data_path / 'non-UNFCCC' / 'Republic_of_Korea' / \
-                   '2020-Inventory'
-    output_folder = extracted_data_path / 'UNFCCC' / 'Republic_of_Korea'
+    input_folder = (
+        downloaded_data_path / "non-UNFCCC" / "Republic_of_Korea" / "2020-Inventory"
+    )
+    output_folder = extracted_data_path / "UNFCCC" / "Republic_of_Korea"
     if not output_folder.exists():
         output_folder.mkdir()
 
-    output_filename = 'KOR_BUR4_2021_'
+    output_filename = "KOR_BUR4_2021_"
 
-    inventory_file = 'Republic_of_Korea_National_GHG_Inventory_(1990_2018).xlsx'
+    inventory_file = "Republic_of_Korea_National_GHG_Inventory_(1990_2018).xlsx"
     years_to_read = range(1990, 2018 + 1)
 
-    sheets_to_read = ['온실가스', 'CO2', 'CH4', 'N2O', 'HFCs', 'PFCs', 'SF6']
+    sheets_to_read = ["온실가스", "CO2", "CH4", "N2O", "HFCs", "PFCs", "SF6"]
     cols_to_read = range(1, 2018 - 1990 + 3)
 
     # columns for category code and original category name
-    index_cols = ['분야·부문/연도']
+    index_cols = ["분야·부문/연도"]
 
     sheet_metadata = {
-        'entity': {
-            '온실가스': 'KYOTOGHG (SARGWP100)',
-            'CO2': 'CO2',
-            'CH4': 'CH4 (SARGWP100)',
-            'N2O': 'N2O (SARGWP100)',
-            'HFCs': 'HFCS (SARGWP100)',
-            'PFCs': 'PFCS (SARGWP100)',
-            'SF6': 'SF6 (SARGWP100)',
+        "entity": {
+            "온실가스": "KYOTOGHG (SARGWP100)",
+            "CO2": "CO2",
+            "CH4": "CH4 (SARGWP100)",
+            "N2O": "N2O (SARGWP100)",
+            "HFCs": "HFCS (SARGWP100)",
+            "PFCs": "PFCS (SARGWP100)",
+            "SF6": "SF6 (SARGWP100)",
+        },
+        "unit": {
+            "온실가스": "Gg CO2 / yr",
+            "CO2": "Gg CO2 / yr",
+            "CH4": "Gg CO2 / yr",
+            "N2O": "Gg CO2 / yr",
+            "HFCs": "Gg CO2 / yr",
+            "PFCs": "Gg CO2 / yr",
+            "SF6": "Gg CO2 / yr",
         },
-        'unit': {
-            '온실가스': 'Gg CO2 / yr',
-            'CO2': 'Gg CO2 / yr',
-            'CH4': 'Gg CO2 / yr',
-            'N2O': 'Gg CO2 / yr',
-            'HFCs': 'Gg CO2 / yr',
-            'PFCs': 'Gg CO2 / yr',
-            'SF6': 'Gg CO2 / yr',
-        }
     }
 
     # definitions for conversion to interchange format
@@ -64,7 +70,7 @@ if __name__ == "__main__":
 
     add_coords_cols = {
         "orig_cat_name": ["orig_cat_name", "category"],
-        "cat_name_translation": ["cat_name_translation", "category"]
+        "cat_name_translation": ["cat_name_translation", "category"],
     }
 
     coords_terminologies = {
@@ -90,21 +96,32 @@ if __name__ == "__main__":
         "f1": {
             "category (IPCC1996_KOR_INV)": "\\IGNORE",
         },
-        "livestock": { # temp until double cat name problem is solved
-            "category (IPCC1996_KOR_INV)": {
-                '4.B.1', '4.B.10', '4.B.2', '4.B.3', '4.B.4',
-                '4.B.5', '4.B.6', '4.B.7', '4.B.8', '4.B.9',
-            }
-        }
+        "livestock": {  # temp until double cat name problem is solved
+            "category (IPCC1996_KOR_INV)": [
+                "4.B.1",
+                "4.B.10",
+                "4.B.2",
+                "4.B.3",
+                "4.B.4",
+                "4.B.5",
+                "4.B.6",
+                "4.B.7",
+                "4.B.8",
+                "4.B.9",
+            ]
+        },
     }
 
     filter_keep = {}
 
     meta_data = {
-        "references": "https://unfccc.int/documents/418616, http://www.gir.go.kr/home/file/readDownloadFile.do?fileId=4856&fileSeq=2",
+        "references": "https://unfccc.int/documents/418616, "
+        "http://www.gir.go.kr/home/file/readDownloadFile.do?"
+        "fileId=4856&fileSeq=2",
         "rights": "",
         "contact": "mail@johannes-guetschow.de.de",
-        "title": "Republic of Korea: BUR4 / National Greenhouse Gas Inventory Report 2020",
+        "title": "Republic of Korea: BUR4 / National Greenhouse Gas Inventory Report "
+        "2020",
         "comment": "Read fom xlsx file by Johannes Gütschow",
         "institution": "United Nations Framework Convention on Climate Change (UNFCCC)",
     }
@@ -126,11 +143,17 @@ if __name__ == "__main__":
 
     for sheet in sheets_to_read:
         # read current sheet (one sheet per gas)
-        df_current = pd.read_excel(input_folder / inventory_file, sheet_name=sheet, skiprows=3, nrows=144, usecols=cols_to_read,
-                                   engine="openpyxl")
+        df_current = pd.read_excel(
+            input_folder / inventory_file,
+            sheet_name=sheet,
+            skiprows=3,
+            nrows=144,
+            usecols=cols_to_read,
+            engine="openpyxl",
+        )
         # drop all rows where the index cols (category code and name) are both NaN
         # as without one of them there is no category information
-        df_current.dropna(axis=0, how='all', subset=index_cols, inplace=True)
+        df_current = df_current.dropna(axis=0, how="all", subset=index_cols)
         # set index. necessary for the stack operation in the conversion to long format
         # df_current = df_current.set_index(index_cols)
         # add columns
@@ -144,7 +167,7 @@ if __name__ == "__main__":
 
     df_all = df_all.reset_index(drop=True)
     # rename category col because filtering produces problems with korean col names
-    df_all.rename(columns={"분야·부문/연도": "category"}, inplace=True)
+    df_all = df_all.rename(columns={"분야·부문/연도": "category"})
 
     # create copies of category col for further processing
     df_all["orig_cat_name"] = df_all["category"]
@@ -163,12 +186,12 @@ if __name__ == "__main__":
         coords_defaults=coords_defaults,
         coords_terminologies=coords_terminologies,
         coords_value_mapping=coords_value_mapping,
-        #coords_value_filling=coords_value_filling,
-        #filter_remove=filter_remove,
-        #filter_keep=filter_keep,
+        # coords_value_filling=coords_value_filling,
+        # filter_remove=filter_remove,
+        # filter_keep=filter_keep,
         meta_data=meta_data,
-        convert_str=True
-        )
+        convert_str=True,
+    )
 
     filter_data(data_if, filter_remove=filter_remove)
 
@@ -181,7 +204,12 @@ if __name__ == "__main__":
     # ###
     if not output_folder.exists():
         output_folder.mkdir()
-    pm2.pm2io.write_interchange_format(output_folder / (output_filename + coords_terminologies["category"]), data_if)
+    pm2.pm2io.write_interchange_format(
+        output_folder / (output_filename + coords_terminologies["category"]), data_if
+    )
 
     encoding = {var: compression for var in data_pm2.data_vars}
-    data_pm2.pr.to_netcdf(output_folder / (output_filename + coords_terminologies["category"] + ".nc"), encoding=encoding)
+    data_pm2.pr.to_netcdf(
+        output_folder / (output_filename + coords_terminologies["category"] + ".nc"),
+        encoding=encoding,
+    )

+ 30 - 0
src/unfccc_ghg_data/unfccc_reader/Singapore/__init__.py

@@ -0,0 +1,30 @@
+"""Read Singapore's BURs, NIRs, NCs
+
+Scripts and configurations to read Argentina's submissions to the UNFCCC.
+Currently, the following submissions and datasets are available (all datasets
+including DI (red using the DI-reader) and legacy BUR/NIR (no code)):
+
+.. exec_code::
+    :hide_code:
+
+    from unfccc_ghg_data.helper.functions import (get_country_datasets,
+                                                  get_country_submissions)
+    country = 'SGP'
+    # print available submissions
+    print("="*15 + " Available submissions " + "="*15)
+    get_country_submissions(country, True)
+    print("")
+
+    #print available datasets
+    print("="*15 + " Available datasets " + "="*15)
+    get_country_datasets(country, True)
+
+You can also obtain this information running
+
+.. code-block:: bash
+
+    poetry run doit country_info country=SGP
+
+See below for a listing of scripts for BUR/NIR reading including links.
+
+"""

+ 408 - 256
src/unfccc_ghg_data/unfccc_reader/Singapore/config_sgp_bur5.py

@@ -1,152 +1,222 @@
+"""Config for Singapore's BUR5
+
+Full configuration including PRIMAP2 conversion config and metadata
+
+"""
+
 table_def_templates = {
-    '66_1': {  # 66
-        "area": ['68,743,522,157'],
-        "cols": ['224,280,319,359,399,445,481'],
+    "66_1": {  # 66
+        "area": ["68,743,522,157"],
+        "cols": ["224,280,319,359,399,445,481"],
         "rows_to_fix": {
             # 2: ['and Sink Categories',],
-            3: ['1A2 Manufacturing Industries',
-                '1B3 Other Emissions from', '1C - Carbon Dioxide Transport',
-                '2 — INDUSTRIAL PROCESSES AND', '2D - Non-Energy Products from',
-                '2F - Product Uses as Substitutes for',
-                '2G - Other Product Manufacture'],
+            3: [
+                "1A2 Manufacturing Industries",
+                "1B3 Other Emissions from",
+                "1C - Carbon Dioxide Transport",
+                "2 — INDUSTRIAL PROCESSES AND",
+                "2D - Non-Energy Products from",
+                "2F - Product Uses as Substitutes for",
+                "2G - Other Product Manufacture",
+            ],
         },
     },
-    '66_2': {  # 66
-        "area": ['671,744,1117,265'],
-        "cols": ['824,875,912,954,996,1040,1082'],
+    "66_2": {  # 66
+        "area": ["671,744,1117,265"],
+        "cols": ["824,875,912,954,996,1040,1082"],
         "rows_to_fix": {
-            3: ['3 — AGRICULTURE, FORESTRY AND', '3C - Aggregate Sources and Non-CO2',
-                '4C - Incineration and Open Burning',
-                '4D -  Wastewater Treatment',
-                '5A - Indirect N2O emissions from the', 'CO2 from Biomass Combustion',
-                ],
+            3: [
+                "3 — AGRICULTURE, FORESTRY AND",
+                "3C - Aggregate Sources and Non-CO2",
+                "4C - Incineration and Open Burning",
+                "4D -  Wastewater Treatment",
+                "5A - Indirect N2O emissions from the",
+                "CO2 from Biomass Combustion",
+            ],
         },
     },
-    '67_1': {  # 67
-        "area": ['70,727,554,159'],
-        "cols": ['207,254,291,319,356,400,442,468,503'],
+    "67_1": {  # 67
+        "area": ["70,727,554,159"],
+        "cols": ["207,254,291,319,356,400,442,468,503"],
         "rows_to_fix": {
-            2: ['2 — INDUSTRIAL PROCESSES', '2A4 Other Process Uses',
-                '2B4 Caprolactam, Glyoxal and', '2B8 Petrochemical and',
-                ],
-            3: ['Total National Emissions',
-                ],
+            2: [
+                "2 — INDUSTRIAL PROCESSES",
+                "2A4 Other Process Uses",
+                "2B4 Caprolactam, Glyoxal and",
+                "2B8 Petrochemical and",
+            ],
+            3: [
+                "Total National Emissions",
+            ],
         },
     },
-    '67_2': {  # 67
-        "area": ['666,725,1150,119'],
-        "cols": ['801,847,889,915,952,996,1036,1063,1098'],
+    "67_2": {  # 67
+        "area": ["666,725,1150,119"],
+        "cols": ["801,847,889,915,952,996,1036,1063,1098"],
         "rows_to_fix": {
-            2: ['2D - Non-Energy Products from', '2G - Other Product',
-                '2G2 SF6 and PFCs from', '2H2 Food and Beverages',
-                ],
-            3: ['Total National Emissions', '2E1 Integrated Circuit',
-                '2F - Product Uses as Substitutes for', '2F1 Refrigeration and',
-                ],
+            2: [
+                "2D - Non-Energy Products from",
+                "2G - Other Product",
+                "2G2 SF6 and PFCs from",
+                "2H2 Food and Beverages",
+            ],
+            3: [
+                "Total National Emissions",
+                "2E1 Integrated Circuit",
+                "2F - Product Uses as Substitutes for",
+                "2F1 Refrigeration and",
+            ],
         },
     },
-    '68_1': {  # 68
-        "area": ['66,787,524,217'],
-        "cols": ['205,261,315,366,415,473'],
+    "68_1": {  # 68
+        "area": ["66,787,524,217"],
+        "cols": ["205,261,315,366,415,473"],
         "rows_to_fix": {
-            2: ['2 — INDUSTRIAL PROCESSES', '2A4 Other Process Uses',
-                '2B4 Caprolactam, Glyoxal and', '2B8 Petrochemical and',
-                ],
-            3: ['Total National Emissions',
-                ],
+            2: [
+                "2 — INDUSTRIAL PROCESSES",
+                "2A4 Other Process Uses",
+                "2B4 Caprolactam, Glyoxal and",
+                "2B8 Petrochemical and",
+            ],
+            3: [
+                "Total National Emissions",
+            ],
         },
     },
-    '68_2': {  # 68
-        "area": ['666,787,1119,180'],
-        "cols": ['808,854,910,961,1017,1066'],
+    "68_2": {  # 68
+        "area": ["666,787,1119,180"],
+        "cols": ["808,854,910,961,1017,1066"],
         "rows_to_fix": {
-            2: ['2D - Non-Energy Products from',
-                '2F - Product Uses as Substitutes for', '2F1 Refrigeration and Air',
-                '2G2 SF6 and PFCs from Other', '2H2 Food and Beverages',
-                ],
-            3: ['Total National Emissions', '2E1 Integrated Circuit or',
-                '2G - Other Product Manufacture',
-                ],
+            2: [
+                "2D - Non-Energy Products from",
+                "2F - Product Uses as Substitutes for",
+                "2F1 Refrigeration and Air",
+                "2G2 SF6 and PFCs from Other",
+                "2H2 Food and Beverages",
+            ],
+            3: [
+                "Total National Emissions",
+                "2E1 Integrated Circuit or",
+                "2G - Other Product Manufacture",
+            ],
         },
     },
-    '84_1': {  # 84
-        "area": ['70,667,525,112'],
-        "cols": ['193,291,345,396,440,480'],
+    "84_1": {  # 84
+        "area": ["70,667,525,112"],
+        "cols": ["193,291,345,396,440,480"],
         "rows_to_fix": {},
     },
-    '84_2': {  # 84
-        "area": ['668,667,1115,83'],
-        "cols": ['854,908,954,1001,1038,1073'],
-        "rows_to_fix": { },
+    "84_2": {  # 84
+        "area": ["668,667,1115,83"],
+        "cols": ["854,908,954,1001,1038,1073"],
+        "rows_to_fix": {},
     },
-    '85_1': {  # 85
-        "area": ['70,680,531,170'],
-        "cols": ['275,328,375,414,456,489'],
+    "85_1": {  # 85
+        "area": ["70,680,531,170"],
+        "cols": ["275,328,375,414,456,489"],
         "rows_to_fix": {},
     },
-    '85_2': {  # 85
-        "area": ['663,675,1117,175'],
-        "cols": ['849,908,954,1001,1045,1073'],
+    "85_2": {  # 85
+        "area": ["663,675,1117,175"],
+        "cols": ["849,908,954,1001,1045,1073"],
         "rows_to_fix": {
-            3: ['3C — Aggregate Sources and Non-CO2',
-                '3C4 - Direct N2O Emissions from', '3C5 - Indirect N2O Emissions from',
-                '3C6 - Indirect N2O Emissions from']
+            3: [
+                "3C — Aggregate Sources and Non-CO2",
+                "3C4 - Direct N2O Emissions from",
+                "3C5 - Indirect N2O Emissions from",
+                "3C6 - Indirect N2O Emissions from",
+            ]
         },
     },
-    '92': {  # 92
-        "area": ['72,672,514,333'],
-        "cols": ['228,275,319,361,398,438,489'],
+    "92": {  # 92
+        "area": ["72,672,514,333"],
+        "cols": ["228,275,319,361,398,438,489"],
         "rows_to_fix": {
-            3: ['4A1 Managed Waste',
-                '4A2 Unmanaged Waste', '4A3 Uncategorised Waste',
-                '4C - Incineration and', '4D - Wastewater Treatment',
-                '4D1 Domestic Wastewater', '4D2 Industrial Wastewater']
+            3: [
+                "4A1 Managed Waste",
+                "4A2 Unmanaged Waste",
+                "4A3 Uncategorised Waste",
+                "4C - Incineration and",
+                "4D - Wastewater Treatment",
+                "4D1 Domestic Wastewater",
+                "4D2 Industrial Wastewater",
+            ]
         },
     },
-    '95_1': {  # 95
-        "area": ['70,731,507,149'],
-        "cols": ['233,307,375,452'],
+    "95_1": {  # 95
+        "area": ["70,731,507,149"],
+        "cols": ["233,307,375,452"],
         "drop_rows": [0, 1, 2, 3],
         "rows_to_fix": {
-            3: ['Total (Net)', '1A2 Manufacturing Industries',
-                '2 — INDUSTRIAL PROCESSES', '3 — AGRICULTURE, FORESTRY',
-                '3C - Aggregate Sources and Non-CO2', '4C - Incineration and Open',
-                'Clinical Waste', '4D - Wastewater Treatment',
-                'CO2 from Biomass Combustion for']
+            3: [
+                "Total (Net)",
+                "1A2 Manufacturing Industries",
+                "2 — INDUSTRIAL PROCESSES",
+                "3 — AGRICULTURE, FORESTRY",
+                "3C - Aggregate Sources and Non-CO2",
+                "4C - Incineration and Open",
+                "Clinical Waste",
+                "4D - Wastewater Treatment",
+                "CO2 from Biomass Combustion for",
+            ]
         },
         "header": {
-            'entity': ['Greenhouse Gas Source and Sink Categories',
-                       'Net CO2', 'CH4', 'N2O', 'HFCs'],
-            'unit': ['', 'Gg', 'GgCO2eq', 'GgCO2eq', 'GgCO2eq'],
+            "entity": [
+                "Greenhouse Gas Source and Sink Categories",
+                "Net CO2",
+                "CH4",
+                "N2O",
+                "HFCs",
+            ],
+            "unit": ["", "Gg", "GgCO2eq", "GgCO2eq", "GgCO2eq"],
         },
     },
-    '95_2': {  # 95
-        "area": ['666,731,1103,149'],
-        "cols": ['829,903,971,1048'],
+    "95_2": {  # 95
+        "area": ["666,731,1103,149"],
+        "cols": ["829,903,971,1048"],
         "drop_rows": [0, 1, 2, 3, 4, 5],
         "rows_to_fix": {
-            3: ['Total (Net)', '1A2 Manufacturing Industries',
-                '2 — INDUSTRIAL PROCESSES', '3 — AGRICULTURE, FORESTRY',
-                '3C - Aggregate Sources and Non-CO2', '4C - Incineration and Open',
-                'Clinical Waste', '4D - Wastewater Treatment',
-                'CO2 from Biomass Combustion for']
+            3: [
+                "Total (Net)",
+                "1A2 Manufacturing Industries",
+                "2 — INDUSTRIAL PROCESSES",
+                "3 — AGRICULTURE, FORESTRY",
+                "3C - Aggregate Sources and Non-CO2",
+                "4C - Incineration and Open",
+                "Clinical Waste",
+                "4D - Wastewater Treatment",
+                "CO2 from Biomass Combustion for",
+            ]
         },
         "header": {
-            'entity': ['Greenhouse Gas Source and Sink Categories',
-                       'PFCs', 'SF6', 'NF3', 'Total (Net) National Emissions'],
-            'unit': ['', 'GgCO2eq', 'GgCO2eq', 'GgCO2eq', 'GgCO2eq'],
+            "entity": [
+                "Greenhouse Gas Source and Sink Categories",
+                "PFCs",
+                "SF6",
+                "NF3",
+                "Total (Net) National Emissions",
+            ],
+            "unit": ["", "GgCO2eq", "GgCO2eq", "GgCO2eq", "GgCO2eq"],
         },
     },
 }
 
 table_defs = {
-    '66': {
-        "templates": ['66_1', '66_2'],
+    "66": {
+        "templates": ["66_1", "66_2"],
         # "header_rows": [0, 1],
         "header": {
-            'entity': ['Greenhouse Gas Source and Sink Categories', 'Net CO2',
-                       'CH4', 'N2O', 'HFCs', 'PFCs', 'SF6', 'NF3'],
-            'unit': ['', 'Gg', 'Gg', 'Gg', 'GgCO2eq', 'GgCO2eq', 'GgCO2eq', 'GgCO2eq'],
+            "entity": [
+                "Greenhouse Gas Source and Sink Categories",
+                "Net CO2",
+                "CH4",
+                "N2O",
+                "HFCs",
+                "PFCs",
+                "SF6",
+                "NF3",
+            ],
+            "unit": ["", "Gg", "Gg", "Gg", "GgCO2eq", "GgCO2eq", "GgCO2eq", "GgCO2eq"],
         },
         "drop_rows": [0, 1, 2, 3],
         # "drop_cols": ['NF3', 'SF6'],
@@ -155,13 +225,22 @@ table_defs = {
         # "unit_info": unit_info_2018,
         "coords_value_mapping": "2018",
     },
-    '67': {
-        "templates": ['67_1', '67_2'],
+    "67": {
+        "templates": ["67_1", "67_2"],
         "header": {
-            'entity': ['Greenhouse Gas Source and Sink Categories', 'HFC-23', 'HFC-32',
-                       'HFC-41', 'HFC-125', 'HFC-134a', 'HFC-143a', 'HFC-152a',
-                       'HFC-227ea', 'HFC-43-10mee'],
-            'unit': ['', 'kg', 'kg', 'kg', 'kg', 'kg', 'kg', 'kg', 'kg', 'kg'],
+            "entity": [
+                "Greenhouse Gas Source and Sink Categories",
+                "HFC-23",
+                "HFC-32",
+                "HFC-41",
+                "HFC-125",
+                "HFC-134a",
+                "HFC-143a",
+                "HFC-152a",
+                "HFC-227ea",
+                "HFC-43-10mee",
+            ],
+            "unit": ["", "kg", "kg", "kg", "kg", "kg", "kg", "kg", "kg", "kg"],
         },
         "drop_rows": [0, 1, 2, 3],
         # "drop_cols": ['NF3', 'SF6'],
@@ -170,24 +249,31 @@ table_defs = {
         # "unit_info": unit_info_2018,
         "coords_value_mapping": "2018_fgases",
     },
-    '68': {
-        "templates": ['68_1', '68_2'],
+    "68": {
+        "templates": ["68_1", "68_2"],
         "header": {
-            'entity': ['Greenhouse Gas Source and Sink Categories', 'PFC-14',
-                       'PFC-116', 'PFC-218', 'PFC-318', 'SF6', 'NF3'],
-            'unit': ['', 'kg', 'kg', 'kg', 'kg', 'kg', 'kg'],
+            "entity": [
+                "Greenhouse Gas Source and Sink Categories",
+                "PFC-14",
+                "PFC-116",
+                "PFC-218",
+                "PFC-318",
+                "SF6",
+                "NF3",
+            ],
+            "unit": ["", "kg", "kg", "kg", "kg", "kg", "kg"],
         },
         "drop_rows": [0, 1, 2],
-         "category_col": "Greenhouse Gas Source and Sink Categories",
+        "category_col": "Greenhouse Gas Source and Sink Categories",
         "year": 2018,
         # "unit_info": unit_info_2018,
         "coords_value_mapping": "2018_fgases",
     },
-    '84': {
-        "templates": ['84_1', '84_2'],
+    "84": {
+        "templates": ["84_1", "84_2"],
         "header": {
-            'entity': ['Categories', 'CO2', 'CH4', 'N2O', 'NOx', 'CO', 'NMVOC'],
-            'unit': ['', 'Gg', 'Gg', 'Gg', 'Gg', 'Gg', 'Gg'],
+            "entity": ["Categories", "CO2", "CH4", "N2O", "NOx", "CO", "NMVOC"],
+            "unit": ["", "Gg", "Gg", "Gg", "Gg", "Gg", "Gg"],
         },
         "drop_rows": [0, 1, 2, 3, 4, 5],
         "category_col": "Categories",
@@ -195,11 +281,11 @@ table_defs = {
         # "unit_info": unit_info_2018,
         "coords_value_mapping": "2018",
     },
-    '85': {
-        "templates": ['85_1', '85_2'],
+    "85": {
+        "templates": ["85_1", "85_2"],
         "header": {
-            'entity': ['Categories', 'CO2', 'CH4', 'N2O', 'NOx', 'CO', 'NMVOC'],
-            'unit': ['', 'Gg', 'Gg', 'Gg', 'Gg', 'Gg', 'Gg'],
+            "entity": ["Categories", "CO2", "CH4", "N2O", "NOx", "CO", "NMVOC"],
+            "unit": ["", "Gg", "Gg", "Gg", "Gg", "Gg", "Gg"],
         },
         "drop_rows": [0, 1, 2, 3, 4, 5],
         "category_col": "Categories",
@@ -207,11 +293,11 @@ table_defs = {
         # "unit_info": unit_info_2018,
         "coords_value_mapping": "2018",
     },
-    '92': {
-        "templates": ['92'],
+    "92": {
+        "templates": ["92"],
         "header": {
-            'entity': ['Categories', 'CO2', 'CH4', 'N2O', 'NOx', 'CO', 'NMVOC', 'SO2'],
-            'unit': ['', 'Gg', 'Gg', 'Gg', 'Gg', 'Gg', 'Gg', 'Gg'],
+            "entity": ["Categories", "CO2", "CH4", "N2O", "NOx", "CO", "NMVOC", "SO2"],
+            "unit": ["", "Gg", "Gg", "Gg", "Gg", "Gg", "Gg", "Gg"],
         },
         "drop_rows": [0, 1, 2],
         "category_col": "Categories",
@@ -219,43 +305,43 @@ table_defs = {
         # "unit_info": unit_info_2018,
         "coords_value_mapping": "2018",
     },
-    '95': {
-        "templates": ['95_1', '95_2'],
+    "95": {
+        "templates": ["95_1", "95_2"],
         "category_col": "Greenhouse Gas Source and Sink Categories",
         "year": 2016,
         # "unit_info": unit_info_2018,
         "coords_value_mapping": "other",
     },
-    '96': {
-        "templates": ['95_1', '95_2'],
+    "96": {
+        "templates": ["95_1", "95_2"],
         "category_col": "Greenhouse Gas Source and Sink Categories",
         "year": 2014,
         # "unit_info": unit_info_2018,
         "coords_value_mapping": "other",
     },
-    '97': {
-        "templates": ['95_1', '95_2'],
+    "97": {
+        "templates": ["95_1", "95_2"],
         "category_col": "Greenhouse Gas Source and Sink Categories",
         "year": 2012,
         # "unit_info": unit_info_2018,
         "coords_value_mapping": "other",
     },
-    '98': {
-        "templates": ['95_1', '95_2'],
+    "98": {
+        "templates": ["95_1", "95_2"],
         "category_col": "Greenhouse Gas Source and Sink Categories",
         "year": 2010,
         # "unit_info": unit_info_2018,
         "coords_value_mapping": "other",
     },
-    '99': {
-        "templates": ['95_1', '95_2'],
+    "99": {
+        "templates": ["95_1", "95_2"],
         "category_col": "Greenhouse Gas Source and Sink Categories",
         "year": 2000,
         # "unit_info": unit_info_2018,
         "coords_value_mapping": "other",
     },
-    '100': {
-        "templates": ['95_1', '95_2'],
+    "100": {
+        "templates": ["95_1", "95_2"],
         "category_col": "Greenhouse Gas Source and Sink Categories",
         "year": 1994,
         # "unit_info": unit_info_2018,
@@ -264,12 +350,12 @@ table_defs = {
 }
 
 cat_names_fix = {
-    '14Ab Residential': '1A4b Residential',
+    "14Ab Residential": "1A4b Residential",
 }
 
 values_replacement = {
-#    '': '-',
-    ' ': '',
+    #    '': '-',
+    " ": "",
 }
 
 gwp_to_use = "AR5GWP100"
@@ -281,28 +367,28 @@ unit_row = "header"
 
 ## parameters part 2: conversion to PRIMAP2 interchnage format
 
-cats_remove = ['Information items']
+cats_remove = ["Information items"]
 
 cat_codes_manual = {
-    'CO2 from Biomass Combustion for Energy Production': 'M.BIO',
-    'Total National Emissions and Removals': '0',
-    'Total (Net) National Emissions': '0',
-    'Clinical Waste Incineration': 'M.4.C.1',
-    'Hazardous Waste Incineration': 'M.4.C.2',
+    "CO2 from Biomass Combustion for Energy Production": "M.BIO",
+    "Total National Emissions and Removals": "0",
+    "Total (Net) National Emissions": "0",
+    "Clinical Waste Incineration": "M.4.C.1",
+    "Hazardous Waste Incineration": "M.4.C.2",
     #'3 AGRICULTURE': 'M.AG',
-    '3 AGRICULTURE, FORESTRY AND OTHER LAND USE': '3',
+    "3 AGRICULTURE, FORESTRY AND OTHER LAND USE": "3",
     #'3 LAND USE, LAND-USE CHANGE AND FORESTRY': 'M.LULUCF',
 }
 
 
-cat_code_regexp = r'(?P<code>^[A-Za-z0-9]{1,7})\s.*'
+cat_code_regexp = r"(?P<code>^[A-Za-z0-9]{1,7})\s.*"
 
 # special header as category code and name in one column
 header_long = ["orig_cat_name", "entity", "unit", "time", "data"]
 
 coords_terminologies = {
     "area": "ISO3",
-    "category": "IPCC2006_PRIMAP", #two extra categories
+    "category": "IPCC2006_PRIMAP",  # two extra categories
     "scenario": "PRIMAP",
 }
 
@@ -310,63 +396,59 @@ coords_defaults = {
     "source": "SGP-GHG-inventory ",
     "provenance": "measured",
     "area": "SGP",
-    "scenario": "BUR5"
+    "scenario": "BUR5",
 }
 
 coords_value_mapping = {
     "2018": {
         "unit": "PRIMAP1",
         "entity": {
-            'HFCs': f'HFCS ({gwp_to_use})',
-            'PFCs': f'PFCS ({gwp_to_use})',
-            'CH4': 'CH4',
-            'N2O': 'N2O',
-            'NF3': f'NF3 ({gwp_to_use})',
-            'Net CO2': 'CO2',
-            'SF6': f'SF6 ({gwp_to_use})',
-            'Total (Net) National Emissions': 'KYOTOGHG (AR5GWP100)',
+            "HFCs": f"HFCS ({gwp_to_use})",
+            "PFCs": f"PFCS ({gwp_to_use})",
+            "CH4": "CH4",
+            "N2O": "N2O",
+            "NF3": f"NF3 ({gwp_to_use})",
+            "Net CO2": "CO2",
+            "SF6": f"SF6 ({gwp_to_use})",
+            "Total (Net) National Emissions": "KYOTOGHG (AR5GWP100)",
         },
     },
     "2018_fgases": {
         "unit": "PRIMAP1",
         "entity": {
-            'HFC-125': 'HFC125',
-            'HFC-134a': 'HFC134a',
-            'HFC-143a': 'HFC143a',
-            'HFC-152a': 'HFC152a',
-            'HFC-227ea': 'HFC227ea',
-            'HFC-23': 'HFC23',
-            'HFC-32': 'HFC32',
-            'HFC-41': 'HFC41',
-            'HFC-43-10mee': 'HFC4310mee',
-            'NF3': 'NF3',
-            'PFC-116': 'C2F6',
-            'PFC-14': 'CF4',
-            'PFC-218': 'C3F8',
-            'PFC-318': 'cC4F8',
-            'SF6': 'SF6',
+            "HFC-125": "HFC125",
+            "HFC-134a": "HFC134a",
+            "HFC-143a": "HFC143a",
+            "HFC-152a": "HFC152a",
+            "HFC-227ea": "HFC227ea",
+            "HFC-23": "HFC23",
+            "HFC-32": "HFC32",
+            "HFC-41": "HFC41",
+            "HFC-43-10mee": "HFC4310mee",
+            "NF3": "NF3",
+            "PFC-116": "C2F6",
+            "PFC-14": "CF4",
+            "PFC-218": "C3F8",
+            "PFC-318": "cC4F8",
+            "SF6": "SF6",
         },
     },
     "other": {
         "unit": "PRIMAP1",
         "entity": {
-            'HFCs': f'HFCS ({gwp_to_use})',
-            'CH4': f'CH4 ({gwp_to_use})',
-            'N2O': f'N2O ({gwp_to_use})',
-            'NF3': f'NF3 ({gwp_to_use})',
-            'Net CO2': 'CO2',
-            'PFCs': f'PFCS ({gwp_to_use})',
-            'SF6': f'SF6 ({gwp_to_use})',
-            'Total (Net) National Emissions': f'KYOTOGHG ({gwp_to_use})',
+            "HFCs": f"HFCS ({gwp_to_use})",
+            "CH4": f"CH4 ({gwp_to_use})",
+            "N2O": f"N2O ({gwp_to_use})",
+            "NF3": f"NF3 ({gwp_to_use})",
+            "Net CO2": "CO2",
+            "PFCs": f"PFCS ({gwp_to_use})",
+            "SF6": f"SF6 ({gwp_to_use})",
+            "Total (Net) National Emissions": f"KYOTOGHG ({gwp_to_use})",
         },
     },
 }
 
-coords_cols = {
-    "category": "category",
-    "entity": "entity",
-    "unit": "unit"
-}
+coords_cols = {"category": "category", "entity": "entity", "unit": "unit"}
 
 add_coords_cols = {
     "orig_cat_name": ["orig_cat_name", "category"],
@@ -386,7 +468,7 @@ meta_data = {
     "rights": "",
     "contact": "mail@johannes-guetschow.de",
     "title": "Singapore's Fifth National Communication and Fifth Biannial Update "
-             "Report",
+    "Report",
     "comment": "Read fom pdf file by Johannes Gütschow",
     "institution": "United Nations Framework Convention on Climate Change (UNFCCC)",
 }
@@ -394,92 +476,165 @@ meta_data = {
 
 ## processing
 aggregate_sectors = {
-    '2': {'sources': ['2.A', '2.B', '2.C', '2.D', '2.E', '2.F', '2.G', '2.H'],
-          'name': 'IPPU'},
-    'M.3.C.1.AG': {'sources': ['3.C.1.b', '3.C.1.c'], 'name': 'Emissions from Biomass Burning (Agriculture)'},
-    'M.3.C.1.LU': {'sources': ['3.C.1.a', '3.C.1.d'], 'name': 'Emissions from Biomass Burning (LULUCF)'},
-    'M.3.C.AG': {'sources': ['M.3.C.1.AG', '3.C.2', '3.C.3', '3.C.4', '3.C.5',
-                             '3.C.6', '3.C.7', '3.C.8'],
-                 'name': 'Aggregate sources and non-CO2 emissions sources on land (Agriculture)'},
-    'M.AG.ELV': {'sources': ['M.3.C.AG'], 'name': 'Agriculture excluding livestock emissions'},
-    'M.AG': {'sources': ['M.AG.ELV', '3.A'], 'name': 'Agriculture'},
-    'M.LULUCF': {'sources': ['M.3.C.1.LU', '3.B', '3.D'],
-                 'name': 'Land Use, Land Use Change, and Forestry'},
-    'M.0.EL': {'sources': ['1', '2', 'M.AG', '4', '5'], 'name': 'National Total Excluding LULUCF'},
-    '0': {'sources': ['1', '2', '3', '4', '5'], 'name': 'National Total'},
+    "2": {
+        "sources": ["2.A", "2.B", "2.C", "2.D", "2.E", "2.F", "2.G", "2.H"],
+        "name": "IPPU",
+    },
+    "M.3.C.1.AG": {
+        "sources": ["3.C.1.b", "3.C.1.c"],
+        "name": "Emissions from Biomass Burning (Agriculture)",
+    },
+    "M.3.C.1.LU": {
+        "sources": ["3.C.1.a", "3.C.1.d"],
+        "name": "Emissions from Biomass Burning (LULUCF)",
+    },
+    "M.3.C.AG": {
+        "sources": [
+            "M.3.C.1.AG",
+            "3.C.2",
+            "3.C.3",
+            "3.C.4",
+            "3.C.5",
+            "3.C.6",
+            "3.C.7",
+            "3.C.8",
+        ],
+        "name": "Aggregate sources and non-CO2 emissions sources on land (Agriculture)",
+    },
+    "M.AG.ELV": {
+        "sources": ["M.3.C.AG"],
+        "name": "Agriculture excluding livestock emissions",
+    },
+    "M.AG": {"sources": ["M.AG.ELV", "3.A"], "name": "Agriculture"},
+    "M.LULUCF": {
+        "sources": ["M.3.C.1.LU", "3.B", "3.D"],
+        "name": "Land Use, Land Use Change, and Forestry",
+    },
+    "M.0.EL": {
+        "sources": ["1", "2", "M.AG", "4", "5"],
+        "name": "National Total Excluding LULUCF",
+    },
+    "0": {"sources": ["1", "2", "3", "4", "5"], "name": "National Total"},
 }
 
 
 processing_info_step1 = {
     # aggregate IPPU which is missing for individual fgases so it can be used in the
     # next step (downscaling)
-    'aggregate_cats': {
-        '2': {'sources': ['2.A', '2.B', '2.C', '2.D', '2.E', '2.F', '2.G', '2.H'],
-              'name': 'IPPU'},
+    "aggregate_cats": {
+        "2": {
+            "sources": ["2.A", "2.B", "2.C", "2.D", "2.E", "2.F", "2.G", "2.H"],
+            "name": "IPPU",
+        },
     },
-    'tolerance': 1, # because ch4 is inconsistent
+    "tolerance": 1,  # because ch4 is inconsistent
 }
 
-processing_info_step2 =  {
-    'aggregate_cats': aggregate_sectors,
-    'downscale': {
-        'sectors': {
-            'IPPU': {
-                'basket': '2',
-                'basket_contents': ['2.A', '2.B', '2.C', '2.D', '2.E',
-                                    '2.F', '2.G', '2.H'],
-                'entities': ['CO2', 'N2O', f'PFCS ({gwp_to_use})',
-                             f'HFCS ({gwp_to_use})', 'SF6', 'NF3'],
-                'dim': 'category (IPCC2006_PRIMAP)',
+processing_info_step2 = {
+    "aggregate_cats": aggregate_sectors,
+    "downscale": {
+        "sectors": {
+            "IPPU": {
+                "basket": "2",
+                "basket_contents": [
+                    "2.A",
+                    "2.B",
+                    "2.C",
+                    "2.D",
+                    "2.E",
+                    "2.F",
+                    "2.G",
+                    "2.H",
+                ],
+                "entities": [
+                    "CO2",
+                    "N2O",
+                    f"PFCS ({gwp_to_use})",
+                    f"HFCS ({gwp_to_use})",
+                    "SF6",
+                    "NF3",
+                ],
+                "dim": "category (IPCC2006_PRIMAP)",
             },
             # AFOLU downscaling. Most is zero anyway
-            '3C': {
-                'basket': '3.C',
-                'basket_contents': ['3.C.1', '3.C.2', '3.C.3', '3.C.4', '3.C.5',
-                                    '3.C.6', '3.C.7', '3.C.8'],
-                'entities': ['CO2', 'CH4', 'N2O'],
-                'dim': 'category (IPCC2006_PRIMAP)',
+            "3C": {
+                "basket": "3.C",
+                "basket_contents": [
+                    "3.C.1",
+                    "3.C.2",
+                    "3.C.3",
+                    "3.C.4",
+                    "3.C.5",
+                    "3.C.6",
+                    "3.C.7",
+                    "3.C.8",
+                ],
+                "entities": ["CO2", "CH4", "N2O"],
+                "dim": "category (IPCC2006_PRIMAP)",
             },
-            '3C1': {
-                'basket': '3.C.1',
-                'basket_contents': ['3.C.1.a', '3.C.1.b', '3.C.1.c', '3.C.1.d'],
-                'entities': ['CO2', 'CH4', 'N2O'],
-                'dim': 'category (IPCC2006_PRIMAP)',
+            "3C1": {
+                "basket": "3.C.1",
+                "basket_contents": ["3.C.1.a", "3.C.1.b", "3.C.1.c", "3.C.1.d"],
+                "entities": ["CO2", "CH4", "N2O"],
+                "dim": "category (IPCC2006_PRIMAP)",
             },
-            '3D': {
-                'basket': '3.D',
-                'basket_contents': ['3.D.1', '3.D.2'],
-                'entities': ['CO2', 'CH4', 'N2O'],
-                'dim': 'category (IPCC2006_PRIMAP)',
+            "3D": {
+                "basket": "3.D",
+                "basket_contents": ["3.D.1", "3.D.2"],
+                "entities": ["CO2", "CH4", "N2O"],
+                "dim": "category (IPCC2006_PRIMAP)",
             },
         },
-        'entities': {
-            'HFCS': {
-                'basket': f'HFCS ({gwp_to_use})',
-                'basket_contents': ['HFC125', 'HFC134a', 'HFC143a', 'HFC23',
-                                    'HFC32', 'HFC4310mee', 'HFC227ea'],
-                'sel': {'category (IPCC2006_PRIMAP)':
-                            ['0', '2', '2.C', '2.E',
-                             '2.F', '2.G', '2.H']},
+        "entities": {
+            "HFCS": {
+                "basket": f"HFCS ({gwp_to_use})",
+                "basket_contents": [
+                    "HFC125",
+                    "HFC134a",
+                    "HFC143a",
+                    "HFC23",
+                    "HFC32",
+                    "HFC4310mee",
+                    "HFC227ea",
+                ],
+                "sel": {
+                    "category (IPCC2006_PRIMAP)": [
+                        "0",
+                        "2",
+                        "2.C",
+                        "2.E",
+                        "2.F",
+                        "2.G",
+                        "2.H",
+                    ]
+                },
             },
-            'PFCS': {
-                'basket': f'PFCS ({gwp_to_use})',
-                'basket_contents': ['C2F6', 'C3F8', 'CF4', 'cC4F8'],
-                'sel': {'category (IPCC2006_PRIMAP)':
-                            ['0', '2', '2.C', '2.E',
-                             '2.F', '2.G', '2.H']},
+            "PFCS": {
+                "basket": f"PFCS ({gwp_to_use})",
+                "basket_contents": ["C2F6", "C3F8", "CF4", "cC4F8"],
+                "sel": {
+                    "category (IPCC2006_PRIMAP)": [
+                        "0",
+                        "2",
+                        "2.C",
+                        "2.E",
+                        "2.F",
+                        "2.G",
+                        "2.H",
+                    ]
+                },
             },
-        }
+        },
     },
-    'remove_ts': {
-        'fgases': { # unnecessary and complicates aggregation for
+    "remove_ts": {
+        "fgases": {  # unnecessary and complicates aggregation for
             # other gases
-            'category': ['5', '5.B'],
-            'entities': [f'HFCS ({gwp_to_use})', f'PFCS ({gwp_to_use})', 'SF6', 'NF3'],
+            "category": ["5", "5.B"],
+            "entities": [f"HFCS ({gwp_to_use})", f"PFCS ({gwp_to_use})", "SF6", "NF3"],
         },
-        'CH4': { # inconsistent with IPPU sector
-            'category': ['2.A', '2.B', '2.C', '2.D', '2.E', '2.F', '2.G', '2.H'],
-            'entities': ['CH4'],
+        "CH4": {  # inconsistent with IPPU sector
+            "category": ["2.A", "2.B", "2.C", "2.D", "2.E", "2.F", "2.G", "2.H"],
+            "entities": ["CH4"],
         },
     },
     # 'basket_copy': {
@@ -488,6 +643,3 @@ processing_info_step2 =  {
     #     'source_GWP': gwp_to_use,
     # },
 }
-
-
-

+ 110 - 72
src/unfccc_ghg_data/unfccc_reader/Singapore/read_SGP_BUR5_from_pdf.py

@@ -1,12 +1,26 @@
-# read Singapore fifth BUR from pdf
+"""
+Read Singapore's BUR5 from pdf
 
+This script reads data from Singapore's BUR5
+Data are read from pdf using camelot
 
+"""
 import locale
 
-#import numpy as np
+# import numpy as np
 import camelot
 import pandas as pd
 import primap2 as pm2
+from primap2.pm2io._conversion import convert_ipcc_code_primap_to_primap2
+
+from unfccc_ghg_data.helper import (
+    downloaded_data_path,
+    extracted_data_path,
+    fix_rows,
+    gas_baskets,
+    process_data_for_country,
+)
+
 from .config_sgp_bur5 import (
     cat_code_regexp,
     cat_codes_manual,
@@ -26,29 +40,20 @@ from .config_sgp_bur5 import (
     table_defs,
     values_replacement,
 )
-from primap2.pm2io._conversion import convert_ipcc_code_primap_to_primap2
-
-from unfccc_ghg_data.helper import (
-    downloaded_data_path,
-    extracted_data_path,
-    fix_rows,
-    gas_baskets,
-    process_data_for_country,
-)
 
 if __name__ == "__main__":
     ### genral configuration
-    input_folder = downloaded_data_path / 'UNFCCC' / 'Singapore' / 'BUR5'
-    output_folder = extracted_data_path / 'UNFCCC' / 'Singapore'
+    input_folder = downloaded_data_path / "UNFCCC" / "Singapore" / "BUR5"
+    output_folder = extracted_data_path / "UNFCCC" / "Singapore"
     if not output_folder.exists():
         output_folder.mkdir()
 
-    output_filename = 'SGP_BUR5_2022_'
-    inventory_file_pdf = 'Singapore_-_NC5BUR5.pdf'
-    #years_to_read = range(1990, 2018 + 1)
+    output_filename = "SGP_BUR5_2022_"
+    inventory_file_pdf = "Singapore_-_NC5BUR5.pdf"
+    # years_to_read = range(1990, 2018 + 1)
 
     # define locale to use for str to float conversion
-    locale_to_use = 'en_SG.UTF-8'
+    locale_to_use = "en_SG.UTF-8"
     locale.setlocale(locale.LC_NUMERIC, locale_to_use)
 
     pagesToRead = table_defs.keys()
@@ -69,9 +74,14 @@ if __name__ == "__main__":
             print(f"Reading table {table_on_page}")
             area = table_def_templates[table_on_page]["area"]
             cols = table_def_templates[table_on_page]["cols"]
-            tables = camelot.read_pdf(str(input_folder / inventory_file_pdf),
-                                      pages=str(page), flavor='stream',
-                                      table_areas=area, columns=cols, split_text=True)
+            tables = camelot.read_pdf(
+                str(input_folder / inventory_file_pdf),
+                pages=str(page),
+                flavor="stream",
+                table_areas=area,
+                columns=cols,
+                split_text=True,
+            )
 
             df_current = tables[0].df.copy(deep=True)
             # drop the old header
@@ -79,39 +89,52 @@ if __name__ == "__main__":
                 df_current = df_current.drop(table_defs[page]["drop_rows"])
             elif "drop_rows" in table_def_templates[table_on_page].keys():
                 df_current = df_current.drop(
-                    table_def_templates[table_on_page]["drop_rows"])
+                    table_def_templates[table_on_page]["drop_rows"]
+                )
             # add new header
-            if 'header' in table_defs[page].keys():
+            if "header" in table_defs[page].keys():
                 df_current.columns = pd.MultiIndex.from_tuples(
-                    zip(table_defs[page]['header']['entity'],
-                        table_defs[page]['header']['unit']))
+                    zip(
+                        table_defs[page]["header"]["entity"],
+                        table_defs[page]["header"]["unit"],
+                    )
+                )
             else:
                 df_current.columns = pd.MultiIndex.from_tuples(
-                    zip(table_def_templates[table_on_page]['header']['entity'],
-                        table_def_templates[table_on_page]['header']['unit']))
+                    zip(
+                        table_def_templates[table_on_page]["header"]["entity"],
+                        table_def_templates[table_on_page]["header"]["unit"],
+                    )
+                )
 
             # drop cols if necessary
             if "drop_cols" in table_defs[page].keys():
-                # print(df_current.columns.values)
+                # print(df_current.columns.to_numpy())
                 df_current = df_current.drop(columns=table_defs[page]["drop_cols"])
             elif "drop_cols" in table_def_templates[table_on_page].keys():
                 df_current = df_current.drop(columns=table_defs[page]["drop_cols"])
 
             # rename category column
-            df_current.rename(columns={table_defs[page]["category_col"]: index_cols[0]},
-                              inplace=True)
+            df_current = df_current.rename(
+                columns={table_defs[page]["category_col"]: index_cols[0]}
+            )
 
             # replace double \n
             df_current[index_cols[0]] = df_current[index_cols[0]].str.replace("\n", " ")
             # replace double and triple spaces
-            df_current[index_cols[0]] = df_current[index_cols[0]].str.replace("   ", " ")
+            df_current[index_cols[0]] = df_current[index_cols[0]].str.replace(
+                "   ", " "
+            )
             df_current[index_cols[0]] = df_current[index_cols[0]].str.replace("  ", " ")
 
             # fix the split rows
             for n_rows in table_def_templates[table_on_page]["rows_to_fix"].keys():
-                df_current = fix_rows(df_current,
-                                      table_def_templates[table_on_page]["rows_to_fix"][
-                                          n_rows], index_cols[0], n_rows)
+                df_current = fix_rows(
+                    df_current,
+                    table_def_templates[table_on_page]["rows_to_fix"][n_rows],
+                    index_cols[0],
+                    n_rows,
+                )
 
             # replace category names with typos
             df_current[index_cols[0]] = df_current[index_cols[0]].replace(cat_names_fix)
@@ -122,7 +145,7 @@ if __name__ == "__main__":
             # set index
             # df_current = df_current.set_index(index_cols)
             # strip trailing and leading  and remove "^"
-            for col in df_current.columns.values:
+            for col in df_current.columns.to_numpy():
                 df_current[col] = df_current[col].str.strip()
                 df_current[col] = df_current[col].str.replace("^", "")
 
@@ -132,19 +155,24 @@ if __name__ == "__main__":
                 df_this_page = df_current.copy(deep=True)
             else:
                 # find intersecting cols
-                cols_this_page = df_this_page.columns.values
+                cols_this_page = df_this_page.columns.to_numpy()
                 # print(f"cols this page: {cols_this_page}")
-                cols_current = df_current.columns.values
+                cols_current = df_current.columns.to_numpy()
                 # print(f"cols current: {cols_current}")
                 cols_both = list(set(cols_this_page).intersection(set(cols_current)))
                 # print(f"cols both: {cols_both}")
                 if len(cols_both) > 0:
-                    df_this_page = df_this_page.merge(df_current, how='outer', on=cols_both,
-                                                      suffixes=(None, None))
+                    df_this_page = df_this_page.merge(
+                        df_current, how="outer", on=cols_both, suffixes=(None, None)
+                    )
                 else:
-                    df_this_page = df_this_page.merge(df_current, how='outer',
-                                                      left_index=True, right_index=True,
-                                                      suffixes=(None, None))
+                    df_this_page = df_this_page.merge(
+                        df_current,
+                        how="outer",
+                        left_index=True,
+                        right_index=True,
+                        suffixes=(None, None),
+                    )
 
                 df_this_page = df_this_page.groupby(index_cols).first().reset_index()
                 # print(df_this_page)
@@ -152,28 +180,34 @@ if __name__ == "__main__":
 
         # set index and convert to long format
         df_this_page = df_this_page.set_index(index_cols)
-        df_this_page_long = pm2.pm2io.nir_convert_df_to_long(df_this_page,
-                                                             table_defs[page]["year"],
-                                                             header_long)
+        df_this_page_long = pm2.pm2io.nir_convert_df_to_long(
+            df_this_page, table_defs[page]["year"], header_long
+        )
 
         # drop the rows with memo items etc
         for cat in cats_remove:
             df_this_page_long = df_this_page_long.drop(
-                df_this_page_long.loc[df_this_page_long.loc[:, index_cols[0]] == cat].index)
+                df_this_page_long.loc[
+                    df_this_page_long.loc[:, index_cols[0]] == cat
+                ].index
+            )
 
         # make a copy of the categories row
         df_this_page_long.loc[:, "category"] = df_this_page_long.loc[:, index_cols[0]]
 
         # replace cat names by codes in col "Categories"
         # first the manual replacements
-        df_this_page_long.loc[:, "category"] = df_this_page_long.loc[:, "category"].replace(
-            cat_codes_manual)
+        df_this_page_long.loc[:, "category"] = df_this_page_long.loc[
+            :, "category"
+        ].replace(cat_codes_manual)
+
         # then the regex repalcements
-        def repl(m):
-            return convert_ipcc_code_primap_to_primap2('IPC' + m.group('code'))
-        df_this_page_long.loc[:, "category"] = df_this_page_long.loc[:,
-                                               "category"].str.replace(cat_code_regexp,
-                                                                       repl, regex=True)
+        def repl(m):  # noqa: D103
+            return convert_ipcc_code_primap_to_primap2("IPC" + m.group("code"))
+
+        df_this_page_long.loc[:, "category"] = df_this_page_long.loc[
+            :, "category"
+        ].str.replace(cat_code_regexp, repl, regex=True)
         df_this_page_long.loc[:, "category"].unique()
 
         # strip spaces in data col
@@ -185,27 +219,29 @@ if __name__ == "__main__":
         df_this_page_long.columns = df_this_page_long.columns.map(str)
 
         # remove thousands separators as pd.to_numeric can't deal with that
-        df_this_page_long.loc[:, "data"] = df_this_page_long.loc[:, "data"].str.replace(',',
-                                                                                        '')
+        df_this_page_long.loc[:, "data"] = df_this_page_long.loc[:, "data"].str.replace(
+            ",", ""
+        )
 
         # drop orig cat name as it's not unique over all tables (keep until here in case
         # it's needed for debugging)
-        df_this_page_long = df_this_page_long.drop(columns='orig_cat_name')
+        df_this_page_long = df_this_page_long.drop(columns="orig_cat_name")
 
         data_page_if = pm2.pm2io.convert_long_dataframe_if(
             df_this_page_long,
             coords_cols=coords_cols,
-            #add_coords_cols=add_coords_cols,
+            # add_coords_cols=add_coords_cols,
             coords_defaults=coords_defaults,
             coords_terminologies=coords_terminologies,
             coords_value_mapping=coords_value_mapping[
-                table_defs[page]["coords_value_mapping"]],
+                table_defs[page]["coords_value_mapping"]
+            ],
             # coords_value_filling=coords_value_filling,
             filter_remove=filter_remove,
             # filter_keep=filter_keep,
             meta_data=meta_data,
             convert_str=True,
-            time_format='%Y',
+            time_format="%Y",
         )
 
         # conversion to PRIMAP2 native format
@@ -226,13 +262,16 @@ if __name__ == "__main__":
     if not output_folder.exists():
         output_folder.mkdir()
     pm2.pm2io.write_interchange_format(
-        output_folder / (output_filename + coords_terminologies["category"] + "_raw"), data_if)
+        output_folder / (output_filename + coords_terminologies["category"] + "_raw"),
+        data_if,
+    )
 
     encoding = {var: compression for var in data_pm2.data_vars}
     data_pm2.pr.to_netcdf(
-        output_folder / (output_filename + coords_terminologies["category"] + "_raw.nc"),
-        encoding=encoding)
-
+        output_folder
+        / (output_filename + coords_terminologies["category"] + "_raw.nc"),
+        encoding=encoding,
+    )
 
     #### processing
     data_proc_pm2 = data_pm2
@@ -246,22 +285,21 @@ if __name__ == "__main__":
         processing_info_country=processing_info_step1,
     )
 
-
     data_proc_pm2 = process_data_for_country(
         data_proc_pm2,
         entities_to_ignore=[],
         gas_baskets=gas_baskets,
         processing_info_country=processing_info_step2,
-        cat_terminology_out = terminology_proc,
-        #category_conversion = None,
-        #sectors_out = None,
+        cat_terminology_out=terminology_proc,
+        # category_conversion = None,
+        # sectors_out = None,
     )
 
     # adapt source and metadata
     # TODO: processing info is present twice
-    current_source = data_proc_pm2.coords["source"].values[0]
+    current_source = data_proc_pm2.coords["source"].to_numpy()[0]
     data_temp = data_proc_pm2.pr.loc[{"source": current_source}]
-    data_proc_pm2 = data_proc_pm2.pr.set("source", 'BUR_NIR', data_temp)
+    data_proc_pm2 = data_proc_pm2.pr.set("source", "BUR_NIR", data_temp)
 
     # ###
     # save data to IF and native format
@@ -270,10 +308,10 @@ if __name__ == "__main__":
     if not output_folder.exists():
         output_folder.mkdir()
     pm2.pm2io.write_interchange_format(
-        output_folder / (output_filename + terminology_proc), data_proc_if)
+        output_folder / (output_filename + terminology_proc), data_proc_if
+    )
 
     encoding = {var: compression for var in data_proc_pm2.data_vars}
     data_proc_pm2.pr.to_netcdf(
-        output_folder / (output_filename + terminology_proc + ".nc"),
-        encoding=encoding)
-
+        output_folder / (output_filename + terminology_proc + ".nc"), encoding=encoding
+    )

+ 30 - 0
src/unfccc_ghg_data/unfccc_reader/Taiwan/__init__.py

@@ -0,0 +1,30 @@
+"""Read Taiwan's inventories
+
+Scripts and configurations to read Taiwan's submissions to the UNFCCC.
+Currently, the following submissions and datasets are available (all datasets
+including DI (read using the DI-reader) and legacy BUR/NIR (no code)):
+
+.. exec_code::
+    :hide_code:
+
+    from unfccc_ghg_data.helper.functions import (get_country_datasets,
+                                                  get_country_submissions)
+    country = 'TWN'
+    # print available submissions
+    print("="*15 + " Available submissions " + "="*15)
+    get_country_submissions(country, True)
+    print("")
+
+    #print available datasets
+    print("="*15 + " Available datasets " + "="*15)
+    get_country_datasets(country, True)
+
+You can also obtain this information running
+
+.. code-block:: bash
+
+    poetry run doit country_info country=TWN
+
+See below for a listing of scripts for BUR/NIR reading including links.
+
+"""

+ 194 - 120
src/unfccc_ghg_data/unfccc_reader/Taiwan/config_twn_nir2022.py

@@ -1,4 +1,10 @@
-# config and functions for Taiwan NIR 2022
+"""Config for Taiwan's 2022 inventory
+
+Partial configuration for camelot and data aggregation. PRIMAP2 conversion
+config and metadata are defined in the reading script
+
+"""
+
 
 from typing import Union
 
@@ -6,9 +12,36 @@ import pandas as pd
 
 gwp_to_use = "AR4GWP100"
 
-def fix_rows(data: pd.DataFrame, rows_to_fix: list, col_to_use: str, n_rows: int)->pd.DataFrame:
+
+def fix_rows(
+    data: pd.DataFrame, rows_to_fix: list, col_to_use: str, n_rows: int
+) -> pd.DataFrame:
+    """
+    Combine split rows
+
+    This function combines rows which have been split into several rows during data
+    reading from pdf because they contained line breaks.
+
+    Parameters
+    ----------
+    data: pd.DataFrame
+        The data to work with
+    rows_to_fix: list
+        List of values for which to fix rows
+    col_to_use: str
+        column to use to find the rows to merge
+    n_rows: int
+        How many rows to combine for each row found. e.g. 3 means combine the found
+        row with the following two rows. Negative values are used for more
+        complicated situations where the rows to merge are also before the position
+        of the value that indicates the merge. See code for details
+
+    Returns
+    -------
+        pandas DataFrame with combined rows. The individual rows are removed
+    """
     for row in rows_to_fix:
-        #print(row)
+        # print(row)
         # find the row number and collect the row and the next two rows
         index = data.loc[data[col_to_use] == row].index
         if not list(index):
@@ -20,35 +53,35 @@ def fix_rows(data: pd.DataFrame, rows_to_fix: list, col_to_use: str, n_rows: int
         for item in index:
             loc = data.index.get_loc(item)
             ####print(data[col_to_use].loc[loc + 1])
-            if n_rows == -2:
+            if n_rows == -2:  # noqa: PLR2004
                 locs_to_merge = list(range(loc - 1, loc + 1))
                 loc_to_check = loc - 1
-            #if n_rows == -3:
+            # if n_rows == -3:
             #    locs_to_merge = list(range(loc - 1, loc + 2))
-            #elif n_rows == -5:
+            # elif n_rows == -5:
             #    locs_to_merge = list(range(loc - 1, loc + 4))
             else:
                 locs_to_merge = list(range(loc, loc + n_rows))
                 loc_to_check = loc + 1
 
-            if data[col_to_use].loc[loc_to_check] == '':
+            if not data[col_to_use].loc[loc_to_check]:
                 rows_to_merge = data.iloc[locs_to_merge]
                 indices_to_merge = rows_to_merge.index
                 # replace numerical NaN values
                 ####print(rows_to_merge)
-                rows_to_merge = rows_to_merge.fillna('')
+                rows_to_merge = rows_to_merge.fillna("")
                 ####print("fillna")
                 ####print(rows_to_merge)
                 # join the three rows
-                new_row = rows_to_merge.agg(' '.join)
+                new_row = rows_to_merge.agg(" ".join)
                 # replace the double spaces that are created
                 # must be done here and not at the end as splits are not always
                 # the same and join would produce different col values
                 new_row = new_row.str.replace("  ", " ")
                 new_row = new_row.str.strip()
-                #new_row = new_row.str.replace("N O", "NO")
-                #new_row = new_row.str.replace(", N", ",N")
-                #new_row = new_row.str.replace("- ", "-")
+                # new_row = new_row.str.replace("N O", "NO")
+                # new_row = new_row.str.replace(", N", ",N")
+                # new_row = new_row.str.replace("- ", "-")
                 data.loc[indices_to_merge[0]] = new_row
                 indices_to_drop = indices_to_drop + list(indices_to_merge[1:])
 
@@ -56,12 +89,43 @@ def fix_rows(data: pd.DataFrame, rows_to_fix: list, col_to_use: str, n_rows: int
         data = data.reset_index(drop=True)
     return data
 
-def make_wide_table(data: pd.DataFrame, keyword: str, col: Union[int, str], index_cols: list[Union[int, str]])->pd.DataFrame:
+
+def make_wide_table(
+    data: pd.DataFrame,
+    keyword: str,
+    col: Union[int, str],
+    index_cols: list[Union[int, str]],
+) -> pd.DataFrame:
+    """
+    Transform a table with sections for gases to a gas-wide table
+
+    Some tables are rolled up, i.e. the header repeats within the table and the
+    tables are composed of several tables for different year ranges stacked on top of
+    each other. These tables are unrolled and converted to a proper time-wide format
+    without repetition of headers.
+
+    Parameters
+    ----------
+    data: pd.DataFrame
+        the data to convert
+    keyword: str
+        The keyword used to identify the header, e.g. 'GHG Emission Sources and Sinks'
+    col: int
+        Column to look for the keyword
+    index_cols: list[Union[int, str]]
+        Columns to use as index for the output DataFrame
+
+
+    Returns
+    -------
+        pandas DataFrame in time-wide format
+
+    """
     index = data.loc[data[col] == keyword].index
     if not list(index):
         print("Keyword for table transformation not found")
         return data
-    elif len(index)==1:
+    elif len(index) == 1:
         print("Keyword for table transformation found only once")
         return data
     else:
@@ -74,83 +138,88 @@ def make_wide_table(data: pd.DataFrame, keyword: str, col: Union[int, str], inde
                 next_loc = data.index[-1] + 1
             df_to_add = data.loc[list(range(loc, next_loc))]
             # select only cols which don't have NaN, Null, or '' as header
-            filter_nan = ((~df_to_add.iloc[0].isnull()) & (df_to_add.iloc[0] != 'NaN')& (df_to_add.iloc[0] != ''))
-            df_to_add = df_to_add.loc[: , filter_nan]
+            filter_nan = (
+                (~df_to_add.iloc[0].isna())
+                & (df_to_add.iloc[0] != "NaN")
+                & (df_to_add.iloc[0])
+            )
+            df_to_add = df_to_add.loc[:, filter_nan]
             df_to_add.columns = df_to_add.iloc[0]
-            #print(df_to_add.columns)
+            # print(df_to_add.columns)
             df_to_add = df_to_add.drop(loc)
             df_to_add = df_to_add.set_index(index_cols)
 
             if df_all is None:
                 df_all = df_to_add
             else:
-                df_all = pd.concat([df_all, df_to_add], axis=1, join='outer')
+                df_all = pd.concat([df_all, df_to_add], axis=1, join="outer")
         return df_all
 
 
 # page defs tp hold information on reading the table
 page_defs = {
-    '5': {
-        "table_areas": ['36,523,563,68'],
+    "5": {
+        "table_areas": ["36,523,563,68"],
         "split_text": False,
         "flavor": "stream",
     },
-    '6': {
-        "table_areas": ['34,562,563,53'],
-        #"columns": ['195,228,263,295,328,363,395,428,462,495,529'], # works without
+    "6": {
+        "table_areas": ["34,562,563,53"],
+        # "columns": ['195,228,263,295,328,363,395,428,462,495,529'], # works without
         "split_text": True,
         "flavor": "stream",
     },
-    '7': {
-        "table_areas": ['36,740,499,482', '36,430,564,53'],
+    "7": {
+        "table_areas": ["36,740,499,482", "36,430,564,53"],
         "split_text": True,
         "flavor": "stream",
     },
-    '8': {
-        "table_areas": ['35,748,503,567'],
+    "8": {
+        "table_areas": ["35,748,503,567"],
         "split_text": True,
         "flavor": "stream",
     },
-    '9': {
-        "table_areas": ['35,747,565,315', '36,273,565,50'],
+    "9": {
+        "table_areas": ["35,747,565,315", "36,273,565,50"],
         "split_text": False,
         "flavor": "stream",
     },
-    '11': {
-        "table_areas": ['35,744,563,434'],
+    "11": {
+        "table_areas": ["35,744,563,434"],
         "split_text": True,
         "flavor": "stream",
     },
-    '12': {
-        "table_areas": ['33,747,562,86'],
+    "12": {
+        "table_areas": ["33,747,562,86"],
         "split_text": True,
         "flavor": "stream",
     },
-    '13': {
-        "table_areas": ['34,303,564,54'],
+    "13": {
+        "table_areas": ["34,303,564,54"],
         "split_text": True,
         "flavor": "stream",
     },
-    '14': {
-        "table_areas": ['34,754,564,256'],
-        "columns": ['220,251,283,314,344,371,406,438,470,500,530'],
+    "14": {
+        "table_areas": ["34,754,564,256"],
+        "columns": ["220,251,283,314,344,371,406,438,470,500,530"],
         "split_text": True,
         "flavor": "stream",
     },
-    '15': {
-        "table_areas": ['34,487,564,42'],
+    "15": {
+        "table_areas": ["34,487,564,42"],
         "split_text": True,
         "flavor": "stream",
     },
-    '16': {
-        "table_areas": ['34,418,564,125'],
-        #"columns": ['107,209,241,273,306,338,369,402,433,466,498,533'],
+    "16": {
+        "table_areas": ["34,418,564,125"],
+        # "columns": ['107,209,241,273,306,338,369,402,433,466,498,533'],
         "split_text": True,
         "flavor": "lattice",
-    }, # with stream the row index is messed up with lattice the column index ... red with lattice and fix col header manualy
-    '17': {
-        "table_areas": ['34,534,564,49'],
-        "columns": ['188,232,263,298,331,362,398,432,464,497,530'],
+    },  # with stream the row index is messed up with lattice the column index ...
+    # read with lattice and fix col header manually
+    "17": {
+        "table_areas": ["34,534,564,49"],
+        "columns": ["188,232,263,298,331,362,398,432,464,497,530"],
         "split_text": True,
         "flavor": "stream",
     },
@@ -158,38 +227,40 @@ page_defs = {
 
 # table defs to hold information on how to process the tables
 table_defs = {
-    'ES2.2': { # 1990-2020 Carbon Dioxide Emissions and Sequestration in Taiwan
+    "ES2.2": {  # 1990-2020 Carbon Dioxide Emissions and Sequestration in Taiwan
         "tables": [1, 2],
         "rows_to_fix": {
             0: {
-                3: ['1.A.4.c Agriculture, Forestry, Fishery, and',
-                    '2.D Non-Energy Products from Fuels and',
-                    '4. Land Use, Land Use Change and Forestry'],
+                3: [
+                    "1.A.4.c Agriculture, Forestry, Fishery, and",
+                    "2.D Non-Energy Products from Fuels and",
+                    "4. Land Use, Land Use Change and Forestry",
+                ],
             },
         },
-        "index_cols": ['GHG Emission Source and Sinks'],
-        "wide_keyword": 'GHG Emission Source and Sinks',
+        "index_cols": ["GHG Emission Source and Sinks"],
+        "wide_keyword": "GHG Emission Source and Sinks",
         "col_wide_kwd": 0,
         "entity": "CO2",
         "unit": "kt",
         "cat_codes_manual": {
-            'Net GHG Emission (including LULUCF)': '0',
-            'Total GHG Emission (excluding LULUCF)': 'M.0.EL',
+            "Net GHG Emission (including LULUCF)": "0",
+            "Total GHG Emission (excluding LULUCF)": "M.0.EL",
         },
     },
-    'ES2.3': { # 1990-2020 Methane Emissions in Taiwan
+    "ES2.3": {  # 1990-2020 Methane Emissions in Taiwan
         "tables": [3, 4],
         "rows_to_fix": {},
-        "index_cols": ['GHG Emission Sources and Sinks'],
-        "wide_keyword": 'GHG Emission Sources and Sinks',
+        "index_cols": ["GHG Emission Sources and Sinks"],
+        "wide_keyword": "GHG Emission Sources and Sinks",
         "col_wide_kwd": 0,
         "entity": f"CH4 ({gwp_to_use})",
         "unit": "ktCO2eq",
         "cat_codes_manual": {
-            'Total Methane Emissions': '0',
+            "Total Methane Emissions": "0",
         },
     },
-    'ES2.4': { # 1990-2020 Nitrous Oxide Emissions in Taiwan
+    "ES2.4": {  # 1990-2020 Nitrous Oxide Emissions in Taiwan
         "tables": [5],
         "fix_cats": {
             0: {
@@ -197,33 +268,33 @@ table_defs = {
             },
         },
         "rows_to_fix": {},
-        "index_cols": ['GHG Emission Sources and Sinks'],
-        "wide_keyword": 'GHG Emission Sources and Sinks',
+        "index_cols": ["GHG Emission Sources and Sinks"],
+        "wide_keyword": "GHG Emission Sources and Sinks",
         "col_wide_kwd": 0,
         "entity": f"N2O ({gwp_to_use})",
         "unit": "ktCO2eq",
         "cat_codes_manual": {
-            'Total Nitrous Oxide Emissions': '0',
+            "Total Nitrous Oxide Emissions": "0",
         },
     },
-    'ES3.1': { # 1990-2020 Greenhouse Gas Emission in Taiwan by Sector
+    "ES3.1": {  # 1990-2020 Greenhouse Gas Emission in Taiwan by Sector
         "tables": [7],
         "rows_to_fix": {},
-        "index_cols": ['GHG Emission Sources and Sinks'],
-        "wide_keyword": 'GHG Emission Sources and Sinks',
+        "index_cols": ["GHG Emission Sources and Sinks"],
+        "wide_keyword": "GHG Emission Sources and Sinks",
         "col_wide_kwd": 0,
         "entity": f"KYOTOGHG ({gwp_to_use})",
         "unit": "ktCO2eq",
         "cat_codes_manual": {
-            'Net GHG Emission (including LULUCF)': '0',
-            'Total GHG Emission (excluding LULUCF)': 'M.0.EL',
+            "Net GHG Emission (including LULUCF)": "0",
+            "Total GHG Emission (excluding LULUCF)": "M.0.EL",
         },
     },
-    'ES3.2': { # 1990-2020 Greenhouse Gas Emissions Produced by Energy Sector in Taiwan
+    "ES3.2": {  # 1990-2020 Greenhouse Gas Emissions Produced by Energy Sector in Taiwan
         "tables": [8],
         "rows_to_fix": {},
-        "index_cols": ['GHG Emission Sources and Sinks'],
-        "wide_keyword": 'GHG Emission Sources and Sinks',
+        "index_cols": ["GHG Emission Sources and Sinks"],
+        "wide_keyword": "GHG Emission Sources and Sinks",
         "col_wide_kwd": 0,
         "gas_splitting": {
             "Total CO2 Emission": "CO2",
@@ -234,17 +305,18 @@ table_defs = {
         },
         "unit": "ktCO2eq",
         "cat_codes_manual": {
-            'Total CO2 Emission': '1',
-            'Total CH4 Emission': '1',
-            'Total N2O Emission': '1',
-            'Total Emission from Energy Sector': '1',
+            "Total CO2 Emission": "1",
+            "Total CH4 Emission": "1",
+            "Total N2O Emission": "1",
+            "Total Emission from Energy Sector": "1",
         },
     },
-    'ES3.3': { # 1990-2020 Greenhouse Gas Emissions Produced by Industrial Process and Product Use Sector (IPPU) in Taiwan
-        "tables": [9,10],
+    "ES3.3": {  # 1990-2020 Greenhouse Gas Emissions Produced by Industrial
+        # Process and Product Use Sector (IPPU) in Taiwan
+        "tables": [9, 10],
         "rows_to_fix": {},
-        "index_cols": ['GHG Emission Sources and Sinks'],
-        "wide_keyword": 'GHG Emission Sources and Sinks',
+        "index_cols": ["GHG Emission Sources and Sinks"],
+        "wide_keyword": "GHG Emission Sources and Sinks",
         "col_wide_kwd": 0,
         "gas_splitting": {
             "Total CO2 Emission": "CO2",
@@ -259,24 +331,26 @@ table_defs = {
         },
         "unit": "ktCO2eq",
         "cat_codes_manual": {
-            'Total CO2 Emission': '2',
-            'Total CH4 Emission': '2',
-            'Total N2O Emission': '2',
-            'Total HFCs Emission': '2',
-            'Total PFCs Emission (2.E Electronics Industry)': '2.E',
-            'Total SF6 Emission': '2',
-            'Total NF3 Emission (2.E Electronics Industry)': '2.E',
-            'Total Emission from IPPU Sector': '2',
+            "Total CO2 Emission": "2",
+            "Total CH4 Emission": "2",
+            "Total N2O Emission": "2",
+            "Total HFCs Emission": "2",
+            "Total PFCs Emission (2.E Electronics Industry)": "2.E",
+            "Total SF6 Emission": "2",
+            "Total NF3 Emission (2.E Electronics Industry)": "2.E",
+            "Total Emission from IPPU Sector": "2",
         },
         "drop_rows": [
-            ("2.D Non-Energy Products from Fuels and Solvent Use", "CO2"), # has lower significant digits than in table ES2.2
-        ]
+            ("2.D Non-Energy Products from Fuels and Solvent Use", "CO2"),  # has lower
+            # significant digits than in table ES2.2
+        ],
     },
-    'ES3.4': { # 1990-2020 Greenhouse Gas Emissions Produced by Agriculture Sector in Taiwan
+    "ES3.4": {  # 1990-2020 Greenhouse Gas Emissions Produced by Agriculture Sector
+        # in Taiwan
         "tables": [11],
         "rows_to_fix": {},
-        "index_cols": ['GHG Emission Sources and Sinks'],
-        "wide_keyword": 'GHG Emission Sources and Sinks',
+        "index_cols": ["GHG Emission Sources and Sinks"],
+        "wide_keyword": "GHG Emission Sources and Sinks",
         "col_wide_kwd": 0,
         "gas_splitting": {
             "Total CO2 Emission (3.H Urea applied)": "CO2",
@@ -287,22 +361,22 @@ table_defs = {
         },
         "unit": "ktCO2eq",
         "cat_codes_manual": {
-            'Total CO2 Emission (3.H Urea applied)': '3.H',
-            'Total CH4 Emission': '3',
-            'Total N2O Emission': '3',
-            'Total Emission From Agriculture Sector': '3',
+            "Total CO2 Emission (3.H Urea applied)": "3.H",
+            "Total CH4 Emission": "3",
+            "Total N2O Emission": "3",
+            "Total Emission From Agriculture Sector": "3",
         },
     },
-    'ES3.6': { # 1990-2020 Greenhouse Gas Emissions in Taiwan by Waste Sector
+    "ES3.6": {  # 1990-2020 Greenhouse Gas Emissions in Taiwan by Waste Sector
         "tables": [13],
         "rows_to_fix": {
             0: {
                 3: ["Total CO2 Emission"],
             },
         },
-        "index_cols": ['GHG Emission Sources and Sinks'],
-        "wide_keyword": 'GHG Emission Sources and Sinks',
-        "col_wide_kwd": 0, # two column header
+        "index_cols": ["GHG Emission Sources and Sinks"],
+        "wide_keyword": "GHG Emission Sources and Sinks",
+        "col_wide_kwd": 0,  # two column header
         "gas_splitting": {
             "Total CO2 Emission (5.C Incineration and Open Burning of Waste)": "CO2",
             "Total CH4 Emission": f"CH4 ({gwp_to_use})",
@@ -312,51 +386,51 @@ table_defs = {
         },
         "unit": "ktCO2eq",
         "cat_codes_manual": {
-            'Total CO2 Emission (5.C Incineration and Open Burning of Waste)': '5.C',
-            'Total CH4 Emission': '5',
-            'Total N2O Emission': '5',
-            'Total Emission from Waste Sector': '5',
+            "Total CO2 Emission (5.C Incineration and Open Burning of Waste)": "5.C",
+            "Total CH4 Emission": "5",
+            "Total N2O Emission": "5",
+            "Total Emission from Waste Sector": "5",
         },
     },
 }
 
 table_defs_skip = {
-    'ES2.1': { # 1990-2020 Greenhouse Gas Emissions and Sequestration in Taiwan by Type
+    "ES2.1": {  # 1990-2020 Greenhouse Gas Emissions and Sequestration in Taiwan by Type
         "tables": [0],
         "rows_to_fix": {
             0: {
-                3: ['CO2'],
+                3: ["CO2"],
             },
             1: {  # wherte col 0 is empty
-                3: ['Net GHG Emission', 'Total GHG Emission'],
+                3: ["Net GHG Emission", "Total GHG Emission"],
             },
         },
-        "index_cols": ['GHG', 'GWP'],
-        "wide_keyword": 'GHG',
+        "index_cols": ["GHG", "GWP"],
+        "wide_keyword": "GHG",
         "col_wide_kwd": 0,
         "unit": "ktCO2eq",
     },
-    'ES2.5': { # 1990-2020 Fluoride-Containing Gas Emissions in Taiwan
+    "ES2.5": {  # 1990-2020 Fluoride-Containing Gas Emissions in Taiwan
         "tables": [6],
         "rows_to_fix": {
             0: {
-                -2: ['Total SF6 Emissions',
-                     'Total NF3 Emissions'],
+                -2: ["Total SF6 Emissions", "Total NF3 Emissions"],
             },
         },
-        "index_cols": ['GHG Emission Sources and Sinks'],
-        "wide_keyword": 'GHG Emission Sources and Sinks',
+        "index_cols": ["GHG Emission Sources and Sinks"],
+        "wide_keyword": "GHG Emission Sources and Sinks",
         "col_wide_kwd": 0,
-        #"entity": "CO2",
+        # "entity": "CO2",
         "unit": "ktCO2eq",
     },
-    'ES3.5': { # skip for now: 1990-2020 Changes in Carbon Sequestration by LULUCF Sector in Taiwan2],
+    "ES3.5": {  # skip for now: 1990-2020 Changes in Carbon Sequestration by LULUCF
+        # Sector in Taiwan2],
         "tables": [12],
         "rows_to_fix": {},
-        "index_cols": ['GHG Emission Sources and Sinks'], #header is merged col :-(
-        "wide_keyword": 'GHG Emission Sources and Sinks',
-        "col_wide_kwd": 0, # two column header
+        "index_cols": ["GHG Emission Sources and Sinks"],  # header is merged col :-(
+        "wide_keyword": "GHG Emission Sources and Sinks",
+        "col_wide_kwd": 0,  # two column header
         "unit": "kt",
         "entity": "CO2",
-    }, # need to consider the two columns specially (merge?)
+    },  # need to consider the two columns specially (merge?)
 }

+ 164 - 104
src/unfccc_ghg_data/unfccc_reader/Taiwan/read_TWN_2022_Inventory_from_pdf.py

@@ -1,14 +1,24 @@
-# this script reads data from Taiwan's 2022 national inventory
-# Data is read from the english summary pdf
-# TODO: add further GWPs and gas baskets
+"""
+Read Taiwan's 2022 national inventory from pdf
+
+This script reads data from Taiwan's 2022 national inventory
+Data are read from the english summary pdf
+TODO: add further GWPs and gas baskets
+
+"""
 
 import copy
 
 import camelot
 import pandas as pd
 import primap2 as pm2
-from .config_twn_nir2022 import (fix_rows, gwp_to_use, make_wide_table, page_defs,
-                                 table_defs)
+from config_twn_nir2022 import (
+    fix_rows,
+    gwp_to_use,
+    make_wide_table,
+    page_defs,
+    table_defs,
+)
 from primap2.pm2io._data_reading import matches_time_format
 
 from unfccc_ghg_data.helper import downloaded_data_path, extracted_data_path
@@ -17,16 +27,16 @@ if __name__ == "__main__":
     # ###
     # configuration
     # ###
-    input_folder = downloaded_data_path / 'non-UNFCCC' / 'Taiwan'
+    input_folder = downloaded_data_path / "non-UNFCCC" / "Taiwan"
     # TODO: move file to subfolder
-    output_folder = extracted_data_path / 'non-UNFCCC' / 'Taiwan'
+    output_folder = extracted_data_path / "non-UNFCCC" / "Taiwan"
     if not output_folder.exists():
         output_folder.mkdir()
 
-    output_filename = 'TWN_inventory_2022_'
-    inventory_file = '00_abstract_en.pdf'
+    output_filename = "TWN_inventory_2022_"
+    inventory_file = "00_abstract_en.pdf"
 
-    cat_code_regexp = r'(?P<code>^[a-zA-Z0-9\.]{1,7})\s.*'
+    cat_code_regexp = r"(?P<code>^[a-zA-Z0-9\.]{1,7})\s.*"
 
     time_format = "%Y"
 
@@ -79,42 +89,49 @@ if __name__ == "__main__":
     # config for part3: mapping to 2006 categpries
 
     cat_mapping = {
-        '3': 'M.AG',
-        '3.A': '3.A.1',
-        '3.B': '3.A.2',
-        '3.C': '3.C.7',
-        '3.D': 'M.3.AS',
-        '3.F': '3.C.1.b',
-        '3.H': '3.C.3',
-        '4': 'M.LULUCF',
-        '5': '4',
-        '5.A': '4.A',
-        '5.B': '4.B',
-        '5.C': '4.C',
-        '5.D': '4.D',
-        '5.D.1': '4.D.1',
-        '5.D.2': '4.D.2',
+        "3": "M.AG",
+        "3.A": "3.A.1",
+        "3.B": "3.A.2",
+        "3.C": "3.C.7",
+        "3.D": "M.3.AS",
+        "3.F": "3.C.1.b",
+        "3.H": "3.C.3",
+        "4": "M.LULUCF",
+        "5": "4",
+        "5.A": "4.A",
+        "5.B": "4.B",
+        "5.C": "4.C",
+        "5.D": "4.D",
+        "5.D.1": "4.D.1",
+        "5.D.2": "4.D.2",
     }
 
     aggregate_cats = {
-        '1.A': {'sources': ['1.A.1', '1.A.2', '1.A.3', '1.A.4'],
-                'name': 'Fuel Combustion Activities'},
-        '1.B': {'sources': ['1.B.1', '1.B.2'], 'name': 'Fugitive Emissions from Fuels'},
-        '3.A': {'sources': ['3.A.1', '3.A.2'], 'name': 'Livestock'},
-        '3.C.1': {'sources': ['3.C.1.b'], 'name': 'Emissions from Biomass Burning'},
-        '3.C.5': {'sources': ['3.C.5.a', '3.C.5.b'],
-                  'name': 'Indirect N2O Emissions from Managed Soils'},
-        '3.C': {'sources': ['3.C.1', '3.C.3', 'M.3.AS', '3.C.7'],
-                'name': 'Aggregate sources and non-CO2 emissions sources on land'},
-        '3': {'sources': ['M.AG', 'M.LULUCF'], 'name': 'AFOLU'},
-        'M.AG.ELV': {'sources': ['3.C'],
-                     'name': 'Agriculture excluding livestock emissions'},
+        "1.A": {
+            "sources": ["1.A.1", "1.A.2", "1.A.3", "1.A.4"],
+            "name": "Fuel Combustion Activities",
+        },
+        "1.B": {"sources": ["1.B.1", "1.B.2"], "name": "Fugitive Emissions from Fuels"},
+        "3.A": {"sources": ["3.A.1", "3.A.2"], "name": "Livestock"},
+        "3.C.1": {"sources": ["3.C.1.b"], "name": "Emissions from Biomass Burning"},
+        "3.C.5": {
+            "sources": ["3.C.5.a", "3.C.5.b"],
+            "name": "Indirect N2O Emissions from Managed Soils",
+        },
+        "3.C": {
+            "sources": ["3.C.1", "3.C.3", "M.3.AS", "3.C.7"],
+            "name": "Aggregate sources and non-CO2 emissions sources on land",
+        },
+        "3": {"sources": ["M.AG", "M.LULUCF"], "name": "AFOLU"},
+        "M.AG.ELV": {
+            "sources": ["3.C"],
+            "name": "Agriculture excluding livestock emissions",
+        },
     }
 
-
     # 2 for NF3, PFCs (from 2.E)
     aggregate_cats_NF3_PFC = {
-        '2': {'sources': ['2.E'], 'name': 'Industrial Process and Product Use Sector'},
+        "2": {"sources": ["2.E"], "name": "Industrial Process and Product Use Sector"},
     }
 
     compression = dict(zlib=True, complevel=9)
@@ -130,11 +147,10 @@ if __name__ == "__main__":
             str(input_folder / inventory_file),
             pages=page,
             **page_defs[page],
-            )
+        )
         for table in new_tables:
             all_tables.append(table.df)
 
-
     # ###
     # convert tables to primap2 format
     # ###
@@ -148,39 +164,49 @@ if __name__ == "__main__":
         if len(table_def["tables"]) > 1:
             for table in table_def["tables"][1:]:
                 df_this_table = pd.concat(
-                    [df_this_table, all_tables[table]],
-                    axis=0,
-                    join='outer')
+                    [df_this_table, all_tables[table]], axis=0, join="outer"
+                )
 
         # fix for table ES3.6
-        if table_name == 'ES3.6':
+        if table_name == "ES3.6":
             col_idx = df_this_table[0] == "Total CO Emission"
-            df_this_table.loc[col_idx, 1:] = ''
-            df_this_table.loc[col_idx, 0] = 'Total CO2 Emission'
+            df_this_table.loc[col_idx, 1:] = ""
+            df_this_table.loc[col_idx, 0] = "Total CO2 Emission"
 
         df_this_table = df_this_table.reset_index(drop=True)
 
         # fix categories if necessary
         if "fix_cats" in table_def.keys():
             for col in table_def["fix_cats"]:
-                df_this_table[col] = df_this_table[col].replace(table_def["fix_cats"][col])
+                df_this_table[col] = df_this_table[col].replace(
+                    table_def["fix_cats"][col]
+                )
 
         # fix rows
         for col in table_def["rows_to_fix"].keys():
             for n_rows in table_def["rows_to_fix"][col].keys():
                 print(f"Fixing {col}, {n_rows}")
                 # replace line breaks, long hyphens, double, and triple spaces in category names
-                df_this_table.iloc[:, 0] = df_this_table.iloc[:, 0].str.replace("\n", " ")
-                df_this_table.iloc[:, 0] = df_this_table.iloc[:, 0].str.replace("   ", " ")
-                df_this_table.iloc[:, 0] = df_this_table.iloc[:, 0].str.replace("  ", " ")
-                df_this_table.iloc[:, 0] = df_this_table.iloc[:, 0].str.replace("-", "-")
-                df_this_table = fix_rows(df_this_table,
-                                         table_def["rows_to_fix"][col][n_rows], col, n_rows)
+                df_this_table.iloc[:, 0] = df_this_table.iloc[:, 0].str.replace(
+                    "\n", " "
+                )
+                df_this_table.iloc[:, 0] = df_this_table.iloc[:, 0].str.replace(
+                    "   ", " "
+                )
+                df_this_table.iloc[:, 0] = df_this_table.iloc[:, 0].str.replace(
+                    "  ", " "
+                )
+                df_this_table.iloc[:, 0] = df_this_table.iloc[:, 0].str.replace(
+                    "-", "-"
+                )
+                df_this_table = fix_rows(
+                    df_this_table, table_def["rows_to_fix"][col][n_rows], col, n_rows
+                )
 
         # split by entity
         if "gas_splitting" in table_def.keys():
-            col_entity = [''] * len(df_this_table)
-            last_entity = ''
+            col_entity = [""] * len(df_this_table)
+            last_entity = ""
             for i in range(0, len(df_this_table)):
                 current_header = df_this_table[table_def["col_wide_kwd"]].iloc[i]
                 if current_header in table_def["gas_splitting"].keys():
@@ -191,8 +217,12 @@ if __name__ == "__main__":
             table_def["index_cols"].append("entity")
 
         # make a wide table
-        df_this_table = make_wide_table(df_this_table, table_def["wide_keyword"],
-                                        table_def["col_wide_kwd"], table_def["index_cols"])
+        df_this_table = make_wide_table(
+            df_this_table,
+            table_def["wide_keyword"],
+            table_def["col_wide_kwd"],
+            table_def["index_cols"],
+        )
 
         if "drop_rows" in table_def.keys():
             df_this_table = df_this_table.drop(table_def["drop_rows"], axis=0)
@@ -207,11 +237,12 @@ if __name__ == "__main__":
         # add unit
         df_this_table["unit"] = table_def["unit"]
 
-        df_this_table = df_this_table.rename({table_def["index_cols"][0]: "orig_cat_name"},
-                                             axis=1)
+        df_this_table = df_this_table.rename(
+            {table_def["index_cols"][0]: "orig_cat_name"}, axis=1
+        )
 
         # print(table_def["index_cols"][0])
-        # print(df_this_table.columns.values)
+        # print(df_this_table.columns.to_numpy())
 
         # make a copy of the categories row
         df_this_table["category"] = df_this_table["orig_cat_name"]
@@ -219,25 +250,30 @@ if __name__ == "__main__":
         # replace cat names by codes in col "category"
         # first the manual replacements
         df_this_table["category"] = df_this_table["category"].replace(
-            table_def["cat_codes_manual"])
+            table_def["cat_codes_manual"]
+        )
+
         # then the regex replacements
-        def repl(m):
-            return m.group('code')
-        df_this_table["category"] = df_this_table["category"].str.replace(cat_code_regexp,
-                                                                          repl, regex=True)
+        def repl(m):  # noqa: D103
+            return m.group("code")
+
+        df_this_table["category"] = df_this_table["category"].str.replace(
+            cat_code_regexp, repl, regex=True
+        )
 
         ### convert to PRIMAP2 IF
         # remove ','
-        time_format = '%Y'
+        time_format = "%Y"
         time_columns = [
             col
-            for col in df_this_table.columns.values
+            for col in df_this_table.columns.to_numpy()
             if matches_time_format(col, time_format)
         ]
 
         for col in time_columns:
-            df_this_table.loc[:, col] = df_this_table.loc[:, col].str.replace(',', '',
-                                                                              regex=False)
+            df_this_table.loc[:, col] = df_this_table.loc[:, col].str.replace(
+                ",", "", regex=False
+            )
 
         # drop orig_cat_name as it's not unique per category
         df_this_table = df_this_table.drop(columns="orig_cat_name")
@@ -254,7 +290,7 @@ if __name__ == "__main__":
             # coords_value_filling=coords_value_filling,
             # filter_remove=filter_remove,
             # filter_keep=filter_keep,
-            meta_data=meta_data
+            meta_data=meta_data,
         )
 
         this_table_pm2 = pm2.pm2io.from_interchange_format(df_this_table_if)
@@ -267,7 +303,6 @@ if __name__ == "__main__":
     # convert back to IF to have units in the fixed format
     data_if = data_pm2.pr.to_interchange_format()
 
-
     # ###
     # convert to IPCC2006 categories
     # ###
@@ -275,31 +310,36 @@ if __name__ == "__main__":
     data_if_2006
     # filter_data(data_if_2006, filter_remove=filter_remove_IPCC2006)
     data_if_2006 = data_if_2006.replace(
-        {'category (IPCC2006_1996_Taiwan_Inv)': cat_mapping})
+        {"category (IPCC2006_1996_Taiwan_Inv)": cat_mapping}
+    )
 
     # rename the category col
-    data_if_2006.rename(
-        columns={'category (IPCC2006_1996_Taiwan_Inv)': 'category (IPCC2006_PRIMAP)'},
-        inplace=True)
-    data_if_2006.attrs['attrs']['cat'] = 'category (IPCC2006_PRIMAP)'
-    data_if_2006.attrs['dimensions']['*'] = [
-        'category (IPCC2006_PRIMAP)' if item == 'category (IPCC2006_1996_Taiwan_Inv)'
-        else item for item in data_if_2006.attrs['dimensions']['*']]
+    data_if_2006 = data_if_2006.rename(
+        columns={"category (IPCC2006_1996_Taiwan_Inv)": "category (IPCC2006_PRIMAP)"}
+    )
+    data_if_2006.attrs["attrs"]["cat"] = "category (IPCC2006_PRIMAP)"
+    data_if_2006.attrs["dimensions"]["*"] = [
+        "category (IPCC2006_PRIMAP)"
+        if item == "category (IPCC2006_1996_Taiwan_Inv)"
+        else item
+        for item in data_if_2006.attrs["dimensions"]["*"]
+    ]
 
     # aggregate categories
     for cat_to_agg in aggregate_cats:
         mask = data_if_2006["category (IPCC2006_PRIMAP)"].isin(
-            aggregate_cats[cat_to_agg]["sources"])
+            aggregate_cats[cat_to_agg]["sources"]
+        )
         df_test = data_if_2006[mask]
 
         if len(df_test) > 0:
             print(f"Aggregating category {cat_to_agg}")
             df_combine = df_test.copy(deep=True)
 
-            time_format = '%Y'
+            time_format = "%Y"
             time_columns = [
                 col
-                for col in df_combine.columns.values
+                for col in df_combine.columns.to_numpy()
                 if matches_time_format(col, time_format)
             ]
 
@@ -307,8 +347,15 @@ if __name__ == "__main__":
                 df_combine[col] = pd.to_numeric(df_combine[col], errors="coerce")
 
             df_combine = df_combine.groupby(
-                by=['source', 'scenario (PRIMAP)', 'provenance', 'area (ISO3)', 'entity',
-                    'unit']).sum(min_count=1)
+                by=[
+                    "source",
+                    "scenario (PRIMAP)",
+                    "provenance",
+                    "area (ISO3)",
+                    "entity",
+                    "unit",
+                ]
+            ).sum(min_count=1)
 
             df_combine.insert(0, "category (IPCC2006_PRIMAP)", cat_to_agg)
             # df_combine.insert(1, "cat_name_translation", aggregate_cats[cat_to_agg]["name"])
@@ -324,19 +371,21 @@ if __name__ == "__main__":
     # aggregate categories
     for cat_to_agg in aggregate_cats_NF3_PFC:
         mask = data_if_2006["category (IPCC2006_PRIMAP)"].isin(
-            aggregate_cats_NF3_PFC[cat_to_agg]["sources"])
+            aggregate_cats_NF3_PFC[cat_to_agg]["sources"]
+        )
         mask_gas = data_if_2006["entity"].isin(
-            [f"NF3 ({gwp_to_use})", f"PFCS ({gwp_to_use})"])
+            [f"NF3 ({gwp_to_use})", f"PFCS ({gwp_to_use})"]
+        )
         df_test = data_if_2006[mask & mask_gas]
 
         if len(df_test) > 0:
             print(f"Aggregating category {cat_to_agg}")
             df_combine = df_test.copy(deep=True)
 
-            time_format = '%Y'
+            time_format = "%Y"
             time_columns = [
                 col
-                for col in df_combine.columns.values
+                for col in df_combine.columns.to_numpy()
                 if matches_time_format(col, time_format)
             ]
 
@@ -344,8 +393,15 @@ if __name__ == "__main__":
                 df_combine[col] = pd.to_numeric(df_combine[col], errors="coerce")
 
             df_combine = df_combine.groupby(
-                by=['source', 'scenario (PRIMAP)', 'provenance', 'area (ISO3)', 'entity',
-                    'unit']).sum(min_count=1)
+                by=[
+                    "source",
+                    "scenario (PRIMAP)",
+                    "provenance",
+                    "area (ISO3)",
+                    "entity",
+                    "unit",
+                ]
+            ).sum(min_count=1)
 
             df_combine.insert(0, "category (IPCC2006_PRIMAP)", cat_to_agg)
             # df_combine.insert(1, "cat_name_translation", aggregate_cats[cat_to_agg]["name"])
@@ -362,7 +418,7 @@ if __name__ == "__main__":
     data_pm2_2006 = pm2.pm2io.from_interchange_format(data_if_2006)
 
     # convert to mass units from CO2eq
-    entities_to_convert = ['N2O', 'SF6', 'CH4', 'NF3']
+    entities_to_convert = ["N2O", "SF6", "CH4", "NF3"]
     entities_to_convert = [f"{entity} ({gwp_to_use})" for entity in entities_to_convert]
 
     for entity in entities_to_convert:
@@ -382,19 +438,23 @@ if __name__ == "__main__":
     # save data
     # ###
     # data in original categories
-    pm2.pm2io.write_interchange_format(output_folder /
-                                       (output_filename + coords_terminologies["category"]),
-                                       data_if)
+    pm2.pm2io.write_interchange_format(
+        output_folder / (output_filename + coords_terminologies["category"]), data_if
+    )
     encoding = {var: compression for var in data_pm2.data_vars}
-    data_pm2.pr.to_netcdf((output_folder /
-                          (output_filename + coords_terminologies[
-                              "category"])).with_suffix(".nc"),
-                          encoding=encoding)
+    data_pm2.pr.to_netcdf(
+        (
+            output_folder / (output_filename + coords_terminologies["category"])
+        ).with_suffix(".nc"),
+        encoding=encoding,
+    )
 
     # data in 2006 categories
-    pm2.pm2io.write_interchange_format(output_folder /
-                                       (output_filename + "IPCC2006_PRIMAP"), data_if_2006)
+    pm2.pm2io.write_interchange_format(
+        output_folder / (output_filename + "IPCC2006_PRIMAP"), data_if_2006
+    )
     encoding = {var: compression for var in data_pm2_2006.data_vars}
-    data_pm2_2006.pr.to_netcdf((output_folder /
-                                (output_filename + "IPCC2006_PRIMAP")).with_suffix(".nc"),
-                               encoding=encoding)
+    data_pm2_2006.pr.to_netcdf(
+        (output_folder / (output_filename + "IPCC2006_PRIMAP")).with_suffix(".nc"),
+        encoding=encoding,
+    )

+ 30 - 0
src/unfccc_ghg_data/unfccc_reader/Thailand/__init__.py

@@ -0,0 +1,30 @@
+"""Read Thailand's BURs, NIRs, NCs
+
+Scripts and configurations to read Thailand's submissions to the UNFCCC.
+Currently, the following submissions and datasets are available (all datasets
+including DI (read using the DI-reader) and legacy BUR/NIR (no code)):
+
+.. exec_code::
+    :hide_code:
+
+    from unfccc_ghg_data.helper.functions import (get_country_datasets,
+                                                  get_country_submissions)
+    country = 'THA'
+    # print available submissions
+    print("="*15 + " Available submissions " + "="*15)
+    get_country_submissions(country, True)
+    print("")
+
+    #print available datasets
+    print("="*15 + " Available datasets " + "="*15)
+    get_country_datasets(country, True)
+
+You can also obtain this information running
+
+.. code-block:: bash
+
+    poetry run doit country_info country=THA
+
+See below for a listing of scripts for BUR/NIR reading including links.
+
+"""

+ 405 - 223
src/unfccc_ghg_data/unfccc_reader/Thailand/config_tha_bur3.py

@@ -1,38 +1,54 @@
-# configuration for Thailand, BUR4
+"""Config for Thailand's BUR3
+
+Full configuration including PRIMAP2 conversion config and metadata
+
+"""
+
+
 # ###
 # for reading
 # ###
 
 # general
 gwp_to_use = "AR4GWP100"
-terminology_proc = 'IPCC2006_PRIMAP'
+terminology_proc = "IPCC2006_PRIMAP"
 
-header_inventory = ['Greenhouse gas source and sink categories',
-                   'CO2 emissions', 'CO2 removals',
-                   'CH4', 'N2O', 'NOx', 'CO', 'NMVOCs',
-                   'SO2', 'HFCs', 'PFCs', 'SF6']
-unit_inventory = ['Gg'] * len(header_inventory)
+header_inventory = [
+    "Greenhouse gas source and sink categories",
+    "CO2 emissions",
+    "CO2 removals",
+    "CH4",
+    "N2O",
+    "NOx",
+    "CO",
+    "NMVOCs",
+    "SO2",
+    "HFCs",
+    "PFCs",
+    "SF6",
+]
+unit_inventory = ["Gg"] * len(header_inventory)
 unit_inventory[9] = "GgCO2eq"
 unit_inventory[10] = "GgCO2eq"
 
 # 2019 inventory
 inv_conf = {
-    'year': 2016,
-    'entity_row': 0,
-    'unit_row': 1,
-    'index_cols': "Greenhouse gas source and sink categories",
-    'header': header_inventory,
-    'unit': unit_inventory,
+    "year": 2016,
+    "entity_row": 0,
+    "unit_row": 1,
+    "index_cols": "Greenhouse gas source and sink categories",
+    "header": header_inventory,
+    "unit": unit_inventory,
     # special header as category UNFCCC_GHG_data and name in one column
-    'header_long': ["orig_cat_name", "entity", "unit", "time", "data"],
+    "header_long": ["orig_cat_name", "entity", "unit", "time", "data"],
     # manual category codes (manual mapping to primap1, will be mapped to primap2
     # # automatically with the other codes)
-    'cat_codes_manual': {
-        '6. Other Memo Items (not accounted in Total Emissions)': 'MEMO',
-        'International Bunkers': 'MBK',
-        'CO2 from Biomass': 'MBIO',
+    "cat_codes_manual": {
+        "6. Other Memo Items (not accounted in Total Emissions)": "MEMO",
+        "International Bunkers": "MBK",
+        "CO2 from Biomass": "MBIO",
     },
-    'cat_code_regexp': r'^(?P<code>[a-zA-Z0-9]{1,4})[\s\.].*',
+    "cat_code_regexp": r"^(?P<code>[a-zA-Z0-9]{1,4})[\s\.].*",
 }
 
 # primap2 format conversion
@@ -59,14 +75,14 @@ coords_value_mapping = {
     "unit": "PRIMAP1",
     "category": "PRIMAP1",
     "entity": {
-        'HFCs': f"HFCS ({gwp_to_use})",
-        'PFCs': f"PFCS ({gwp_to_use})",
-        'NMVOCs': 'NMVOC',
+        "HFCs": f"HFCS ({gwp_to_use})",
+        "PFCs": f"PFCS ({gwp_to_use})",
+        "NMVOCs": "NMVOC",
     },
 }
 
 filter_remove = {
-    'f_memo': {"category": "MEMO"},
+    "f_memo": {"category": "MEMO"},
 }
 filter_keep = {}
 
@@ -81,26 +97,31 @@ meta_data = {
 
 # main sector time series
 header_main_sector_ts = [
-    'Year', 'Energy', 'IPPU',
-    'Agriculture', 'LULUCF', 'Waste',
-    'Net emissions (Including LULUCF)',
-    'Net emissions (Excluding LULUCF)']
-unit_main_sector_ts = ['GgCO2eq'] * len(header_main_sector_ts)
-unit_main_sector_ts[0] = ''
+    "Year",
+    "Energy",
+    "IPPU",
+    "Agriculture",
+    "LULUCF",
+    "Waste",
+    "Net emissions (Including LULUCF)",
+    "Net emissions (Excluding LULUCF)",
+]
+unit_main_sector_ts = ["GgCO2eq"] * len(header_main_sector_ts)
+unit_main_sector_ts[0] = ""
 
 trend_conf = {
-    'header': header_main_sector_ts,
-    'unit': unit_main_sector_ts,
+    "header": header_main_sector_ts,
+    "unit": unit_main_sector_ts,
     # manual category codes (manual mapping to primap1, will be mapped to primap2
     # automatically with the other codes)
-    'cat_codes_manual': {
-        'Energy': "1",
-        'IPPU': "2",
-        'Agriculture': "3",
-        'LULUCF': "4",
-        'Waste': "5",
-        'Net emissions (Including LULUCF)': "0",
-        'Net emissions (Excluding LULUCF)': "M0EL",
+    "cat_codes_manual": {
+        "Energy": "1",
+        "IPPU": "2",
+        "Agriculture": "3",
+        "LULUCF": "4",
+        "Waste": "5",
+        "Net emissions (Including LULUCF)": "0",
+        "Net emissions (Excluding LULUCF)": "M0EL",
     },
 }
 
@@ -118,14 +139,13 @@ coords_defaults_main_sector_ts = {
 }
 
 # indirect gases time series
-header_indirect = ['Year', 'NOx', 'CO',
-                    'NMVOCs', 'SO2']
-unit_indirect = ['Gg'] * len(header_indirect)
-unit_indirect[0] = ''
+header_indirect = ["Year", "NOx", "CO", "NMVOCs", "SO2"]
+unit_indirect = ["Gg"] * len(header_indirect)
+unit_indirect[0] = ""
 ind_conf = {
-    'header': header_indirect,
-    'unit': unit_indirect,
-    'cols_to_remove': ['Average Annual Growth Rate'],
+    "header": header_indirect,
+    "unit": unit_indirect,
+    "cols_to_remove": ["Average Annual Growth Rate"],
 }
 
 coords_cols_indirect = {
@@ -146,111 +166,203 @@ coords_defaults_indirect = {
 # ###
 # aggregate categories
 country_processing_step1 = {
-    'aggregate_cats': {
-        '2.A.4': {'sources': ['2.A.4.b', '2.A.4.d'],
-                  'name': 'Other Process uses of Carbonates'},
+    "aggregate_cats": {
+        "2.A.4": {
+            "sources": ["2.A.4.b", "2.A.4.d"],
+            "name": "Other Process uses of Carbonates",
+        },
     },
-    'aggregate_gases': {
-        'KYOTOGHG': {
-            'basket': 'KYOTOGHG (AR4GWP100)',
-            'basket_contents': ['CO2', 'CH4', 'N2O', 'SF6',
-                                'HFCS (AR4GWP100)', 'PFCS (AR4GWP100)'],
-            'skipna': True,
-            'min_count': 1,
-            'sel': {f'category ({coords_terminologies["category"]})':
-                [
-                    '0', '1', '1.A', '1.A.1', '1.A.2', '1.A.3',
-                    '1.A.4', '1.B', '1.B.1', '1.B.2',
-                    '1.C',
-                    '2', '2.A', '2.A.1', '2.A.2', '2.A.3', '2.A.4',
-                    '2.B', '2.C', '2.D', '2.H',
-                    '3', '3.A', '3.B', '3.C', '3.D', '3.E', '3.F', '3.G',
-                    '3.H', '3.I',
-                    '4', '4.A', '4.B', '4.C', '4.D', '4.E',
-                    '5', '5.A', '5.B', '5.C', '5.D'
+    "aggregate_gases": {
+        "KYOTOGHG": {
+            "basket": "KYOTOGHG (AR4GWP100)",
+            "basket_contents": [
+                "CO2",
+                "CH4",
+                "N2O",
+                "SF6",
+                "HFCS (AR4GWP100)",
+                "PFCS (AR4GWP100)",
+            ],
+            "skipna": True,
+            "min_count": 1,
+            "sel": {
+                f'category ({coords_terminologies["category"]})': [
+                    "0",
+                    "1",
+                    "1.A",
+                    "1.A.1",
+                    "1.A.2",
+                    "1.A.3",
+                    "1.A.4",
+                    "1.B",
+                    "1.B.1",
+                    "1.B.2",
+                    "1.C",
+                    "2",
+                    "2.A",
+                    "2.A.1",
+                    "2.A.2",
+                    "2.A.3",
+                    "2.A.4",
+                    "2.B",
+                    "2.C",
+                    "2.D",
+                    "2.H",
+                    "3",
+                    "3.A",
+                    "3.B",
+                    "3.C",
+                    "3.D",
+                    "3.E",
+                    "3.F",
+                    "3.G",
+                    "3.H",
+                    "3.I",
+                    "4",
+                    "4.A",
+                    "4.B",
+                    "4.C",
+                    "4.D",
+                    "4.E",
+                    "5",
+                    "5.A",
+                    "5.B",
+                    "5.C",
+                    "5.D",
                 ]
-            }, # not tested
+            },  # not tested
         },
     },
 }
 
 country_processing_step2 = {
-    'downscale': {
+    "downscale": {
         # main sectors present as KYOTOGHG sum. subsectors need to be downscaled
         # TODO: downscale CO, NOx, NMVOC, SO2 (national total present)
-        'sectors': {
-            '1': {
-                'basket': '1',
-                'basket_contents': ['1.A', '1.B', '1.C'],
-                'entities': ['KYOTOGHG (AR4GWP100)'],
-                'dim': f'category ({coords_terminologies["category"]})',
+        "sectors": {
+            "1": {
+                "basket": "1",
+                "basket_contents": ["1.A", "1.B", "1.C"],
+                "entities": ["KYOTOGHG (AR4GWP100)"],
+                "dim": f'category ({coords_terminologies["category"]})',
             },
-            '1.A': {
-                'basket': '1.A',
-                'basket_contents': ['1.A.1', '1.A.2', '1.A.3', '1.A.4'],
-                'entities': ['KYOTOGHG (AR4GWP100)'],
-                'dim': f'category ({coords_terminologies["category"]})',
+            "1.A": {
+                "basket": "1.A",
+                "basket_contents": ["1.A.1", "1.A.2", "1.A.3", "1.A.4"],
+                "entities": ["KYOTOGHG (AR4GWP100)"],
+                "dim": f'category ({coords_terminologies["category"]})',
             },
-            '1.B': {
-                'basket': '1.B',
-                'basket_contents': ['1.B.1', '1.B.2'],
-                'entities': ['KYOTOGHG (AR4GWP100)'],
-                'dim': f'category ({coords_terminologies["category"]})',
+            "1.B": {
+                "basket": "1.B",
+                "basket_contents": ["1.B.1", "1.B.2"],
+                "entities": ["KYOTOGHG (AR4GWP100)"],
+                "dim": f'category ({coords_terminologies["category"]})',
             },
-            '2': {
-                'basket': '2',
-                'basket_contents': ['2.A', '2.B', '2.C', '2.D', '2.H'],
-                'entities': ['KYOTOGHG (AR4GWP100)'],
-                'dim': f'category ({coords_terminologies["category"]})',
+            "2": {
+                "basket": "2",
+                "basket_contents": ["2.A", "2.B", "2.C", "2.D", "2.H"],
+                "entities": ["KYOTOGHG (AR4GWP100)"],
+                "dim": f'category ({coords_terminologies["category"]})',
             },
-            '2.A': {
-                'basket': '2.A',
-                'basket_contents': ['2.A.1', '2.A.2', '2.A.3', '2.A.4'],
-                'entities': ['KYOTOGHG (AR4GWP100)'],
-                'dim': f'category ({coords_terminologies["category"]})',
+            "2.A": {
+                "basket": "2.A",
+                "basket_contents": ["2.A.1", "2.A.2", "2.A.3", "2.A.4"],
+                "entities": ["KYOTOGHG (AR4GWP100)"],
+                "dim": f'category ({coords_terminologies["category"]})',
             },
-            '3': {
-                'basket': '3',
-                'basket_contents': ['3.A', '3.B', '3.C', '3.D', '3.E', '3.F', '3.G',
-                                    '3.H', '3.I'],
-                'entities': ['KYOTOGHG (AR4GWP100)'],
-                'dim': f'category ({coords_terminologies["category"]})',
+            "3": {
+                "basket": "3",
+                "basket_contents": [
+                    "3.A",
+                    "3.B",
+                    "3.C",
+                    "3.D",
+                    "3.E",
+                    "3.F",
+                    "3.G",
+                    "3.H",
+                    "3.I",
+                ],
+                "entities": ["KYOTOGHG (AR4GWP100)"],
+                "dim": f'category ({coords_terminologies["category"]})',
             },
-            '4': {
-                'basket': '4',
-                'basket_contents': ['4.A', '4.B', '4.C', '4.D', '4.E'],
-                'entities': ['KYOTOGHG (AR4GWP100)'],
-                'dim': f'category ({coords_terminologies["category"]})',
+            "4": {
+                "basket": "4",
+                "basket_contents": ["4.A", "4.B", "4.C", "4.D", "4.E"],
+                "entities": ["KYOTOGHG (AR4GWP100)"],
+                "dim": f'category ({coords_terminologies["category"]})',
             },
-            '5': {
-                'basket': '5',
-                'basket_contents': ['5.A', '5.B', '5.C', '5.D'],
-                'entities': ['KYOTOGHG (AR4GWP100)'],
-                'dim': f'category ({coords_terminologies["category"]})',
+            "5": {
+                "basket": "5",
+                "basket_contents": ["5.A", "5.B", "5.C", "5.D"],
+                "entities": ["KYOTOGHG (AR4GWP100)"],
+                "dim": f'category ({coords_terminologies["category"]})',
             },
         },
-        'entities': {
-            'KYOTO': {
-                'basket': 'KYOTOGHG (AR4GWP100)',
-                'basket_contents': ['CH4', 'CO2', 'N2O', 'HFCS (AR4GWP100)',
-                                    'PFCS (AR4GWP100)', 'SF6'],
-                'sel': {f'category ({coords_terminologies["category"]})':
-                    [
-                        '0', '1', '1.A', '1.A.1', '1.A.2', '1.A.3',
-                        '1.A.4', '1.B', '1.B.1', '1.B.2', '1.C',
-                        '2', '2.A', '2.A.1', '2.A.2', '2.A.3', '2.A.4',
-                        '2.B', '2.C', '2.D', '2.H',
-                        '3', '3.A', '3.B', '3.C', '3.D', '3.E', '3.F', '3.G',
-                        '3.H', '3.I',
-                        '4', '4.A', '4.B', '4.C', '4.D', '4.E',
-                        '5', '5.A', '5.B', '5.C', '5.D']},
+        "entities": {
+            "KYOTO": {
+                "basket": "KYOTOGHG (AR4GWP100)",
+                "basket_contents": [
+                    "CH4",
+                    "CO2",
+                    "N2O",
+                    "HFCS (AR4GWP100)",
+                    "PFCS (AR4GWP100)",
+                    "SF6",
+                ],
+                "sel": {
+                    f'category ({coords_terminologies["category"]})': [
+                        "0",
+                        "1",
+                        "1.A",
+                        "1.A.1",
+                        "1.A.2",
+                        "1.A.3",
+                        "1.A.4",
+                        "1.B",
+                        "1.B.1",
+                        "1.B.2",
+                        "1.C",
+                        "2",
+                        "2.A",
+                        "2.A.1",
+                        "2.A.2",
+                        "2.A.3",
+                        "2.A.4",
+                        "2.B",
+                        "2.C",
+                        "2.D",
+                        "2.H",
+                        "3",
+                        "3.A",
+                        "3.B",
+                        "3.C",
+                        "3.D",
+                        "3.E",
+                        "3.F",
+                        "3.G",
+                        "3.H",
+                        "3.I",
+                        "4",
+                        "4.A",
+                        "4.B",
+                        "4.C",
+                        "4.D",
+                        "4.E",
+                        "5",
+                        "5.A",
+                        "5.B",
+                        "5.C",
+                        "5.D",
+                    ]
+                },
             },
         },
     },
-    'basket_copy': {
-        'GWPs_to_add': ["SARGWP100", "AR5GWP100", "AR6GWP100"],
-        'entities': ["HFCS", "PFCS"],
-        'source_GWP': gwp_to_use,
+    "basket_copy": {
+        "GWPs_to_add": ["SARGWP100", "AR5GWP100", "AR6GWP100"],
+        "entities": ["HFCS", "PFCS"],
+        "source_GWP": gwp_to_use,
     },
 }
 ## not in BUR3: 1.A.1.a, 1.A.1.b, 1.A.3.a, 1.A.3.b, 1.A.3.c, 1.A.3.d, 1.A.5, 1.B.3,
@@ -258,106 +370,176 @@ country_processing_step2 = {
 # 4.E.x, 5.X.y M.BK.A, M.BK.M
 
 cat_conversion = {
-    'mapping': {
-        '0': '0',
-        'M.0.EL': 'M.0.EL',
-        '1': '1',
-        '1.A': '1.A',
-        '1.A.1': '1.A.1',
-        '1.A.2': '1.A.2',
-        '1.A.3': '1.A.3',
-        '1.A.4': '1.A.4',
-        '1.B': '1.B',
-        '1.B.1': '1.B.1',
-        '1.B.2': '1.B.2',
-        '1.C': '1.C',
-        '1.C.1': '1.C.1',
-        '1.C.2': '1.C.2',
-        '1.C.3': '1.C.3',
-        '2': '2',
-        '2.A': '2.A',
-        '2.A.1': '2.A.1',
-        '2.A.2': '2.A.2',
-        '2.A.3': '2.A.3',
-        '2.A.4': '2.A.4',
-        '2.A.4.b': '2.A.4.b',
-        '2.A.4.d': '2.A.4.d',
-        '2.B': '2.B',
-        '2.C': '2.C',
-        '2.C.1': '2.C.1',
-        '2.D': '2.D',
-        '2.D.1': '2.D.1',
-        '2.H': '2.H',
-        '2.H.1': '2.H.1',
-        '2.H.2': '2.H.2',
-        '3': 'M.AG',
-        '3.A': '3.A.1',
-        '3.B': '3.A.2',
-        '3.C': 'M.3.C.1.AG',  # field burning of agricultural residues
-        '3.D': '3.C.2',  # Liming
-        '3.E': '3.C.3',  # urea application
-        '3.F': '3.C.4',  # direct N2O from agri soils
-        '3.G': '3.C.5',  # indirect N2O from agri soils
-        '3.H': '3.C.6',  # indirect N2O from manure management
-        '3.I': '3.C.7',  # rice
-        '4': 'M.LULUCF',
-        '4.A': '3.B.1.a',  # forest remaining forest
-        '4.B': '3.B.2.a',  # cropland remaining cropland
-        '4.C': '3.B.2.b',  # land converted to cropland
-        '4.D': '3.B.6.b',  # land converted to other land
-        '4.E': 'M.3.C.1.LU',  # biomass burning (LULUCF)
-        '5': '4',
-        '5.A': '4.A',
-        '5.B': '4.B',
-        '5.C': '4.C',
-        '5.D': '4.D',
-        'M.BK': 'M.BK',
-        'M.BIO': 'M.BIO',
+    "mapping": {
+        "0": "0",
+        "M.0.EL": "M.0.EL",
+        "1": "1",
+        "1.A": "1.A",
+        "1.A.1": "1.A.1",
+        "1.A.2": "1.A.2",
+        "1.A.3": "1.A.3",
+        "1.A.4": "1.A.4",
+        "1.B": "1.B",
+        "1.B.1": "1.B.1",
+        "1.B.2": "1.B.2",
+        "1.C": "1.C",
+        "1.C.1": "1.C.1",
+        "1.C.2": "1.C.2",
+        "1.C.3": "1.C.3",
+        "2": "2",
+        "2.A": "2.A",
+        "2.A.1": "2.A.1",
+        "2.A.2": "2.A.2",
+        "2.A.3": "2.A.3",
+        "2.A.4": "2.A.4",
+        "2.A.4.b": "2.A.4.b",
+        "2.A.4.d": "2.A.4.d",
+        "2.B": "2.B",
+        "2.C": "2.C",
+        "2.C.1": "2.C.1",
+        "2.D": "2.D",
+        "2.D.1": "2.D.1",
+        "2.H": "2.H",
+        "2.H.1": "2.H.1",
+        "2.H.2": "2.H.2",
+        "3": "M.AG",
+        "3.A": "3.A.1",
+        "3.B": "3.A.2",
+        "3.C": "M.3.C.1.AG",  # field burning of agricultural residues
+        "3.D": "3.C.2",  # Liming
+        "3.E": "3.C.3",  # urea application
+        "3.F": "3.C.4",  # direct N2O from agri soils
+        "3.G": "3.C.5",  # indirect N2O from agri soils
+        "3.H": "3.C.6",  # indirect N2O from manure management
+        "3.I": "3.C.7",  # rice
+        "4": "M.LULUCF",
+        "4.A": "3.B.1.a",  # forest remaining forest
+        "4.B": "3.B.2.a",  # cropland remaining cropland
+        "4.C": "3.B.2.b",  # land converted to cropland
+        "4.D": "3.B.6.b",  # land converted to other land
+        "4.E": "M.3.C.1.LU",  # biomass burning (LULUCF)
+        "5": "4",
+        "5.A": "4.A",
+        "5.B": "4.B",
+        "5.C": "4.C",
+        "5.D": "4.D",
+        "M.BK": "M.BK",
+        "M.BIO": "M.BIO",
     },
-    'aggregate': {
-        '3.A': {'sources': ['3.A.1', '3.A.2'], 'name': 'Livestock'},
-        '3.C.1': {'sources': ['M.3.C.1.AG', 'M.3.C.1.LU'],
-                  'name': 'Emissions from Biomass Burning'},
-        '3.C': {'sources': ['3.C.1', '3.C.2', '3.C.3', '3.C.4', '3.C.5', '3.C.6', '3.C.7'],
-                'name': 'Aggregate sources and non-CO2 emissions sources on land'},
-        'M.3.C.AG': {
-            'sources': ['M.3.C.1.AG', '3.C.2', '3.C.3', '3.C.4', '3.C.5', '3.C.6', '3.C.7'],
-            'name': 'Aggregate sources and non-CO2 emissions sources on land (Agriculture)'},
-        'M.AG.ELV': {'sources': ['M.3.C.AG'],
-                     'name': 'Agriculture excluding livestock emissions'},
-        'M.3.C.LU': {'sources': ['M.3.C.1.LU'],
-                     'name': 'Aggregate sources and non-CO2 emissions sources on land (Land use)'},
-        '3.B.1': {'sources': ['3.B.1.a'], 'name': 'Forest Land'},
-        '3.B.2': {'sources': ['3.B.2.a', '3.B.2.b'], 'name': 'Cropland'},
-        '3.B.6': {'sources': ['3.B.6.b'], 'name': 'Other Land'},
-        '3.B': {'sources': ['3.B.1', '3.B.2', '3.B.6'], 'name': 'Land'},
-        'M.LULUCF': {'sources': ['3.B', 'N.3.C.LU'], 'name': 'LULUCF'},
-        '3': {'sources': ['M.AG', 'M.LULUCF'], 'name': 'AFOLU'},
+    "aggregate": {
+        "3.A": {"sources": ["3.A.1", "3.A.2"], "name": "Livestock"},
+        "3.C.1": {
+            "sources": ["M.3.C.1.AG", "M.3.C.1.LU"],
+            "name": "Emissions from Biomass Burning",
+        },
+        "3.C": {
+            "sources": ["3.C.1", "3.C.2", "3.C.3", "3.C.4", "3.C.5", "3.C.6", "3.C.7"],
+            "name": "Aggregate sources and non-CO2 emissions sources on land",
+        },
+        "M.3.C.AG": {
+            "sources": [
+                "M.3.C.1.AG",
+                "3.C.2",
+                "3.C.3",
+                "3.C.4",
+                "3.C.5",
+                "3.C.6",
+                "3.C.7",
+            ],
+            "name": "Aggregate sources and non-CO2 emissions sources on land (Agriculture)",
+        },
+        "M.AG.ELV": {
+            "sources": ["M.3.C.AG"],
+            "name": "Agriculture excluding livestock emissions",
+        },
+        "M.3.C.LU": {
+            "sources": ["M.3.C.1.LU"],
+            "name": "Aggregate sources and non-CO2 emissions sources on land (Land use)",
+        },
+        "3.B.1": {"sources": ["3.B.1.a"], "name": "Forest Land"},
+        "3.B.2": {"sources": ["3.B.2.a", "3.B.2.b"], "name": "Cropland"},
+        "3.B.6": {"sources": ["3.B.6.b"], "name": "Other Land"},
+        "3.B": {"sources": ["3.B.1", "3.B.2", "3.B.6"], "name": "Land"},
+        "M.LULUCF": {"sources": ["3.B", "N.3.C.LU"], "name": "LULUCF"},
+        "3": {"sources": ["M.AG", "M.LULUCF"], "name": "AFOLU"},
     },
 }
 
 sectors_to_save = [
-    '1', '1.A', '1.A.1', '1.A.2', '1.A.3', '1.A.4',
-    '1.B', '1.B.1', '1.B.2', '1.C', '1.C.1', '1.C.2', '1.C.3',
-    '2', '2.A', '2.A.1', '2.A.2', '2.A.3', '2.A.4', '2.A.4.b', '2.A.4.d',
-    '2.B', '2.C', '2.C.1', '2.H', '2.H.1', '2.H.2',
-    '3', 'M.AG', '3.A', '3.A.1', '3.A.2',
-    '3.C', '3.C.1', '3.C.2', '3.C.3', '3.C.4',
-    '3.C.5', '3.C.6', '3.C.7', 'M.3.C.1.AG', 'M.3.C.AG', 'M.AG.ELV',
-    'M.LULUCF', 'M.3.C.1.LU', 'M.3.C.LU', '3.B', '3.B.1', '3.B.1.a', '3.B.2', '3.B.2.a',
-    '3.B.2.b', '3.B.6', '3.B.6.b',
-    '4', '4.A', '4.B', '4.C', '4.D',
-    '0', 'M.0.EL', 'M.BK', 'M.BIO']
+    "1",
+    "1.A",
+    "1.A.1",
+    "1.A.2",
+    "1.A.3",
+    "1.A.4",
+    "1.B",
+    "1.B.1",
+    "1.B.2",
+    "1.C",
+    "1.C.1",
+    "1.C.2",
+    "1.C.3",
+    "2",
+    "2.A",
+    "2.A.1",
+    "2.A.2",
+    "2.A.3",
+    "2.A.4",
+    "2.A.4.b",
+    "2.A.4.d",
+    "2.B",
+    "2.C",
+    "2.C.1",
+    "2.H",
+    "2.H.1",
+    "2.H.2",
+    "3",
+    "M.AG",
+    "3.A",
+    "3.A.1",
+    "3.A.2",
+    "3.C",
+    "3.C.1",
+    "3.C.2",
+    "3.C.3",
+    "3.C.4",
+    "3.C.5",
+    "3.C.6",
+    "3.C.7",
+    "M.3.C.1.AG",
+    "M.3.C.AG",
+    "M.AG.ELV",
+    "M.LULUCF",
+    "M.3.C.1.LU",
+    "M.3.C.LU",
+    "3.B",
+    "3.B.1",
+    "3.B.1.a",
+    "3.B.2",
+    "3.B.2.a",
+    "3.B.2.b",
+    "3.B.6",
+    "3.B.6.b",
+    "4",
+    "4.A",
+    "4.B",
+    "4.C",
+    "4.D",
+    "0",
+    "M.0.EL",
+    "M.BK",
+    "M.BIO",
+]
 
 
 # gas baskets
 gas_baskets = {
-    'FGASES (SARGWP100)': ['HFCS (SARGWP100)', 'PFCS (SARGWP100)', 'SF6', 'NF3'],
-    'FGASES (AR4GWP100)': ['HFCS (AR4GWP100)', 'PFCS (AR4GWP100)', 'SF6', 'NF3'],
-    'FGASES (AR5GWP100)':['HFCS (AR5GWP100)', 'PFCS (AR5GWP100)', 'SF6', 'NF3'],
-    'FGASES (AR6GWP100)':['HFCS (AR6GWP100)', 'PFCS (AR6GWP100)', 'SF6', 'NF3'],
-    'KYOTOGHG (SARGWP100)': ['CO2', 'CH4', 'N2O', 'FGASES (SARGWP100)'],
-    'KYOTOGHG (AR4GWP100)': ['CO2', 'CH4', 'N2O', 'FGASES (AR4GWP100)'],
-    'KYOTOGHG (AR5GWP100)': ['CO2', 'CH4', 'N2O', 'FGASES (AR5GWP100)'],
-    'KYOTOGHG (AR6GWP100)': ['CO2', 'CH4', 'N2O', 'FGASES (AR6GWP100)'],
+    "FGASES (SARGWP100)": ["HFCS (SARGWP100)", "PFCS (SARGWP100)", "SF6", "NF3"],
+    "FGASES (AR4GWP100)": ["HFCS (AR4GWP100)", "PFCS (AR4GWP100)", "SF6", "NF3"],
+    "FGASES (AR5GWP100)": ["HFCS (AR5GWP100)", "PFCS (AR5GWP100)", "SF6", "NF3"],
+    "FGASES (AR6GWP100)": ["HFCS (AR6GWP100)", "PFCS (AR6GWP100)", "SF6", "NF3"],
+    "KYOTOGHG (SARGWP100)": ["CO2", "CH4", "N2O", "FGASES (SARGWP100)"],
+    "KYOTOGHG (AR4GWP100)": ["CO2", "CH4", "N2O", "FGASES (AR4GWP100)"],
+    "KYOTOGHG (AR5GWP100)": ["CO2", "CH4", "N2O", "FGASES (AR5GWP100)"],
+    "KYOTOGHG (AR6GWP100)": ["CO2", "CH4", "N2O", "FGASES (AR6GWP100)"],
 }

+ 461 - 250
src/unfccc_ghg_data/unfccc_reader/Thailand/config_tha_bur4.py

@@ -1,31 +1,35 @@
-# configuration for Thailand, BUR4
+"""Config for Thailand's BUR5
+
+Full configuration including PRIMAP2 conversion config and metadata
+
+"""
 # ###
 # for reading
 # ###
 
 # general
 gwp_to_use = "AR4GWP100"
-terminology_proc = 'IPCC2006_PRIMAP'
+terminology_proc = "IPCC2006_PRIMAP"
 
 # 2019 inventory
 inv_conf = {
-    'year': 2019,
-    'entity_row': 0,
-    'unit_row': 1,
-    'index_cols': "Greenhouse gas source and sink categories",
+    "year": 2019,
+    "entity_row": 0,
+    "unit_row": 1,
+    "index_cols": "Greenhouse gas source and sink categories",
     # special header as category UNFCCC_GHG_data and name in one column
-    'header_long': ["orig_cat_name", "entity", "unit", "time", "data"],
+    "header_long": ["orig_cat_name", "entity", "unit", "time", "data"],
     # manual category codes (manual mapping to primap1, will be mapped to primap2
     # # automatically with the other codes)
-    'cat_codes_manual': {
-        'Total national emissions and removals': '0',
-        'Memo Items (not accounted in total Emissions)': 'MEMO',
-        'International Bunkers': 'MBK',
-        'Aviation International Bunkers': 'MBKA',
-        'Marine-International Bunkers': 'MBKM',
-        'CO2 from biomass': 'MBIO',
+    "cat_codes_manual": {
+        "Total national emissions and removals": "0",
+        "Memo Items (not accounted in total Emissions)": "MEMO",
+        "International Bunkers": "MBK",
+        "Aviation International Bunkers": "MBKA",
+        "Marine-International Bunkers": "MBKM",
+        "CO2 from biomass": "MBIO",
     },
-    'cat_code_regexp': r'^(?P<code>[a-zA-Z0-9]{1,4})[\s\.].*',
+    "cat_code_regexp": r"^(?P<code>[a-zA-Z0-9]{1,4})[\s\.].*",
 }
 
 # primap2 format conversion
@@ -52,16 +56,16 @@ coords_value_mapping = {
     "unit": "PRIMAP1",
     "category": "PRIMAP1",
     "entity": {
-        'HFCs': f"HFCS ({gwp_to_use})",
-        'PFCs': f"PFCS ({gwp_to_use})",
-        'SF6': f'SF6 ({gwp_to_use})',
-        'NMVOCs': 'NMVOC',
-        'Nox': 'NOx',
+        "HFCs": f"HFCS ({gwp_to_use})",
+        "PFCs": f"PFCS ({gwp_to_use})",
+        "SF6": f"SF6 ({gwp_to_use})",
+        "NMVOCs": "NMVOC",
+        "Nox": "NOx",
     },
 }
 
 filter_remove = {
-    'f_memo': {"category": "MEMO"},
+    "f_memo": {"category": "MEMO"},
 }
 filter_keep = {}
 
@@ -78,13 +82,13 @@ meta_data = {
 # manual category codes (manual mapping to primap1, will be mapped to primap2
 # automatically with the other codes)
 cat_codes_manual_main_sector_ts = {
-    'Energy': "1",
-    'Industrial Processes and Product Use': "2",
-    'Agriculture': "3",
-    'LULUCF': "4",
-    'Waste': "5",
-    'Net emissions (Include LULUCF)': "0",
-    'Total emissions (Exclude LULUCF)': "M0EL",
+    "Energy": "1",
+    "Industrial Processes and Product Use": "2",
+    "Agriculture": "3",
+    "LULUCF": "4",
+    "Waste": "5",
+    "Net emissions (Include LULUCF)": "0",
+    "Total emissions (Exclude LULUCF)": "M0EL",
 }
 
 coords_cols_main_sector_ts = {
@@ -119,263 +123,470 @@ coords_defaults_indirect = {
 # ###
 # aggregate categories
 country_processing_step1 = {
-    'aggregate_cats': {
-        '2.A.4': {'sources': ['2.A.4.b', '2.A.4.d'],
-                  'name': 'Other Process uses of Carbonates'},
-        '2.B.8': {'sources': ['2.B.8.b', '2.B.8.c', '2.B.8.e', '2.B.8.f'],
-                  'name': 'Petrochemical and Carbon Black production'},
+    "aggregate_cats": {
+        "2.A.4": {
+            "sources": ["2.A.4.b", "2.A.4.d"],
+            "name": "Other Process uses of Carbonates",
+        },
+        "2.B.8": {
+            "sources": ["2.B.8.b", "2.B.8.c", "2.B.8.e", "2.B.8.f"],
+            "name": "Petrochemical and Carbon Black production",
+        },
     },
-    'aggregate_gases': {
-        'KYOTOGHG': {
-            'basket': 'KYOTOGHG (AR4GWP100)',
-            'basket_contents': ['CO2', 'CH4', 'N2O', 'SF6',
-                                'HFCS (AR4GWP100)', 'PFCS (AR4GWP100)'],
-            'skipna': True,
-            'min_count': 1,
-            'sel': {f'category ({coords_terminologies["category"]})':
-                [
-                    '0', '1', '1.A', '1.A.1', '1.A.2', '1.A.3',
-                    '1.A.4', '1.A.5', '1.B', '1.B.1', '1.B.2', '1.B.3',
-                    '1.C',
-                    '2', '2.A', '2.A.1', '2.A.2', '2.A.3', '2.A.4',
-                    '2.B', '2.C', '2.D', '2.F', '2.G', '2.H',
-                    '3', '3.A', '3.B', '3.C', '3.D', '3.E', '3.F', '3.G',
-                    '3.H', '3.I',
-                    '4', '4.A', '4.B', '4.C', '4.D',
-                    '4.E', '4.E.1', '4.E.2', '4.E.3',
-                    '5', '5.A', '5.B', '5.C', '5.D'
+    "aggregate_gases": {
+        "KYOTOGHG": {
+            "basket": "KYOTOGHG (AR4GWP100)",
+            "basket_contents": [
+                "CO2",
+                "CH4",
+                "N2O",
+                "SF6",
+                "HFCS (AR4GWP100)",
+                "PFCS (AR4GWP100)",
+            ],
+            "skipna": True,
+            "min_count": 1,
+            "sel": {
+                f'category ({coords_terminologies["category"]})': [
+                    "0",
+                    "1",
+                    "1.A",
+                    "1.A.1",
+                    "1.A.2",
+                    "1.A.3",
+                    "1.A.4",
+                    "1.A.5",
+                    "1.B",
+                    "1.B.1",
+                    "1.B.2",
+                    "1.B.3",
+                    "1.C",
+                    "2",
+                    "2.A",
+                    "2.A.1",
+                    "2.A.2",
+                    "2.A.3",
+                    "2.A.4",
+                    "2.B",
+                    "2.C",
+                    "2.D",
+                    "2.F",
+                    "2.G",
+                    "2.H",
+                    "3",
+                    "3.A",
+                    "3.B",
+                    "3.C",
+                    "3.D",
+                    "3.E",
+                    "3.F",
+                    "3.G",
+                    "3.H",
+                    "3.I",
+                    "4",
+                    "4.A",
+                    "4.B",
+                    "4.C",
+                    "4.D",
+                    "4.E",
+                    "4.E.1",
+                    "4.E.2",
+                    "4.E.3",
+                    "5",
+                    "5.A",
+                    "5.B",
+                    "5.C",
+                    "5.D",
                 ]
-            }, # not tested
+            },  # not tested
         },
     },
 }
 
 country_processing_step2 = {
-    'downscale': {
+    "downscale": {
         # main sectors present as KYOTOGHG sum. subsectors need to be downscaled
         # TODO: downscale CO, NOx, NMVOC, SO2 (national total present)
-        'sectors': {
-            '1': {
-                'basket': '1',
-                'basket_contents': ['1.A', '1.B', '1.C'],
-                'entities': ['KYOTOGHG (AR4GWP100)'],
-                'dim': f'category ({coords_terminologies["category"]})',
+        "sectors": {
+            "1": {
+                "basket": "1",
+                "basket_contents": ["1.A", "1.B", "1.C"],
+                "entities": ["KYOTOGHG (AR4GWP100)"],
+                "dim": f'category ({coords_terminologies["category"]})',
             },
-            '1.A': {
-                'basket': '1.A',
-                'basket_contents': ['1.A.1', '1.A.2', '1.A.3', '1.A.4', '1.A.5'],
-                'entities': ['KYOTOGHG (AR4GWP100)'],
-                'dim': f'category ({coords_terminologies["category"]})',
+            "1.A": {
+                "basket": "1.A",
+                "basket_contents": ["1.A.1", "1.A.2", "1.A.3", "1.A.4", "1.A.5"],
+                "entities": ["KYOTOGHG (AR4GWP100)"],
+                "dim": f'category ({coords_terminologies["category"]})',
             },
-            '1.B': {
-                'basket': '1.B',
-                'basket_contents': ['1.B.1', '1.B.2', '1.B.3'],
-                'entities': ['KYOTOGHG (AR4GWP100)'],
-                'dim': f'category ({coords_terminologies["category"]})',
+            "1.B": {
+                "basket": "1.B",
+                "basket_contents": ["1.B.1", "1.B.2", "1.B.3"],
+                "entities": ["KYOTOGHG (AR4GWP100)"],
+                "dim": f'category ({coords_terminologies["category"]})',
             },
-            '2': {
-                'basket': '2',
-                'basket_contents': ['2.A', '2.B', '2.C', '2.D', '2.F', '2.G', '2.H'],
-                'entities': ['KYOTOGHG (AR4GWP100)'],
-                'dim': f'category ({coords_terminologies["category"]})',
+            "2": {
+                "basket": "2",
+                "basket_contents": ["2.A", "2.B", "2.C", "2.D", "2.F", "2.G", "2.H"],
+                "entities": ["KYOTOGHG (AR4GWP100)"],
+                "dim": f'category ({coords_terminologies["category"]})',
             },
-            '2.A': {
-                'basket': '2.A',
-                'basket_contents': ['2.A.1', '2.A.2', '2.A.3', '2.A.4'],
-                'entities': ['KYOTOGHG (AR4GWP100)'],
-                'dim': f'category ({coords_terminologies["category"]})',
+            "2.A": {
+                "basket": "2.A",
+                "basket_contents": ["2.A.1", "2.A.2", "2.A.3", "2.A.4"],
+                "entities": ["KYOTOGHG (AR4GWP100)"],
+                "dim": f'category ({coords_terminologies["category"]})',
             },
-            '3': {
-                'basket': '3',
-                'basket_contents': ['3.A', '3.B', '3.C', '3.D', '3.E', '3.F', '3.G',
-                                    '3.H', '3.I'],
-                'entities': ['KYOTOGHG (AR4GWP100)'],
-                'dim': f'category ({coords_terminologies["category"]})',
+            "3": {
+                "basket": "3",
+                "basket_contents": [
+                    "3.A",
+                    "3.B",
+                    "3.C",
+                    "3.D",
+                    "3.E",
+                    "3.F",
+                    "3.G",
+                    "3.H",
+                    "3.I",
+                ],
+                "entities": ["KYOTOGHG (AR4GWP100)"],
+                "dim": f'category ({coords_terminologies["category"]})',
             },
-            '4': {
-                'basket': '4',
-                'basket_contents': ['4.A', '4.B', '4.C', '4.D', '4.E'],
-                'entities': ['KYOTOGHG (AR4GWP100)'],
-                'dim': f'category ({coords_terminologies["category"]})',
+            "4": {
+                "basket": "4",
+                "basket_contents": ["4.A", "4.B", "4.C", "4.D", "4.E"],
+                "entities": ["KYOTOGHG (AR4GWP100)"],
+                "dim": f'category ({coords_terminologies["category"]})',
             },
-            '4.E': {
-                'basket': '4.E',
-                'basket_contents': ['4.E.1', '4.E.2', '4.E.3'],
-                'entities': ['KYOTOGHG (AR4GWP100)'],
-                'dim': f'category ({coords_terminologies["category"]})',
+            "4.E": {
+                "basket": "4.E",
+                "basket_contents": ["4.E.1", "4.E.2", "4.E.3"],
+                "entities": ["KYOTOGHG (AR4GWP100)"],
+                "dim": f'category ({coords_terminologies["category"]})',
             },
-            '5': {
-                'basket': '5',
-                'basket_contents': ['5.A', '5.B', '5.C', '5.D'],
-                'entities': ['KYOTOGHG (AR4GWP100)'],
-                'dim': f'category ({coords_terminologies["category"]})',
+            "5": {
+                "basket": "5",
+                "basket_contents": ["5.A", "5.B", "5.C", "5.D"],
+                "entities": ["KYOTOGHG (AR4GWP100)"],
+                "dim": f'category ({coords_terminologies["category"]})',
             },
         },
-        'entities': {
-            'KYOTO': {
-                'basket': 'KYOTOGHG (AR4GWP100)',
-                'basket_contents': ['CH4', 'CO2', 'N2O', 'HFCS (AR4GWP100)',
-                                    'PFCS (AR4GWP100)', 'SF6'],
-                'sel': {f'category ({coords_terminologies["category"]})':
-                    [
-                        '1', '1.A', '1.A.1', '1.A.2', '1.A.3',
-                        '1.A.4', '1.A.5', '1.B', '1.B.1', '1.B.2', '1.B.3',
-                        '1.C',
-                        '2', '2.A', '2.A.1', '2.A.2', '2.A.3', '2.A.4',
-                        '2.B', '2.C', '2.D', '2.F', '2.G', '2.H',
-                        '3', '3.A', '3.B', '3.C', '3.D', '3.E', '3.F', '3.G',
-                        '3.H', '3.I',
-                        '4', '4.A', '4.B', '4.C', '4.D',
-                        '4.E', '4.E.1', '4.E.2', '4.E.3',
-                        '5', '5.A', '5.B', '5.C', '5.D']},
+        "entities": {
+            "KYOTO": {
+                "basket": "KYOTOGHG (AR4GWP100)",
+                "basket_contents": [
+                    "CH4",
+                    "CO2",
+                    "N2O",
+                    "HFCS (AR4GWP100)",
+                    "PFCS (AR4GWP100)",
+                    "SF6",
+                ],
+                "sel": {
+                    f'category ({coords_terminologies["category"]})': [
+                        "1",
+                        "1.A",
+                        "1.A.1",
+                        "1.A.2",
+                        "1.A.3",
+                        "1.A.4",
+                        "1.A.5",
+                        "1.B",
+                        "1.B.1",
+                        "1.B.2",
+                        "1.B.3",
+                        "1.C",
+                        "2",
+                        "2.A",
+                        "2.A.1",
+                        "2.A.2",
+                        "2.A.3",
+                        "2.A.4",
+                        "2.B",
+                        "2.C",
+                        "2.D",
+                        "2.F",
+                        "2.G",
+                        "2.H",
+                        "3",
+                        "3.A",
+                        "3.B",
+                        "3.C",
+                        "3.D",
+                        "3.E",
+                        "3.F",
+                        "3.G",
+                        "3.H",
+                        "3.I",
+                        "4",
+                        "4.A",
+                        "4.B",
+                        "4.C",
+                        "4.D",
+                        "4.E",
+                        "4.E.1",
+                        "4.E.2",
+                        "4.E.3",
+                        "5",
+                        "5.A",
+                        "5.B",
+                        "5.C",
+                        "5.D",
+                    ]
+                },
             },
         },
     },
-    'basket_copy': {
-        'GWPs_to_add': ["SARGWP100", "AR5GWP100", "AR6GWP100"],
-        'entities': ["HFCS", "PFCS"],
-        'source_GWP': gwp_to_use,
+    "basket_copy": {
+        "GWPs_to_add": ["SARGWP100", "AR5GWP100", "AR6GWP100"],
+        "entities": ["HFCS", "PFCS"],
+        "source_GWP": gwp_to_use,
     },
 }
 
 cat_conversion = {
-    'mapping': {
-        '0': '0',
-        'M.0.EL': 'M.0.EL',
-        '1': '1',
-        '1.A': '1.A',
-        '1.A.1': '1.A.1',
-        '1.A.1.a': '1.A.1.a',
-        '1.A.1.b': '1.A.1.b',
-        '1.A.2': '1.A.2',
-        '1.A.3': '1.A.3',
-        '1.A.3.a': '1.A.3.a',
-        '1.A.3.b': '1.A.3.b',
-        '1.A.3.c': '1.A.3.c',
-        '1.A.3.d': '1.A.3.d',
-        '1.A.4': '1.A.4',
-        '1.A.5': '1.A.5',
-        '1.B': '1.B',
-        '1.B.1': '1.B.1',
-        '1.B.2': '1.B.2',
-        '1.B.3': '1.B.3',
-        '1.C': '1.C',
-        '1.C.1': '1.C.1',
-        '1.C.2': '1.C.2',
-        '1.C.3': '1.C.3',
-        '2': '2',
-        '2.A': '2.A',
-        '2.A.1': '2.A.1',
-        '2.A.2': '2.A.2',
-        '2.A.3': '2.A.3',
-        '2.A.4': '2.A.4',
-        '2.A.4.b': '2.A.4.b',
-        '2.A.4.d': '2.A.4.d',
-        '2.B': '2.B',
-        '2.B.2': '2.B.2',
-        '2.B.4': '2.B.4',
-        '2.B.8': '2.B.8',
-        '2.B.8.b': '2.B.8.b',
-        '2.B.8.c': '2.B.8.c',
-        '2.B.8.e': '2.B.8.e',
-        '2.B.8.f': '2.B.8.f',
-        '2.C': '2.C',
-        '2.C.1': '2.C.1',
-        '2.D': '2.D',
-        '2.D.1': '2.D.1',
-        '2.F': '2.F',
-        '2.F.1': '2.F.1',
-        '2.G': '2.G',
-        '2.G.1': '2.G.1',
-        '2.H': '2.H',
-        '2.H.1': '2.H.1',
-        '2.H.2': '2.H.2',
-        '3': 'M.AG',
-        '3.A': '3.A.1',
-        '3.B': '3.A.2',
-        '3.C': 'M.3.C.1.b.i',  # field burning of agricultural residues
-        '3.D': '3.C.2',  # Liming
-        '3.E': '3.C.3',  # urea application
-        '3.F': '3.C.4',  # direct N2O from agri soils
-        '3.G': '3.C.5',  # indirect N2O from agri soils
-        '3.H': '3.C.6',  # indirect N2O from manure management
-        '3.I': '3.C.7',  # rice
+    "mapping": {
+        "0": "0",
+        "M.0.EL": "M.0.EL",
+        "1": "1",
+        "1.A": "1.A",
+        "1.A.1": "1.A.1",
+        "1.A.1.a": "1.A.1.a",
+        "1.A.1.b": "1.A.1.b",
+        "1.A.2": "1.A.2",
+        "1.A.3": "1.A.3",
+        "1.A.3.a": "1.A.3.a",
+        "1.A.3.b": "1.A.3.b",
+        "1.A.3.c": "1.A.3.c",
+        "1.A.3.d": "1.A.3.d",
+        "1.A.4": "1.A.4",
+        "1.A.5": "1.A.5",
+        "1.B": "1.B",
+        "1.B.1": "1.B.1",
+        "1.B.2": "1.B.2",
+        "1.B.3": "1.B.3",
+        "1.C": "1.C",
+        "1.C.1": "1.C.1",
+        "1.C.2": "1.C.2",
+        "1.C.3": "1.C.3",
+        "2": "2",
+        "2.A": "2.A",
+        "2.A.1": "2.A.1",
+        "2.A.2": "2.A.2",
+        "2.A.3": "2.A.3",
+        "2.A.4": "2.A.4",
+        "2.A.4.b": "2.A.4.b",
+        "2.A.4.d": "2.A.4.d",
+        "2.B": "2.B",
+        "2.B.2": "2.B.2",
+        "2.B.4": "2.B.4",
+        "2.B.8": "2.B.8",
+        "2.B.8.b": "2.B.8.b",
+        "2.B.8.c": "2.B.8.c",
+        "2.B.8.e": "2.B.8.e",
+        "2.B.8.f": "2.B.8.f",
+        "2.C": "2.C",
+        "2.C.1": "2.C.1",
+        "2.D": "2.D",
+        "2.D.1": "2.D.1",
+        "2.F": "2.F",
+        "2.F.1": "2.F.1",
+        "2.G": "2.G",
+        "2.G.1": "2.G.1",
+        "2.H": "2.H",
+        "2.H.1": "2.H.1",
+        "2.H.2": "2.H.2",
+        "3": "M.AG",
+        "3.A": "3.A.1",
+        "3.B": "3.A.2",
+        "3.C": "M.3.C.1.b.i",  # field burning of agricultural residues
+        "3.D": "3.C.2",  # Liming
+        "3.E": "3.C.3",  # urea application
+        "3.F": "3.C.4",  # direct N2O from agri soils
+        "3.G": "3.C.5",  # indirect N2O from agri soils
+        "3.H": "3.C.6",  # indirect N2O from manure management
+        "3.I": "3.C.7",  # rice
         #'4': 'M.LULUCF',
-        '4.A': '3.B.1.a',  # forest remaining forest
-        '4.B': '3.B.2.a',  # cropland remaining cropland
-        '4.C': '3.B.2.b',  # land converted to cropland
-        '4.D': '3.B.6.b',  # land converted to other land
+        "4.A": "3.B.1.a",  # forest remaining forest
+        "4.B": "3.B.2.a",  # cropland remaining cropland
+        "4.C": "3.B.2.b",  # land converted to cropland
+        "4.D": "3.B.6.b",  # land converted to other land
         #'4.E': 'M.3.C.1.LU',  # biomass burning (LULUCF)
-        '4.E.1': '3.C.1.a', # biomass burning (Forest Land)
-        '4.E.2': 'M.3.C.1.b.ii', # biomass burning (Cropland)
-        '4.E.3': '3.C.1.d', # biomass burning (Other Land)
-        '5': '4',
-        '5.A': '4.A',
-        '5.A.1': '4.A.1',
-        '5.A.2': '4.A.2',
-        '5.B': '4.B',
-        '5.C': '4.C',
-        '5.C.1': '4.C.1',
-        '5.D': '4.D',
-        '5.D.1': '4.D.1',
-        '5.D.2': '4.D.2',
-        'M.BK': 'M.BK',
-        'M.BK.A': 'M.BK.A',
-        'M.BK.M': 'M.BM.M',
-        'M.BIO': 'M.BIO',
+        "4.E.1": "3.C.1.a",  # biomass burning (Forest Land)
+        "4.E.2": "M.3.C.1.b.ii",  # biomass burning (Cropland)
+        "4.E.3": "3.C.1.d",  # biomass burning (Other Land)
+        "5": "4",
+        "5.A": "4.A",
+        "5.A.1": "4.A.1",
+        "5.A.2": "4.A.2",
+        "5.B": "4.B",
+        "5.C": "4.C",
+        "5.C.1": "4.C.1",
+        "5.D": "4.D",
+        "5.D.1": "4.D.1",
+        "5.D.2": "4.D.2",
+        "M.BK": "M.BK",
+        "M.BK.A": "M.BK.A",
+        "M.BK.M": "M.BM.M",
+        "M.BIO": "M.BIO",
     },
-    'aggregate': {
-        '3.A': {'sources': ['3.A.1', '3.A.2'], 'name': 'Livestock'},
-        '3.C.1.b': {'sources': ['M.3.C.1.b.i', 'M.3.C.1.b.ii'],
-                  'name': 'Biomass Burning In Cropland'},
-        'M.3.C.1.AG': {'sources': ['3.C.1.b', '3.C.1.c'],
-                  'name': 'Biomass Burning (Agriculture)'},
-        'M.3.C.1.LU': {'sources': ['3.C.1.a', '3.C.1.d'],
-                  'name': 'Biomass Burning (LULUCF)'},
-        '3.C.1': {'sources': ['M.3.C.1.AG', 'M.3.C.1.LU'],
-                  'name': 'Emissions from Biomass Burning'},
-        '3.C': {'sources': ['3.C.1', '3.C.2', '3.C.3', '3.C.4', '3.C.5', '3.C.6', '3.C.7'],
-                'name': 'Aggregate sources and non-CO2 emissions sources on land'},
-        'M.3.C.AG': {
-            'sources': ['M.3.C.1.AG', '3.C.2', '3.C.3', '3.C.4', '3.C.5', '3.C.6', '3.C.7'],
-            'name': 'Aggregate sources and non-CO2 emissions sources on land (Agriculture)'},
-        'M.AG.ELV': {'sources': ['M.3.C.AG'],
-                     'name': 'Agriculture excluding livestock emissions'},
-        'M.3.C.LU': {'sources': ['M.3.C.1.LU'],
-                     'name': 'Aggregate sources and non-CO2 emissions sources on land (Land use)'},
-        '3.B.1': {'sources': ['3.B.1.a'], 'name': 'Forest Land'},
-        '3.B.2': {'sources': ['3.B.2.a', '3.B.2.b'], 'name': 'Cropland'},
-        '3.B.6': {'sources': ['3.B.6.b'], 'name': 'Other Land'},
-        '3.B': {'sources': ['3.B.1', '3.B.2', '3.B.6'], 'name': 'Land'},
-        'M.LULUCF': {'sources': ['3.B', 'N.3.C.LU'], 'name': 'LULUCF'},
-        '3': {'sources': ['M.AG', 'M.LULUCF'], 'name': 'AFOLU'},
+    "aggregate": {
+        "3.A": {"sources": ["3.A.1", "3.A.2"], "name": "Livestock"},
+        "3.C.1.b": {
+            "sources": ["M.3.C.1.b.i", "M.3.C.1.b.ii"],
+            "name": "Biomass Burning In Cropland",
+        },
+        "M.3.C.1.AG": {
+            "sources": ["3.C.1.b", "3.C.1.c"],
+            "name": "Biomass Burning (Agriculture)",
+        },
+        "M.3.C.1.LU": {
+            "sources": ["3.C.1.a", "3.C.1.d"],
+            "name": "Biomass Burning (LULUCF)",
+        },
+        "3.C.1": {
+            "sources": ["M.3.C.1.AG", "M.3.C.1.LU"],
+            "name": "Emissions from Biomass Burning",
+        },
+        "3.C": {
+            "sources": ["3.C.1", "3.C.2", "3.C.3", "3.C.4", "3.C.5", "3.C.6", "3.C.7"],
+            "name": "Aggregate sources and non-CO2 emissions sources on land",
+        },
+        "M.3.C.AG": {
+            "sources": [
+                "M.3.C.1.AG",
+                "3.C.2",
+                "3.C.3",
+                "3.C.4",
+                "3.C.5",
+                "3.C.6",
+                "3.C.7",
+            ],
+            "name": "Aggregate sources and non-CO2 emissions sources on land (Agriculture)",
+        },
+        "M.AG.ELV": {
+            "sources": ["M.3.C.AG"],
+            "name": "Agriculture excluding livestock emissions",
+        },
+        "M.3.C.LU": {
+            "sources": ["M.3.C.1.LU"],
+            "name": "Aggregate sources and non-CO2 emissions sources on land (Land use)",
+        },
+        "3.B.1": {"sources": ["3.B.1.a"], "name": "Forest Land"},
+        "3.B.2": {"sources": ["3.B.2.a", "3.B.2.b"], "name": "Cropland"},
+        "3.B.6": {"sources": ["3.B.6.b"], "name": "Other Land"},
+        "3.B": {"sources": ["3.B.1", "3.B.2", "3.B.6"], "name": "Land"},
+        "M.LULUCF": {"sources": ["3.B", "N.3.C.LU"], "name": "LULUCF"},
+        "3": {"sources": ["M.AG", "M.LULUCF"], "name": "AFOLU"},
     },
 }
 
 sectors_to_save = [
-    '1', '1.A', '1.A.1', '1.A.1.a', '1.A.1.b', '1.A.2', '1.A.3', '1.A.3.a', '1.A.3.b',
-    '1.A.3.c', '1.A.3.d', '1.A.4', '1.A.5',
-    '1.B', '1.B.1', '1.B.2', '1.B.3', '1.C', '1.C.1', '1.C.2', '1.C.3',
-    '2', '2.A', '2.A.1', '2.A.2', '2.A.3', '2.A.4', '2.A.4.b', '2.A.4.d',
-    '2.B', '2.B.2', '2.B.4', '2.B.8', '2.B.8.a', '2.B.8.c', '2.B.8.e', '2.B.8.f',
-    '2.C', '2.C.1', '2.F', '2.F.1', '2.G', '2.G.1', '2.H', '2.H.1', '2.H.2',
-    '3', 'M.AG', '3.A', '3.A.1', '3.A.2',
-    '3.C', '3.C.1', '3.C.1.a', '3.C.1.b', '3.C.1.d', '3.C.2', '3.C.3', '3.C.4',
-    '3.C.5', '3.C.6', '3.C.7', 'M.3.C.1.AG', 'M.3.C.AG', 'M.AG.ELV',
-    'M.LULUCF', 'M.3.C.1.LU', 'M.3.C.LU', '3.B', '3.B.1', '3.B.1.a', '3.B.2', '3.B.2.a',
-    '3.B.2.b', '3.B.6', '3.B.6.b',
-    '4', '4.A', '4.A.1', '4.A.2', '4.B', '4.C', '4.C.1', '4.D', '4.D.1', '4.D.2',
-    '0', 'M.0.EL', 'M.BK', 'M.BK.A', 'M.BK.M', 'M.BIO']
+    "1",
+    "1.A",
+    "1.A.1",
+    "1.A.1.a",
+    "1.A.1.b",
+    "1.A.2",
+    "1.A.3",
+    "1.A.3.a",
+    "1.A.3.b",
+    "1.A.3.c",
+    "1.A.3.d",
+    "1.A.4",
+    "1.A.5",
+    "1.B",
+    "1.B.1",
+    "1.B.2",
+    "1.B.3",
+    "1.C",
+    "1.C.1",
+    "1.C.2",
+    "1.C.3",
+    "2",
+    "2.A",
+    "2.A.1",
+    "2.A.2",
+    "2.A.3",
+    "2.A.4",
+    "2.A.4.b",
+    "2.A.4.d",
+    "2.B",
+    "2.B.2",
+    "2.B.4",
+    "2.B.8",
+    "2.B.8.a",
+    "2.B.8.c",
+    "2.B.8.e",
+    "2.B.8.f",
+    "2.C",
+    "2.C.1",
+    "2.F",
+    "2.F.1",
+    "2.G",
+    "2.G.1",
+    "2.H",
+    "2.H.1",
+    "2.H.2",
+    "3",
+    "M.AG",
+    "3.A",
+    "3.A.1",
+    "3.A.2",
+    "3.C",
+    "3.C.1",
+    "3.C.1.a",
+    "3.C.1.b",
+    "3.C.1.d",
+    "3.C.2",
+    "3.C.3",
+    "3.C.4",
+    "3.C.5",
+    "3.C.6",
+    "3.C.7",
+    "M.3.C.1.AG",
+    "M.3.C.AG",
+    "M.AG.ELV",
+    "M.LULUCF",
+    "M.3.C.1.LU",
+    "M.3.C.LU",
+    "3.B",
+    "3.B.1",
+    "3.B.1.a",
+    "3.B.2",
+    "3.B.2.a",
+    "3.B.2.b",
+    "3.B.6",
+    "3.B.6.b",
+    "4",
+    "4.A",
+    "4.A.1",
+    "4.A.2",
+    "4.B",
+    "4.C",
+    "4.C.1",
+    "4.D",
+    "4.D.1",
+    "4.D.2",
+    "0",
+    "M.0.EL",
+    "M.BK",
+    "M.BK.A",
+    "M.BK.M",
+    "M.BIO",
+]
 
 
 # gas baskets
 gas_baskets = {
-    'FGASES (SARGWP100)': ['HFCS (SARGWP100)', 'PFCS (SARGWP100)', 'SF6', 'NF3'],
-    'FGASES (AR4GWP100)': ['HFCS (AR4GWP100)', 'PFCS (AR4GWP100)', 'SF6', 'NF3'],
-    'FGASES (AR5GWP100)':['HFCS (AR5GWP100)', 'PFCS (AR5GWP100)', 'SF6', 'NF3'],
-    'FGASES (AR6GWP100)':['HFCS (AR6GWP100)', 'PFCS (AR6GWP100)', 'SF6', 'NF3'],
-    'KYOTOGHG (SARGWP100)': ['CO2', 'CH4', 'N2O', 'FGASES (SARGWP100)'],
-    'KYOTOGHG (AR4GWP100)': ['CO2', 'CH4', 'N2O', 'FGASES (AR4GWP100)'],
-    'KYOTOGHG (AR5GWP100)': ['CO2', 'CH4', 'N2O', 'FGASES (AR5GWP100)'],
-    'KYOTOGHG (AR6GWP100)': ['CO2', 'CH4', 'N2O', 'FGASES (AR6GWP100)'],
+    "FGASES (SARGWP100)": ["HFCS (SARGWP100)", "PFCS (SARGWP100)", "SF6", "NF3"],
+    "FGASES (AR4GWP100)": ["HFCS (AR4GWP100)", "PFCS (AR4GWP100)", "SF6", "NF3"],
+    "FGASES (AR5GWP100)": ["HFCS (AR5GWP100)", "PFCS (AR5GWP100)", "SF6", "NF3"],
+    "FGASES (AR6GWP100)": ["HFCS (AR6GWP100)", "PFCS (AR6GWP100)", "SF6", "NF3"],
+    "KYOTOGHG (SARGWP100)": ["CO2", "CH4", "N2O", "FGASES (SARGWP100)"],
+    "KYOTOGHG (AR4GWP100)": ["CO2", "CH4", "N2O", "FGASES (AR4GWP100)"],
+    "KYOTOGHG (AR5GWP100)": ["CO2", "CH4", "N2O", "FGASES (AR5GWP100)"],
+    "KYOTOGHG (AR6GWP100)": ["CO2", "CH4", "N2O", "FGASES (AR6GWP100)"],
 }

+ 129 - 89
src/unfccc_ghg_data/unfccc_reader/Thailand/read_THA_BUR3_from_pdf.py

@@ -1,10 +1,14 @@
-# this script reads data from Thailand's BUR3
-# Data is read from the pdf file
+"""
+Read Thailand's BUR3 from pdf
 
+This script reads data from Thailand's BUR3
+Data are read from pdf using camelot
+
+"""
 import camelot
 import pandas as pd
 import primap2 as pm2
-from .config_tha_bur3 import (
+from config_tha_bur3 import (
     cat_conversion,
     coords_cols,
     coords_cols_indirect,
@@ -26,53 +30,65 @@ from .config_tha_bur3 import (
     trend_conf,
 )
 
-from unfccc_ghg_data.helper import downloaded_data_path, extracted_data_path, process_data_for_country
+from unfccc_ghg_data.helper import (
+    downloaded_data_path,
+    extracted_data_path,
+    process_data_for_country,
+)
 
 if __name__ == "__main__":
     # ###
     # configuration
     # ###
-    input_folder = downloaded_data_path / 'UNFCCC' / 'Thailand' / 'BUR3'
-    output_folder = extracted_data_path / 'UNFCCC' / 'Thailand'
+    input_folder = downloaded_data_path / "UNFCCC" / "Thailand" / "BUR3"
+    output_folder = extracted_data_path / "UNFCCC" / "Thailand"
     if not output_folder.exists():
         output_folder.mkdir()
 
-    inventory_file = 'BUR3_Thailand_251220_.pdf'
-    output_filename = 'THA_BUR3_2020_'
+    inventory_file = "BUR3_Thailand_251220_.pdf"
+    output_filename = "THA_BUR3_2020_"
 
     compression = dict(zlib=True, complevel=9)
 
     # inventory tables
-    pages_inventory = '68,69'
+    pages_inventory = "68,69"
 
     # main sector time series
-    page_main_sector_ts = '70'
+    page_main_sector_ts = "70"
 
     # indirect gases time series
-    page_indirect = '72'
-
+    page_indirect = "72"
 
     # ###
     # read the inventory data and convert to PM2 IF
     # ###
-    tables_inventory = camelot.read_pdf(str(input_folder / inventory_file), pages=pages_inventory,
-                                        split_text=True, flavor="lattice")
+    tables_inventory = camelot.read_pdf(
+        str(input_folder / inventory_file),
+        pages=pages_inventory,
+        split_text=True,
+        flavor="lattice",
+    )
 
     df_inventory = tables_inventory[0].df[1:]
     df_header = pd.DataFrame([inv_conf["header"], inv_conf["unit"]])
 
-    df_inventory = pd.concat([df_header, df_inventory, tables_inventory[1].df.iloc[1:]],
-                             axis=0, join='outer')
+    df_inventory = pd.concat(
+        [df_header, df_inventory, tables_inventory[1].df.iloc[1:]], axis=0, join="outer"
+    )
 
-    df_inventory = pm2.pm2io.nir_add_unit_information(df_inventory,
-                                                      unit_row=inv_conf["unit_row"],
-                                                      entity_row=inv_conf["entity_row"],
-                                                      regexp_entity=".*", regexp_unit=".*",
-                                                      default_unit="Gg")
+    df_inventory = pm2.pm2io.nir_add_unit_information(
+        df_inventory,
+        unit_row=inv_conf["unit_row"],
+        entity_row=inv_conf["entity_row"],
+        regexp_entity=".*",
+        regexp_unit=".*",
+        default_unit="Gg",
+    )
     # set index and convert to long format
     df_inventory = df_inventory.set_index(inv_conf["index_cols"])
-    df_inventory_long = pm2.pm2io.nir_convert_df_to_long(df_inventory, inv_conf["year"],
-                                                         inv_conf["header_long"])
+    df_inventory_long = pm2.pm2io.nir_convert_df_to_long(
+        df_inventory, inv_conf["year"], inv_conf["header_long"]
+    )
     df_inventory_long["orig_cat_name"] = df_inventory_long["orig_cat_name"].str[0]
 
     # prep for conversion to PM2 IF and native format
@@ -81,24 +97,29 @@ if __name__ == "__main__":
 
     # replace cat names by codes in col "category"
     # first the manual replacements
-    df_inventory_long["category"] = \
-        df_inventory_long["category"].replace(inv_conf["cat_codes_manual"])
+    df_inventory_long["category"] = df_inventory_long["category"].replace(
+        inv_conf["cat_codes_manual"]
+    )
+
     # then the regex replacements
-    def repl(m):
-        return m.group('code')
-    df_inventory_long["category"] = \
-        df_inventory_long["category"].str.replace(inv_conf["cat_code_regexp"], repl,
-                                                  regex=True)
+    def repl(m):  # noqa: D103
+        return m.group("code")
+
+    df_inventory_long["category"] = df_inventory_long["category"].str.replace(
+        inv_conf["cat_code_regexp"], repl, regex=True
+    )
     df_inventory_long = df_inventory_long.reset_index(drop=True)
 
     # replace "," with "" in data
-    def repl(m):
-        return m.group('part1') + m.group('part2')
-    df_inventory_long.loc[:, "data"] = \
-        df_inventory_long.loc[:, "data"].str.replace(
-            '(?P<part1>[0-9]+),(?P<part2>[0-9\\.]+)$', repl, regex=True)
-    df_inventory_long.loc[:, "data"] = df_inventory_long.loc[:, "data"].str.\
-        replace(' ','', regex=False)
+    def repl(m):  # noqa: D103
+        return m.group("part1") + m.group("part2")
+
+    df_inventory_long.loc[:, "data"] = df_inventory_long.loc[:, "data"].str.replace(
+        "(?P<part1>[0-9]+),(?P<part2>[0-9\\.]+)$", repl, regex=True
+    )
+    df_inventory_long.loc[:, "data"] = df_inventory_long.loc[:, "data"].str.replace(
+        " ", "", regex=False
+    )
 
     # make sure all col headers are str
     df_inventory_long.columns = df_inventory_long.columns.map(str)
@@ -108,27 +129,31 @@ if __name__ == "__main__":
     data_inventory_IF = pm2.pm2io.convert_long_dataframe_if(
         df_inventory_long,
         coords_cols=coords_cols,
-        #add_coords_cols=add_coords_cols,
+        # add_coords_cols=add_coords_cols,
         coords_defaults=coords_defaults,
         coords_terminologies=coords_terminologies,
         coords_value_mapping=coords_value_mapping,
-        #coords_value_filling=coords_value_filling,
+        # coords_value_filling=coords_value_filling,
         filter_remove=filter_remove,
-        #filter_keep=filter_keep,
+        # filter_keep=filter_keep,
         meta_data=meta_data,
         convert_str=True,
         time_format="%Y",
-        )
+    )
 
     # ###
     # read the main sector time series and convert to PM2 IF
     # ###
-    tables_main_sector_ts = camelot.read_pdf(str(input_folder / inventory_file), pages=page_main_sector_ts,
-                                        split_text=True, flavor="lattice")
+    tables_main_sector_ts = camelot.read_pdf(
+        str(input_folder / inventory_file),
+        pages=page_main_sector_ts,
+        split_text=True,
+        flavor="lattice",
+    )
 
     df_main_sector_ts = tables_main_sector_ts[0].df.iloc[2:]
-    #df_header = pd.DataFrame([header_main_sector_ts, unit_main_sector_ts])
-    #df_main_sector_ts = pd.concat([df_header, df_main_sector_ts], axis=0, join='outer')
+    # df_header = pd.DataFrame([header_main_sector_ts, unit_main_sector_ts])
+    # df_main_sector_ts = pd.concat([df_header, df_main_sector_ts], axis=0, join='outer')
     df_main_sector_ts.columns = [trend_conf["header"], trend_conf["unit"]]
 
     df_main_sector_ts = df_main_sector_ts.transpose()
@@ -141,42 +166,49 @@ if __name__ == "__main__":
 
     # replace cat names by codes in col "category"
     df_main_sector_ts["category"] = df_main_sector_ts["category"].replace(
-        trend_conf["cat_codes_manual"])
+        trend_conf["cat_codes_manual"]
+    )
 
-    def repl(m):
-        return m.group('part1') + m.group('part2')
-    year_cols = list(set(df_main_sector_ts.columns) - set(['category', 'unit']))
+    def repl(m):  # noqa: D103
+        return m.group("part1") + m.group("part2")
+
+    year_cols = list(set(df_main_sector_ts.columns) - set(["category", "unit"]))
     for col in year_cols:
-        df_main_sector_ts.loc[:, col] = df_main_sector_ts.loc[:, col].str.\
-            replace('(?P<part1>[0-9]+),(?P<part2>[0-9\\.]+)$', repl, regex=True)
-        df_main_sector_ts.loc[:, col] = df_main_sector_ts.loc[:, col].str.\
-            replace(' ','', regex=False)
+        df_main_sector_ts.loc[:, col] = df_main_sector_ts.loc[:, col].str.replace(
+            "(?P<part1>[0-9]+),(?P<part2>[0-9\\.]+)$", repl, regex=True
+        )
+        df_main_sector_ts.loc[:, col] = df_main_sector_ts.loc[:, col].str.replace(
+            " ", "", regex=False
+        )
 
     data_main_sector_ts_IF = pm2.pm2io.convert_wide_dataframe_if(
         df_main_sector_ts,
         coords_cols=coords_cols_main_sector_ts,
-        #add_coords_cols=add_coords_cols,
+        # add_coords_cols=add_coords_cols,
         coords_defaults=coords_defaults_main_sector_ts,
         coords_terminologies=coords_terminologies,
         coords_value_mapping=coords_value_mapping,
-        #coords_value_filling=coords_value_filling,
+        # coords_value_filling=coords_value_filling,
         filter_remove=filter_remove,
-        #filter_keep=filter_keep,
+        # filter_keep=filter_keep,
         meta_data=meta_data,
         convert_str=True,
         time_format="%Y",
-        )
-
+    )
 
     # ###
     # read the indirect gases time series and convert to PM2 IF
     # ###
-    tables_indirect = camelot.read_pdf(str(input_folder / inventory_file), pages=page_indirect,
-                                        split_text=True, flavor="lattice")
+    tables_indirect = camelot.read_pdf(
+        str(input_folder / inventory_file),
+        pages=page_indirect,
+        split_text=True,
+        flavor="lattice",
+    )
 
     df_indirect = tables_indirect[0].df.iloc[2:]
-    #df_header = pd.DataFrame([header_main_sector_ts, unit_main_sector_ts])
-    #df_main_sector_ts = pd.concat([df_header, df_main_sector_ts], axis=0, join='outer')
+    # df_header = pd.DataFrame([header_main_sector_ts, unit_main_sector_ts])
+    # df_main_sector_ts = pd.concat([df_header, df_main_sector_ts], axis=0, join='outer')
     df_indirect.columns = [ind_conf["header"], ind_conf["unit"]]
 
     df_indirect = df_indirect.transpose()
@@ -188,29 +220,32 @@ if __name__ == "__main__":
     df_indirect = df_indirect.drop(0)
     df_indirect = df_indirect.drop(columns=ind_conf["cols_to_remove"])
 
-    def repl(m):
-        return m.group('part1') + m.group('part2')
-    year_cols = list(set(df_indirect.columns) - set(['entity', 'unit']))
+    def repl(m):  # noqa: D103
+        return m.group("part1") + m.group("part2")
+
+    year_cols = list(set(df_indirect.columns) - set(["entity", "unit"]))
     for col in year_cols:
-        df_indirect.loc[:, col] = df_indirect.loc[:, col].str.\
-            replace('(?P<part1>[0-9]+),(?P<part2>[0-9\\.]+)$', repl, regex=True)
-        df_indirect.loc[:, col] = df_indirect.loc[:, col].str.\
-            replace(' ','', regex=False)
+        df_indirect.loc[:, col] = df_indirect.loc[:, col].str.replace(
+            "(?P<part1>[0-9]+),(?P<part2>[0-9\\.]+)$", repl, regex=True
+        )
+        df_indirect.loc[:, col] = df_indirect.loc[:, col].str.replace(
+            " ", "", regex=False
+        )
 
     data_indirect_IF = pm2.pm2io.convert_wide_dataframe_if(
         df_indirect,
         coords_cols=coords_cols_indirect,
-        #add_coords_cols=add_coords_cols,
+        # add_coords_cols=add_coords_cols,
         coords_defaults=coords_defaults_indirect,
         coords_terminologies=coords_terminologies,
         coords_value_mapping=coords_value_mapping,
-        #coords_value_filling=coords_value_filling,
-        #filter_remove=filter_remove,
-        #filter_keep=filter_keep,
+        # coords_value_filling=coords_value_filling,
+        # filter_remove=filter_remove,
+        # filter_keep=filter_keep,
         meta_data=meta_data,
         convert_str=True,
         time_format="%Y",
-        )
+    )
 
     # ###
     # merge the three datasets
@@ -231,12 +266,15 @@ if __name__ == "__main__":
         output_folder.mkdir()
     pm2.pm2io.write_interchange_format(
         output_folder / (output_filename + coords_terminologies["category"] + "_raw"),
-        data_all_if)
+        data_all_if,
+    )
 
     encoding = {var: compression for var in data_all_pm2.data_vars}
     data_all_pm2.pr.to_netcdf(
-        output_folder / (output_filename + coords_terminologies["category"] + "_raw.nc"),
-        encoding=encoding)
+        output_folder
+        / (output_filename + coords_terminologies["category"] + "_raw.nc"),
+        encoding=encoding,
+    )
 
     # ###
     # ## process the data
@@ -244,14 +282,15 @@ if __name__ == "__main__":
     data_proc_pm2 = data_all_pm2
 
     # combine CO2 emissions and removals
-    data_proc_pm2["CO2"] = data_proc_pm2[["CO2 emissions", "CO2 removals"]].pr.sum\
-        (dim="entity", skipna=True, min_count=1)
-    data_proc_pm2["CO2"].attrs['entity'] = 'CO2'
+    data_proc_pm2["CO2"] = data_proc_pm2[["CO2 emissions", "CO2 removals"]].pr.sum(
+        dim="entity", skipna=True, min_count=1
+    )
+    data_proc_pm2["CO2"].attrs["entity"] = "CO2"
 
     # actual processing
     data_proc_pm2 = process_data_for_country(
         data_proc_pm2,
-        entities_to_ignore=['CO2 emissions', 'CO2 removals'],
+        entities_to_ignore=["CO2 emissions", "CO2 removals"],
         gas_baskets={},
         processing_info_country=country_processing_step1,
     )
@@ -261,16 +300,16 @@ if __name__ == "__main__":
         entities_to_ignore=[],
         gas_baskets=gas_baskets,
         processing_info_country=country_processing_step2,
-        cat_terminology_out = terminology_proc,
-        category_conversion = cat_conversion,
-        sectors_out = sectors_to_save,
+        cat_terminology_out=terminology_proc,
+        category_conversion=cat_conversion,
+        sectors_out=sectors_to_save,
     )
 
     # adapt source and metadata
     # TODO: processing info is present twice
-    current_source = data_proc_pm2.coords["source"].values[0]
+    current_source = data_proc_pm2.coords["source"].to_numpy()[0]
     data_temp = data_proc_pm2.pr.loc[{"source": current_source}]
-    data_proc_pm2 = data_proc_pm2.pr.set("source", 'BUR_NIR', data_temp)
+    data_proc_pm2 = data_proc_pm2.pr.set("source", "BUR_NIR", data_temp)
 
     # ###
     # save data to IF and native format
@@ -279,9 +318,10 @@ if __name__ == "__main__":
     if not output_folder.exists():
         output_folder.mkdir()
     pm2.pm2io.write_interchange_format(
-        output_folder / (output_filename + terminology_proc), data_proc_if)
+        output_folder / (output_filename + terminology_proc), data_proc_if
+    )
 
     encoding = {var: compression for var in data_proc_pm2.data_vars}
     data_proc_pm2.pr.to_netcdf(
-        output_folder / (output_filename + terminology_proc + ".nc"),
-        encoding=encoding)
+        output_folder / (output_filename + terminology_proc + ".nc"), encoding=encoding
+    )

+ 90 - 64
src/unfccc_ghg_data/unfccc_reader/Thailand/read_THA_BUR4_from_pdf.py

@@ -1,18 +1,27 @@
-# this script reads data from Thailand's BUR3
-# Data is read from two csv files which have been created manually from ocr processed
-# pdf files
-# pdftk Thailand_BUR4_final_28122022.pdf cat 65-67east output inventory_2019.pdf
-# ocrmypdf --force-ocr inventory_2019.pdf inventory_2019_ocr.pdf
-# pdftk Thailand_BUR4_final_28122022.pdf cat 69 output trends.pdf
-# ocrmypdf --force-ocr trends.pdf trends_ocr.pdf
-
-# values for HFCs and SF6 have been taken from Table2-9 where they are present in
-# CO2eq and thus HFC data can be used and SF6 data is not 0 as in the mein inventory
-# tables
+"""
+Read Thailand's BUR4 from pdf
+
+This script reads data from Thailand's BUR4
+Data is read from two csv files which have been created manually from ocr processed
+pdf files
+
+.. code-block:: bash
+
+    pdftk Thailand_BUR4_final_28122022.pdf cat 65-67east output inventory_2019.pdf
+    ocrmypdf --force-ocr inventory_2019.pdf inventory_2019_ocr.pdf
+    pdftk Thailand_BUR4_final_28122022.pdf cat 69 output trends.pdf
+    ocrmypdf --force-ocr trends.pdf trends_ocr.pdf
+
+Values for HFCs and SF6 have been taken from Table 2-9 where they are present in
+CO2eq and thus HFC data can be used and SF6 data is not 0 as in the main inventory
+tables
+
+"""
+
 
 import pandas as pd
 import primap2 as pm2
-from .config_tha_bur4 import (
+from config_tha_bur4 import (
     cat_codes_manual_main_sector_ts,
     cat_conversion,
     coords_cols,
@@ -33,36 +42,45 @@ from .config_tha_bur4 import (
     terminology_proc,
 )
 
-from unfccc_ghg_data.helper import downloaded_data_path, extracted_data_path, process_data_for_country
+from unfccc_ghg_data.helper import (
+    downloaded_data_path,
+    extracted_data_path,
+    process_data_for_country,
+)
 
 if __name__ == "__main__":
     # ###
     # configuration
     # ###
-    input_folder = downloaded_data_path / 'UNFCCC' / 'Thailand' / 'BUR4'
-    output_folder = extracted_data_path / 'UNFCCC' / 'Thailand'
+    input_folder = downloaded_data_path / "UNFCCC" / "Thailand" / "BUR4"
+    output_folder = extracted_data_path / "UNFCCC" / "Thailand"
     if not output_folder.exists():
         output_folder.mkdir()
 
-    inventory_file = 'THA_inventory_2019.csv'
-    trends_file = 'THA_trends_2000-2019.csv'
-    indirect_file = 'THA_indirect_2000-2019.csv'
-    output_filename = 'THA_BUR4_2022_'
+    inventory_file = "THA_inventory_2019.csv"
+    trends_file = "THA_trends_2000-2019.csv"
+    indirect_file = "THA_indirect_2000-2019.csv"
+    output_filename = "THA_BUR4_2022_"
 
     compression = dict(zlib=True, complevel=9)
 
-
     # ###
     # read the inventory data and convert to PM2 IF
     # ###
-    df_inventory = pd.read_csv(input_folder /inventory_file, header=None)
+    df_inventory = pd.read_csv(input_folder / inventory_file, header=None)
     df_inventory = pm2.pm2io.nir_add_unit_information(
-        df_inventory, unit_row=inv_conf["unit_row"], entity_row=inv_conf["entity_row"],
-        regexp_entity=".*", regexp_unit=".*", default_unit="Gg")
+        df_inventory,
+        unit_row=inv_conf["unit_row"],
+        entity_row=inv_conf["entity_row"],
+        regexp_entity=".*",
+        regexp_unit=".*",
+        default_unit="Gg",
+    )
     # set index and convert to long format
     df_inventory = df_inventory.set_index(inv_conf["index_cols"])
-    df_inventory_long = pm2.pm2io.nir_convert_df_to_long(df_inventory, inv_conf["year"],
-                                                         inv_conf["header_long"])
+    df_inventory_long = pm2.pm2io.nir_convert_df_to_long(
+        df_inventory, inv_conf["year"], inv_conf["header_long"]
+    )
     df_inventory_long["orig_cat_name"] = df_inventory_long["orig_cat_name"].str[0]
 
     # prep for conversion to PM2 IF and native format
@@ -71,14 +89,17 @@ if __name__ == "__main__":
 
     # replace cat names by codes in col "category"
     # first the manual replacements
-    df_inventory_long["category"] = \
-        df_inventory_long["category"].replace(inv_conf["cat_codes_manual"])
+    df_inventory_long["category"] = df_inventory_long["category"].replace(
+        inv_conf["cat_codes_manual"]
+    )
+
     # then the regex replacements
-    def repl(m):
-        return m.group('code')
-    df_inventory_long["category"] = \
-        df_inventory_long["category"].str.replace(inv_conf["cat_code_regexp"], repl,
-                                                  regex=True)
+    def repl(m):  # noqa: D103
+        return m.group("code")
+
+    df_inventory_long["category"] = df_inventory_long["category"].str.replace(
+        inv_conf["cat_code_regexp"], repl, regex=True
+    )
     df_inventory_long = df_inventory_long.reset_index(drop=True)
 
     # make sure all col headers are str
@@ -89,17 +110,17 @@ if __name__ == "__main__":
     data_inventory_IF = pm2.pm2io.convert_long_dataframe_if(
         df_inventory_long,
         coords_cols=coords_cols,
-        #add_coords_cols=add_coords_cols,
+        # add_coords_cols=add_coords_cols,
         coords_defaults=coords_defaults,
         coords_terminologies=coords_terminologies,
         coords_value_mapping=coords_value_mapping,
-        #coords_value_filling=coords_value_filling,
+        # coords_value_filling=coords_value_filling,
         filter_remove=filter_remove,
-        #filter_keep=filter_keep,
+        # filter_keep=filter_keep,
         meta_data=meta_data,
         convert_str=True,
         time_format="%Y",
-        )
+    )
 
     # ###
     # read the main sector time series and convert to PM2 IF
@@ -115,24 +136,24 @@ if __name__ == "__main__":
     df_main_sector_ts = df_main_sector_ts.drop(0)
 
     # replace cat names by codes in col "category"
-    df_main_sector_ts["category"] = \
-        df_main_sector_ts["category"].replace(cat_codes_manual_main_sector_ts)
+    df_main_sector_ts["category"] = df_main_sector_ts["category"].replace(
+        cat_codes_manual_main_sector_ts
+    )
 
     data_main_sector_ts_IF = pm2.pm2io.convert_wide_dataframe_if(
         df_main_sector_ts,
         coords_cols=coords_cols_main_sector_ts,
-        #add_coords_cols=add_coords_cols,
+        # add_coords_cols=add_coords_cols,
         coords_defaults=coords_defaults_main_sector_ts,
         coords_terminologies=coords_terminologies,
         coords_value_mapping=coords_value_mapping,
-        #coords_value_filling=coords_value_filling,
+        # coords_value_filling=coords_value_filling,
         filter_remove=filter_remove,
-        #filter_keep=filter_keep,
+        # filter_keep=filter_keep,
         meta_data=meta_data,
         convert_str=True,
-        time_format='%Y',
-        )
-
+        time_format="%Y",
+    )
 
     # ###
     # read the indirect gases time series and convert to PM2 IF
@@ -150,17 +171,17 @@ if __name__ == "__main__":
     data_indirect_IF = pm2.pm2io.convert_wide_dataframe_if(
         df_indirect,
         coords_cols=coords_cols_indirect,
-        #add_coords_cols=add_coords_cols,
+        # add_coords_cols=add_coords_cols,
         coords_defaults=coords_defaults_indirect,
         coords_terminologies=coords_terminologies,
         coords_value_mapping=coords_value_mapping,
-        #coords_value_filling=coords_value_filling,
-        #filter_remove=filter_remove,
-        #filter_keep=filter_keep,
+        # coords_value_filling=coords_value_filling,
+        # filter_remove=filter_remove,
+        # filter_keep=filter_keep,
         meta_data=meta_data,
         convert_str=True,
         time_format="%Y",
-        )
+    )
 
     # ###
     # merge the three datasets
@@ -181,12 +202,15 @@ if __name__ == "__main__":
         output_folder.mkdir()
     pm2.pm2io.write_interchange_format(
         output_folder / (output_filename + coords_terminologies["category"] + "_raw"),
-        data_all_if)
+        data_all_if,
+    )
 
     encoding = {var: compression for var in data_all_pm2.data_vars}
     data_all_pm2.pr.to_netcdf(
-        output_folder / (output_filename + coords_terminologies["category"] + "_raw.nc"),
-        encoding=encoding)
+        output_folder
+        / (output_filename + coords_terminologies["category"] + "_raw.nc"),
+        encoding=encoding,
+    )
 
     # ###
     # ## process the data
@@ -194,14 +218,15 @@ if __name__ == "__main__":
     data_proc_pm2 = data_all_pm2
 
     # combine CO2 emissions and removals
-    data_proc_pm2["CO2"] = data_proc_pm2[["CO2 emissions", "CO2 removals"]].pr.sum\
-        (dim="entity", skipna=True, min_count=1)
-    data_proc_pm2["CO2"].attrs['entity'] = 'CO2'
+    data_proc_pm2["CO2"] = data_proc_pm2[["CO2 emissions", "CO2 removals"]].pr.sum(
+        dim="entity", skipna=True, min_count=1
+    )
+    data_proc_pm2["CO2"].attrs["entity"] = "CO2"
 
     # actual processing
     data_proc_pm2 = process_data_for_country(
         data_proc_pm2,
-        entities_to_ignore=['CO2 emissions', 'CO2 removals'],
+        entities_to_ignore=["CO2 emissions", "CO2 removals"],
         gas_baskets={},
         processing_info_country=country_processing_step1,
     )
@@ -211,16 +236,16 @@ if __name__ == "__main__":
         entities_to_ignore=[],
         gas_baskets=gas_baskets,
         processing_info_country=country_processing_step2,
-        cat_terminology_out = terminology_proc,
-        category_conversion = cat_conversion,
-        sectors_out = sectors_to_save,
+        cat_terminology_out=terminology_proc,
+        category_conversion=cat_conversion,
+        sectors_out=sectors_to_save,
     )
 
     # adapt source and metadata
     # TODO: processing info is present twice
-    current_source = data_proc_pm2.coords["source"].values[0]
+    current_source = data_proc_pm2.coords["source"].to_numpy()[0]
     data_temp = data_proc_pm2.pr.loc[{"source": current_source}]
-    data_proc_pm2 = data_proc_pm2.pr.set("source", 'BUR_NIR', data_temp)
+    data_proc_pm2 = data_proc_pm2.pr.set("source", "BUR_NIR", data_temp)
 
     # ###
     # save data to IF and native format
@@ -229,9 +254,10 @@ if __name__ == "__main__":
     if not output_folder.exists():
         output_folder.mkdir()
     pm2.pm2io.write_interchange_format(
-        output_folder / (output_filename + terminology_proc), data_proc_if)
+        output_folder / (output_filename + terminology_proc), data_proc_if
+    )
 
     encoding = {var: compression for var in data_proc_pm2.data_vars}
     data_proc_pm2.pr.to_netcdf(
-        output_folder / (output_filename + terminology_proc + ".nc"),
-        encoding=encoding)
+        output_folder / (output_filename + terminology_proc + ".nc"), encoding=encoding
+    )

+ 16 - 1
src/unfccc_ghg_data/unfccc_reader/__init__.py

@@ -1 +1,16 @@
-#
+"""Read individual country submissions
+
+The UNFCCC reader contains code to read individual country inventories,
+mostly submitted by non-Annex I countries to the UNFCCC as Biennial Update Reports (
+BUR), National Communications (NC), and National Inventory Reports (NIR). Code to
+read other official country repositories is also included here as it uses the same
+setup.
+
+The code is organized in country folders which contain scripts for each submission
+and configuration files which can also be used for several submissions if the
+configuration is sufficiently similar.
+
+Data are mostly read from pdf files using camelot, but in some cases machine-readable
+files like xlsx are available which we prefer over pdfs.
+
+"""

+ 26 - 19
src/unfccc_ghg_data/unfccc_reader/get_submissions_info.py

@@ -1,19 +1,28 @@
-# helper functions to get information on available submissions
-# and data reading functions for a given country
+"""
+Helper functions for the unfccc_reader
+
+helper functions to get information on available submissions
+and data reading functions for a given country
+"""
 
 import json
 from pathlib import Path
 
-from unfccc_ghg_data.helper import (downloaded_data_path, extracted_data_path,
-                                    get_country_code, root_path)
+from unfccc_ghg_data.helper import (
+    downloaded_data_path,
+    extracted_data_path,
+    get_country_code,
+    root_path,
+)
 
 code_path = root_path / "src" / "unfccc_ghg_data" / "unfccc_reader"
 # TODO: change this to use the code path stored in the helper module
 
+
 def get_possible_inputs(
-        country_name: str,
-        submission: str,
-        print_info: bool = False,
+    country_name: str,
+    submission: str,
+    print_info: bool = False,
 ) -> list[Path]:
     """
     For given country name and submission find the possible input files
@@ -71,10 +80,10 @@ def get_possible_inputs(
 
 
 def get_possible_outputs(
-        country_name: str,
-        submission: str,
-        print_info: bool = False,
-)-> list[Path]:
+    country_name: str,
+    submission: str,
+    print_info: bool = False,
+) -> list[Path]:
     """
     For given country name and submission find the possible output files
 
@@ -109,11 +118,15 @@ def get_possible_outputs(
             if country_code in folder_mapping:
                 country_folder = folder_mapping[country_code]
                 if not isinstance(country_folder, str):
-                    raise ValueError("Wrong data type in folder mapping json file. Should be str.")
+                    raise ValueError(
+                        "Wrong data type in folder mapping " "json file. Should be str."
+                    )
 
                 output_folder = item / country_folder
                 if output_folder.exists():
-                    for filepath in output_folder.glob(country_code + "_" + submission + "*"):
+                    for filepath in output_folder.glob(
+                        country_code + "_" + submission + "*"
+                    ):
                         output_files.append(filepath.relative_to(root_path))
 
     if print_info:
@@ -125,9 +138,3 @@ def get_possible_outputs(
             print("No output files found")
 
     return output_files
-
-
-
-
-
-

+ 25 - 14
src/unfccc_ghg_data/unfccc_reader/read_UNFCCC_submission.py

@@ -1,28 +1,34 @@
-# this script takes submission and country as input (from doit) and
-# runs the appropriate script to extract the submission data
+"""
+wrapper to read UNFCCC submission
+
+Take submission and country as input (from doit) and
+run the appropriate script to extract the submission data
+
+"""
+
 
 import argparse
 
 import datalad.api
-from .get_submissions_info import (get_possible_inputs, get_possible_outputs)
 
 from unfccc_ghg_data.helper import get_code_file, root_path
 
+from .get_submissions_info import get_possible_inputs, get_possible_outputs
+
 if __name__ == "__main__":
     # Find the right function and possible input and output files and
     # read the data using datalad run.
     parser = argparse.ArgumentParser()
-    parser.add_argument('--country', help='Country name or code')
-    parser.add_argument('--submission', help='Submission to read')
+    parser.add_argument("--country", help="Country name or code")
+    parser.add_argument("--submission", help="Submission to read")
 
     args = parser.parse_args()
 
     country = args.country
     submission = args.submission
 
-
     print(f"Attempting to extract data for {submission} from {country}.")
-    print("#"*80)
+    print("#" * 80)
     print("")
 
     # get the correct script
@@ -35,8 +41,10 @@ if __name__ == "__main__":
         # get possible input files
         input_files = get_possible_inputs(country, submission)
         if not input_files:
-            print(f"No possible input files found for {country}, {submission}. "
-                  f"Something might be wrong here.")
+            print(
+                f"No possible input files found for {country}, {submission}. "
+                f"Something might be wrong here."
+            )
         else:
             print("Found the following input_files:")
             for file in input_files:
@@ -51,8 +59,10 @@ if __name__ == "__main__":
         # get possible output files
         output_files = get_possible_outputs(country, submission)
         if not output_files:
-            print(f"No possible output files found for {country}, {submission}. "
-                  f"This is either the first run or something is wrong.")
+            print(
+                f"No possible output files found for {country}, {submission}. "
+                f"This is either the first run or something is wrong."
+            )
         else:
             print("Found the following output_files:")
             for file in output_files:
@@ -74,6 +84,7 @@ if __name__ == "__main__":
     else:
         # no code found.
         print(f"No code found to read {submission} from {country}")
-        print(f"Use 'doit country_info country={country} to get "
-              f"a list of available submissions and datasets.")
-
+        print(
+            f"Use 'doit country_info country={country}' to get "
+            f"a list of available submissions and datasets."
+        )