Browse Source

add code for Mauritania BUR2, fix problems in DI configuration

Johannes Gütschow 9 months ago
parent
commit
48a3ba0c7a

+ 1 - 0
pyproject.toml

@@ -130,6 +130,7 @@ docstring-code-format = true
 ]
 "src/unfccc_ghg_data/unfccc_reader/*/config_*.py" = [
     "E501",  # don't enforce line length
+    "RUF001",  # don't check for ambiguous unicode characters in strings
 ]
 "src/unfccc_ghg_data/unfccc_crf_reader/crf_specifications/*_specification.py" = [
     "E501",  # don't enforce line length

+ 4 - 5
src/unfccc_ghg_data/helper/functions.py

@@ -173,9 +173,7 @@ def process_data_for_country(  # noqa PLR0913, PLR0912, PLR0915
                 remove_info = copy.deepcopy(processing_info_country["remove_ts"][case])
                 entities = remove_info.pop("entities")
                 for entity in entities:
-                    data_country[entity].pr.loc[remove_info] = (
-                        data_country[entity].pr.loc[remove_info] * np.nan
-                    )
+                    data_country[entity].pr.loc[remove_info] *= np.nan
 
         # remove all data for given years if necessary
         if "remove_years" in processing_info_country:
@@ -270,7 +268,7 @@ def process_data_for_country(  # noqa PLR0913, PLR0912, PLR0915
             warnings.warn(
                 'The "aggregate_cats" flag is deprecated and will '
                 "be removed in a future version. Please use "
-                '"aggregate_coord" with key "category" instead',
+                '"aggregate_coords" with key "category" instead',
                 category=DeprecationWarning,
             )
             print(
@@ -293,7 +291,7 @@ def process_data_for_country(  # noqa PLR0913, PLR0912, PLR0915
                 min_count=1,
             )
 
-        if "aggregate_coord" in processing_info_country:
+        if "aggregate_coords" in processing_info_country:
             print(
                 f"Aggregating data for country {country_code}, source {source}, "
                 f"scenario {scenario}"
@@ -942,6 +940,7 @@ def fix_rows(
         new_row = new_row.str.replace("N O", "NO")
         new_row = new_row.str.replace(", N", ",N")
         new_row = new_row.str.replace("- ", "-")
+        new_row = new_row.str.strip()
         # replace spaces in numbers
         pat = r"^(?P<first>[0-9\.,]*)\s(?P<last>[0-9\.,]*)$"
 

+ 35 - 25
src/unfccc_ghg_data/unfccc_di_reader/unfccc_di_reader_config.py

@@ -1989,18 +1989,18 @@ di_processing_templates = {
     # MKD:
     "MKD": {
         "DI2023-05-24": {  # 1990-2009
-            "downscale": {
-                "entities": {
-                    "FGASES": {
-                        "basket": f"FGASES ({gwp_to_use})",
-                        "basket_contents": [f"HFCS ({gwp_to_use})"],
-                    },
-                    "HFC": {
-                        "basket": f"HFCS ({gwp_to_use})",
-                        "basket_contents": [f"UnspMixOfHFCs ({gwp_to_use})"],
-                    },
-                },
-            },
+            # "downscale": {
+            #     "entities": {
+            #         "FGASES": {
+            #             "basket": f"FGASES ({gwp_to_use})",
+            #             "basket_contents": [f"HFCS ({gwp_to_use})"],
+            #         },
+            #         "HFC": {
+            #             "basket": f"HFCS ({gwp_to_use})",
+            #             "basket_contents": [f"UnspMixOfHFCs ({gwp_to_use})"],
+            #         },
+            #     },
+            # },
             "basket_copy": {
                 "GWPs_to_add": ["AR4GWP100", "AR5GWP100", "AR6GWP100"],
                 "entities": ["UnspMixOfHFCs"],
@@ -2020,16 +2020,16 @@ di_processing_templates = {
                         "sel": {"time": ["1995", "2000"]},
                     },
                 },
-                "entities": {
-                    "FGASES": {
-                        "basket": f"FGASES ({gwp_to_use})",
-                        "basket_contents": [f"HFCS ({gwp_to_use})"],
-                    },
-                    "HFC": {
-                        "basket": f"HFCS ({gwp_to_use})",
-                        "basket_contents": [f"UnspMixOfHFCs ({gwp_to_use})"],
-                    },
-                },
+                # "entities": {
+                #     "FGASES": {
+                #         "basket": f"FGASES ({gwp_to_use})",
+                #         "basket_contents": [f"HFCS ({gwp_to_use})"],
+                #     },
+                #     "HFC": {
+                #         "basket": f"HFCS ({gwp_to_use})",
+                #         "basket_contents": [f"UnspMixOfHFCs ({gwp_to_use})"],
+                #     },
+                # },
             },
             "basket_copy": {
                 "GWPs_to_add": ["AR4GWP100", "AR5GWP100", "AR6GWP100"],
@@ -2268,13 +2268,23 @@ di_processing_templates = {
             },
             "remove_ts": {
                 "M.AG.ELV": {
-                    "category": ["4", "4.D", "4.E", "4.F", "15163", "24540"],
-                    "entities": ["N2O", f"KYOTOGHG ({gwp_to_use})"],
+                    "category": [
+                        "4",
+                        "4.A",
+                        "4.B",
+                        "4.C",
+                        "4.D",
+                        "4.E",
+                        "4.F",
+                        "15163",
+                        "24540",
+                    ],
+                    "entities": ["N2O", f"KYOTOGHG ({gwp_to_use})", "CH4", "NOx", "CO"],
                     "time": ["1993"],
                 },
             },
         },
-    },
+    },  # TODO: inconsistency through removed data for KYOTOGHG (SARGWP100)
     # TGO: more data in BUR / NIR, 1992-1998, 2000, 2005, 2010, 2013-2018 (
     # downscaling needed for some years, inconsistent detail)
     # THA: 1994 (2000-2013, extensive downscaling needed for 2000-2012).

+ 30 - 0
src/unfccc_ghg_data/unfccc_reader/Mauritania/__init__.py

@@ -0,0 +1,30 @@
+"""Read Mauritania's BURs, NIRs, NCs
+
+Scripts and configurations to read Mauritania's submissions to the UNFCCC.
+Currently, the following submissions and datasets are available (all datasets
+including DI (read using the DI-reader) and legacy BUR/NIR (no code)):
+
+.. exec_code::
+    :hide_code:
+
+    from unfccc_ghg_data.helper.functions import (get_country_datasets,
+                                                  get_country_submissions)
+    country = 'MRT'
+    # print available submissions
+    print("="*15 + " Available submissions " + "="*15)
+    get_country_submissions(country, True)
+    print("")
+
+    #print available datasets
+    print("="*15 + " Available datasets " + "="*15)
+    get_country_datasets(country, True)
+
+You can also obtain this information running
+
+.. code-block:: bash
+
+    poetry run doit country_info country=MRT
+
+See below for a listing of scripts for BUR/NIR reading including links.
+
+"""

+ 803 - 0
src/unfccc_ghg_data/unfccc_reader/Mauritania/config_mrt_bur2.py

@@ -0,0 +1,803 @@
+"""Config for Mauritania BUR2
+
+Configuration for reading the Mauritania's BUR2 from pdf.
+Full configuration is contained here including configuration for conversions to
+primap2 data format.
+
+Not nicely structured, just copied from old script as this was integrated under time
+pressure to fix a GWP bug (wrong GWP stated in report)
+"""
+
+gwp_to_use = "AR4GWP100"
+terminology_proc = "IPCC2006_PRIMAP"
+
+table_defs = {
+    1990: [[0], [1], [2], [21], [28, 29], [42]],
+    1995: [[3], [4], [5], [22], [30, 31], [43]],
+    2000: [[6], [7], [8], [23], [32, 33], [44]],
+    2010: [[9], [10], [11], [24], [34, 35], [45]],
+    2012: [[12], [13], [14], [25], [36, 37], [46]],
+    2015: [[15], [16], [17], [26], [38, 39], [47]],
+    2018: [[18], [19], [20], [27], [40, 41], [48]],
+}
+
+page_def_templates = {
+    "24": {
+        "area": ["51,745,579,87"],
+        "cols": ["309,344,386,429,464,494,535"],
+    },
+    "odd": {  # 25, 27, 29, 31, 33, 35, 37
+        "area": ["51,745,551,244", "55,231,554,118"],
+        "cols": ["276,316,361,403,438,468,509", "276,319,361,407,441,472,511"],
+    },
+    "even": {  # 26, 28, 30, 32,34, 36
+        "area": ["51,745,579,87"],
+        "cols": ["304,344,386,429,464,494,535"],
+    },
+    "25": {  # 27, 29, 31, 33, 35, 37
+        "area": ["51,745,551,244", "55,231,554,118"],
+        "cols": ["276,316,361,403,438,468,509", "276,319,361,407,441,472,511"],
+    },
+    "26": {  # 28, 30, 32,34, 36
+        "area": ["51,745,579,87"],
+        "cols": ["309,344,386,429,464,494,535"],
+    },
+    "38": {
+        "area": ["33,749,566,54"],
+        "cols": ["220,243,263,283,308,336,359,415,471,493,517,546"],
+    },
+    "39": {
+        "area": ["32,749,577,54"],
+        "cols": ["224,254,275,294,320,345,367,426,482,503,525,553"],
+    },
+    "40": {
+        "area": ["32,749,577,54"],
+        "cols": ["224,245,265,287,314,338,360,420,476,496,518,546"],
+    },
+    "41": {  # 42
+        "area": ["32,749,577,54"],
+        "cols": ["220,245,265,287,314,338,360,420,476,496,518,546"],
+    },
+    "43": {
+        "area": ["32,749,577,54"],
+        "cols": ["220,245,268,287,314,338,360,420,476,496,518,546"],
+    },
+    "44": {
+        "area": ["32,749,577,54"],
+        "cols": ["220,245,268,283,314,338,360,420,476,496,518,546"],
+    },
+    "45": {
+        "area": ["66,716,556,49"],
+        "cols": ["287,362,399,441,479,515"],
+    },
+    "46": {
+        "area": ["68,779,554,715", "68,677,554,52"],
+        "cols": ["287,362,399,441,479,515", "308,387,423,453,480,510"],
+    },
+    "47": {
+        "area": ["68,779,556,670", "67,640,555,48"],
+        "cols": ["308,387,423,453,480,510", "308,387,423,453,480,510"],
+    },
+    "48": {
+        "area": ["67,778,552,639", "67,610,553,49"],
+        "cols": ["308,387,423,453,480,510", "308,387,423,453,480,510"],
+    },
+    "49": {
+        "area": ["67,778,552,609", "67,579,553,49"],
+        "cols": ["308,387,423,453,480,510", "308,387,423,453,480,510"],
+    },
+    "50": {
+        "area": ["67,778,552,578", "67,550,553,49"],
+        "cols": ["308,387,423,453,480,510", "308,387,423,453,480,510"],
+    },
+    "51": {
+        "area": ["67,778,552,549"],
+        "cols": ["308,387,423,453,480,510"],
+    },
+    "52": {
+        "area": ["67,753,549,54"],
+        "cols": ["308,387,423,453,480,510"],
+    },
+    "53": {
+        "area": ["68,779,556,737"],
+        "cols": ["308,387,423,453,480,510"],
+    },
+    "54": {
+        "area": ["56,751,565,616", "56,587,565,449", "56,419,565,252", "56,217,565,74"],
+        "cols": [
+            "282,315,346,412,447,482,528",
+            "282,315,346,412,447,482,528",
+            "282,315,346,412,447,482,528",
+            "282,315,346,412,447,482,528",
+        ],
+    },
+    "55": {
+        "area": ["56,752,565,600", "56,563,565,408", "56,369,565,216"],
+        "cols": [
+            "282,315,346,412,447,482,528",
+            "282,315,346,412,447,482,528",
+            "282,315,346,412,447,482,528",
+        ],
+    },
+}
+
+header_templates = {
+    "24": {  # tables 0:20, 42: end
+        "entity": ["Catégories", "CO2", "CH4", "N2O", "NOx", "CO", "NMVOCs", "SO2"],
+        "unit": ["", "Gg", "Gg", "Gg", "Gg", "Gg", "Gg", "Gg"],
+        "rows": 2,
+    },
+    "38": {  # tables 21:27
+        "entity": [
+            "Catégories",
+            "CO2",
+            "CH4",
+            "N2O",
+            "HFCs",
+            "PFCs",
+            "SF6",
+            "Autres gaz halogénés avec facteurs de conversion équivalent CO2",
+            "Autres gaz halogénés sans facteurs de conversion équivalent CO2",
+            "NOx",
+            "CO",
+            "NMVOCs",
+            "SO2",
+        ],
+        "unit": [
+            "",
+            "Gg",
+            "Gg",
+            "Gg",
+            "GgCO2eq",
+            "GgCO2eq",
+            "GgCO2eq",
+            "GgCO2eq",
+            "Gg",
+            "Gg",
+            "Gg",
+            "Gg",
+            "Gg",
+        ],
+        "rows": 7,
+    },
+    "45": {  # tables 28:41
+        "entity": [
+            "Catégories",
+            "Émissions/ absorptions CO2",
+            "CH4",
+            "N2O",
+            "NOx",
+            "CO",
+            "COVNM",
+        ],
+        "unit": ["", "Gg", "Gg", "Gg", "Gg", "Gg", "Gg"],
+        "rows": 4,
+    },
+    "54": {  # tables 42:
+        "entity": ["Catégories", "CO2", "CH4", "N2O", "NOx", "CO", "NMVOCs", "SO2"],
+        "unit": ["", "Gg", "Gg", "Gg", "Gg", "Gg", "Gg", "Gg"],
+        "rows": 3,
+    },
+}
+
+fix_rows_template = {
+    "24": {
+        2: [
+            "1.A.1.c   Transformation des combustibles solides et autres industries",
+        ],
+    },
+    "25_1": {
+        2: [
+            "1.B.1.a.i.2  Emissions de gaz des couches lors des activités",
+            "1.B.1.a.i.4 Combustion du méthane asséché ou",
+            "1.B.1.a.ii.2  Emissions de gaz des couches lors des",
+        ],
+    },
+    "25_2": {
+        2: [
+            "Émissions de CO2 imputables à la combustion de labiomasse pour",
+        ],
+    },
+    "26": {
+        2: [
+            "1.A.1.c   Transformation des combustibles solides et autres industries",
+            "1.A.2.i Industries extractives (à l’exclusion de l’extraction de",
+        ],
+    },
+    "38": {
+        2: [
+            "2.D   Produits non énergétiques imputables aux",
+            "2.F   Utilisations de produits comme substituts de",
+        ],
+    },
+    "39": {
+        2: [
+            "2.D   Produits non énergétiques imputables aux combustibles",
+            "2.F   Utilisations de produits comme substituts de substances",
+        ],
+    },
+    "44": {
+        -2: [
+            "2  PROCÉDÉS INDUSTRIELS ET UTIL. DES PRODUITS",
+        ],
+        2: [
+            "2.D   Produits non énergétiques imputables aux",
+            "2.F   Utilisations de produits comme substituts de",
+        ],
+    },
+}
+
+table_reading_defs = {
+    0: {
+        "page": "24",
+        "table": 0,
+        "page_def": page_def_templates["24"],
+        "header": header_templates["24"],
+        "fix_rows": fix_rows_template["24"],
+    },
+    1: {
+        "page": "25",
+        "table": 0,
+        "page_def": page_def_templates["odd"],
+        "header": header_templates["24"],
+        "fix_rows": fix_rows_template["25_1"],
+    },
+    2: {
+        "page": "25",
+        "table": 1,
+        "page_def": page_def_templates["odd"],
+        "header": header_templates["24"],
+        "fix_rows": fix_rows_template["25_2"],
+    },
+    3: {
+        "page": "26",
+        "table": 0,
+        "page_def": page_def_templates["even"],
+        "header": header_templates["24"],
+        "fix_rows": fix_rows_template["26"],
+    },
+    4: {
+        "page": "27",
+        "table": 0,
+        "page_def": page_def_templates["odd"],
+        "header": header_templates["24"],
+        "fix_rows": fix_rows_template["25_1"],
+    },
+    5: {
+        "page": "27",
+        "table": 1,
+        "page_def": page_def_templates["odd"],
+        "header": header_templates["24"],
+        "fix_rows": fix_rows_template["25_2"],
+    },
+    6: {
+        "page": "28",
+        "table": 0,
+        "page_def": page_def_templates["even"],
+        "header": header_templates["24"],
+        "fix_rows": fix_rows_template["26"],
+    },
+    7: {
+        "page": "29",
+        "table": 0,
+        "page_def": page_def_templates["odd"],
+        "header": header_templates["24"],
+        "fix_rows": fix_rows_template["25_1"],
+    },
+    8: {
+        "page": "29",
+        "table": 1,
+        "page_def": page_def_templates["odd"],
+        "header": header_templates["24"],
+        "fix_rows": fix_rows_template["25_2"],
+    },
+    9: {
+        "page": "30",
+        "table": 0,
+        "page_def": page_def_templates["even"],
+        "header": header_templates["24"],
+        "fix_rows": fix_rows_template["26"],
+    },
+    10: {
+        "page": "31",
+        "table": 0,
+        "page_def": page_def_templates["odd"],
+        "header": header_templates["24"],
+        "fix_rows": fix_rows_template["25_1"],
+    },
+    11: {
+        "page": "31",
+        "table": 1,
+        "page_def": page_def_templates["odd"],
+        "header": header_templates["24"],
+        "fix_rows": fix_rows_template["25_2"],
+    },
+    12: {
+        "page": "32",
+        "table": 0,
+        "page_def": page_def_templates["even"],
+        "header": header_templates["24"],
+        "fix_rows": fix_rows_template["26"],
+    },
+    13: {
+        "page": "33",
+        "table": 0,
+        "page_def": page_def_templates["odd"],
+        "header": header_templates["24"],
+        "fix_rows": fix_rows_template["25_1"],
+    },
+    14: {
+        "page": "33",
+        "table": 1,
+        "page_def": page_def_templates["odd"],
+        "header": header_templates["24"],
+        "fix_rows": fix_rows_template["25_2"],
+    },
+    15: {
+        "page": "34",
+        "table": 0,
+        "page_def": page_def_templates["even"],
+        "header": header_templates["24"],
+        "fix_rows": fix_rows_template["26"],
+    },
+    16: {
+        "page": "35",
+        "table": 0,
+        "page_def": page_def_templates["odd"],
+        "header": header_templates["24"],
+        "fix_rows": fix_rows_template["25_1"],
+    },
+    17: {
+        "page": "35",
+        "table": 1,
+        "page_def": page_def_templates["odd"],
+        "header": header_templates["24"],
+        "fix_rows": fix_rows_template["25_2"],
+    },
+    18: {
+        "page": "36",
+        "table": 0,
+        "page_def": page_def_templates["even"],
+        "header": header_templates["24"],
+        "fix_rows": fix_rows_template["26"],
+    },
+    19: {
+        "page": "37",
+        "table": 0,
+        "page_def": page_def_templates["odd"],
+        "header": header_templates["24"],
+        "fix_rows": fix_rows_template["25_1"],
+    },
+    20: {
+        "page": "37",
+        "table": 1,
+        "page_def": page_def_templates["odd"],
+        "header": header_templates["24"],
+        "fix_rows": fix_rows_template["25_2"],
+    },
+    21: {
+        "page": "38",
+        "table": 0,
+        "page_def": page_def_templates["38"],
+        "header": header_templates["38"],
+        "fix_rows": fix_rows_template["38"],
+    },
+    22: {
+        "page": "39",
+        "table": 0,
+        "page_def": page_def_templates["39"],
+        "header": header_templates["38"],
+        "fix_rows": fix_rows_template["39"],
+    },
+    23: {
+        "page": "40",
+        "table": 0,
+        "page_def": page_def_templates["40"],
+        "header": header_templates["38"],
+        "fix_rows": fix_rows_template["38"],
+    },
+    24: {
+        "page": "41",
+        "table": 0,
+        "page_def": page_def_templates["41"],
+        "header": header_templates["38"],
+        "fix_rows": fix_rows_template["38"],
+    },
+    25: {
+        "page": "42",
+        "table": 0,
+        "page_def": page_def_templates["41"],
+        "header": header_templates["38"],
+        "fix_rows": fix_rows_template["38"],
+    },
+    26: {
+        "page": "43",
+        "table": 0,
+        "page_def": page_def_templates["43"],
+        "header": header_templates["38"],
+        "fix_rows": fix_rows_template["38"],
+    },
+    27: {
+        "page": "44",
+        "table": 0,
+        "page_def": page_def_templates["44"],
+        "header": header_templates["38"],
+        "fix_rows": fix_rows_template["44"],
+    },
+    28: {
+        "page": "45",
+        "table": 0,
+        "page_def": page_def_templates["45"],
+        "header": header_templates["45"],
+    },
+    29: {
+        "page": "46",
+        "table": 0,
+        "page_def": page_def_templates["46"],
+        "header": header_templates["45"],
+    },
+    30: {
+        "page": "46",
+        "table": 1,
+        "page_def": page_def_templates["46"],
+        "header": header_templates["45"],
+    },
+    31: {
+        "page": "47",
+        "table": 0,
+        "page_def": page_def_templates["47"],
+        "header": header_templates["45"],
+    },
+    32: {
+        "page": "47",
+        "table": 1,
+        "page_def": page_def_templates["47"],
+        "header": header_templates["45"],
+    },
+    33: {
+        "page": "48",
+        "table": 0,
+        "page_def": page_def_templates["48"],
+        "header": header_templates["45"],
+    },
+    34: {
+        "page": "48",
+        "table": 1,
+        "page_def": page_def_templates["48"],
+        "header": header_templates["45"],
+    },
+    35: {
+        "page": "49",
+        "table": 0,
+        "page_def": page_def_templates["49"],
+        "header": header_templates["45"],
+    },
+    36: {
+        "page": "49",
+        "table": 1,
+        "page_def": page_def_templates["49"],
+        "header": header_templates["45"],
+    },
+    37: {
+        "page": "50",
+        "table": 0,
+        "page_def": page_def_templates["50"],
+        "header": header_templates["45"],
+    },
+    38: {
+        "page": "50",
+        "table": 1,
+        "page_def": page_def_templates["50"],
+        "header": header_templates["45"],
+    },
+    39: {
+        "page": "51",
+        "table": 0,
+        "page_def": page_def_templates["51"],
+        "header": header_templates["45"],
+    },
+    40: {
+        "page": "52",
+        "table": 0,
+        "page_def": page_def_templates["52"],
+        "header": header_templates["45"],
+    },
+    41: {
+        "page": "53",
+        "table": 0,
+        "page_def": page_def_templates["53"],
+        "header": header_templates["45"],
+    },
+    42: {
+        "page": "54",
+        "table": 0,
+        "page_def": page_def_templates["54"],
+        "header": header_templates["54"],
+    },
+    43: {
+        "page": "54",
+        "table": 1,
+        "page_def": page_def_templates["54"],
+        "header": header_templates["54"],
+    },
+    44: {
+        "page": "54",
+        "table": 2,
+        "page_def": page_def_templates["54"],
+        "header": header_templates["54"],
+    },
+    45: {
+        "page": "54",
+        "table": 2,
+        "page_def": page_def_templates["54"],
+        "header": header_templates["54"],
+    },
+    46: {
+        "page": "55",
+        "table": 0,
+        "page_def": page_def_templates["55"],
+        "header": header_templates["54"],
+    },
+    47: {
+        "page": "55",
+        "table": 1,
+        "page_def": page_def_templates["55"],
+        "header": header_templates["54"],
+    },
+    48: {
+        "page": "55",
+        "table": 2,
+        "page_def": page_def_templates["55"],
+        "header": header_templates["54"],
+    },
+}
+
+
+remove_per_table = [
+    [
+        "1.A.3.d.i di Navigation internationale (soutes internationales) (1)",
+        "1.A.3.a.i ai Aviation internationale (soutes internationales) (1)",
+        "1.A.5.c Opérations multilatérales (1) (2)",
+    ],  # these could also be removed globally as names slightly different
+    [],
+    [],
+    [],
+    [],
+    [],
+]
+
+fix_cat_values = {
+    "Catégorie": "Catégories",
+}
+
+fix_cat_using_preceeding = {  # fix cat code based on cat code before
+    "3.A.2.i Volaille": {"3.A.1.h Porcins": "3.A.1.i Volaille"},
+}
+
+# definitions for conversion to long format with standardized unit format
+unit_row = 0
+entity_row = 1
+unit_entity_rows = [unit_row, entity_row]
+
+index_cols = ["Catégories"]
+
+# special header as category code and name in one column
+header_long = ["orig_cat_name", "entity", "unit", "time", "data"]
+
+overlap_problems = {
+    "1.A.2.i Industries extractives (à l’exclusion de l’extraction de combustibles) 113,528": [
+        "1.A.2.i Industries extractives (à l’exclusion de l’extraction de combustibles)",
+        "113,528",
+    ],
+}
+
+## definitions part 2: conversion to PRIMAP2 interchange format
+
+# rows to remove
+cats_remove = ["Information Items", "Memo Items (3)"]
+
+# manual category codes
+cat_codes_manual = {
+    "Soutesinternationales": "M.BK",
+    "Émissions de CO2 imputables à la combustion de labiomasse pour la production d’énergie": "M.BIO",
+    "1.A.3.d.i Navigation internationale": "M.BK.M",
+    "1.A.3.a.i Aviation internationale": "M.BK.A",
+    "1.A.5.c - Opérations multilatérales": "M.MULTIOP",
+}
+
+cat_code_regexp = r"(?P<code>^[a-zA-Z0-9\.]{1,14})\s.*"
+
+coords_terminologies = {
+    "area": "ISO3",
+    "category": "IPCC2006_PRIMAP",
+    "scenario": "PRIMAP",
+}
+
+coords_defaults = {
+    "source": "Mauritania-GHG-inventory",
+    "provenance": "measured",
+    "area": "MRT",
+    "scenario": "BUR2",
+}
+
+coords_value_mapping = {
+    "unit": "PRIMAP1",
+    "entity": {
+        "HFCs": f"HFCS ({gwp_to_use})",
+        "NMVOCs": "NMVOC",
+        "COVNM": "NMVOC",
+        "Net CO2": "CO2",
+        "Émissions/ absorptions CO2": "CO2",
+        "Émissions/ absorptions nettes de CO2": "CO2",
+        "Autres gaz halogénés avec facteurs de conversion équivalent CO2": f"OTHERHFCS ({gwp_to_use})",
+        #'Other halogenated gases without CO2 equivalent conversion factors (2)': 'OTHERHFCS',
+        "PFCs": f"PFCS ({gwp_to_use})",
+        "SF6": f"SF6 ({gwp_to_use})",
+        "HFC-23": "HFC23",
+        "HFC-32": "HFC32",
+        "HFC-41": "HFC41",
+        "HFC-43-10mee": "HFC4310mee",
+        "HFC-125": "HFC125",
+        "HFC-134": "HFC134",
+        "HFC-134a": "HFC134a",
+        "HFC-152a": "HFC152a",
+        "HFC-143": "HFC143",
+        "HFC-143a": "HFC143a",
+        "HFC-227ea": "HFC227ea",
+        "HFC-236fa": "HFC236fa",
+        "HFC-245ca": "HFC245ca",
+        "c-C4F8": "cC4F8",
+    },
+}
+
+coords_cols = {"category": "category", "entity": "entity", "unit": "unit"}
+
+add_coords_cols = {
+    "orig_cat_name": ["orig_cat_name", "category"],
+}
+
+filter_remove = {
+    "f1": {
+        "entity": ["Autres gaz halogénés sans facteurs de conversion équivalent CO2"],
+    },
+}
+
+meta_data = {
+    "references": "https://unfccc.int/documents/279303",
+    "rights": "",
+    "contact": "mail@johannes-guetschow.de",
+    "title": "République Islamique de Mauritanie - RAPPORT NATIONAL DES INVENTAIRES DES GAZ A EFFET DE SERRE - RNI",
+    "comment": "Read fom pdf file (Mauritania BUR 2 - NIR Annexes - May 2020.pdf) by Johannes Gütschow. ",
+    "institution": "United Nations Framework Convention on Climate Change (UNFCCC)",
+}
+
+# part 3 fgases defintions
+table_defs_fgases = {
+    1990: [0],
+    1995: [1, 2],
+    2000: [3],
+    2010: [4, 5],
+    2012: [6],
+    2015: [7, 8],
+    2018: [9],
+}
+pages_fgases = ["92", "92", "93", "93", "93", "94", "94", "94", "95", "95"]
+
+area_fgases = [
+    "55,508,833,280",
+    "55,263,833,50",
+    "55,530,833,511",
+    "55,491,833,264",
+    "55,244,833,51",
+    "55,532,833,493",
+    "55,473,833,245",
+    "55,224,833,53",
+    "55,530,833,473",
+    "55,430,833,200",
+]
+cols_fgases = [
+    "259,300,320,345,373,391,422,444,465,486,508,534,561,585,613,642,671,693,721,748,776,805"
+]
+
+rows_to_fix_fgases = {
+    2: ["2.F   Utilisations de produits comme substituts de substances"],
+    -3: ["Catégories"],
+}
+
+# definitions for conversion to long format with standardized unit format
+unit_row_fgases = 1
+entity_row_fgases = 0
+unit_entity_rows_fgases = [unit_row_fgases, entity_row_fgases]
+unit_info_fgases = {
+    "default_unit": "",
+    "regexp_entity": r"^.*",
+    "regexp_unit": None,  # temp fix until param is marked as optional in PRIMAP2
+    "manual_repl_unit": {
+        "Catégories": "",
+        "Émissions en unité de masse d’origine (tonne)": "t",
+    },
+}
+
+
+first_ignore_cat_fgases = "Émissions en unité équivalent CO2 (Gg Eq-CO2)"
+cats_remove_fgases = [
+    "Facteurs de conversion  équivalent CO2 [GWP du SAR sur 100 ans ]"
+]
+
+entities_to_remove_fgases = ["Total HFCs", "Total PFCs"]
+
+## processing
+proc_info_country = {
+    "aggregate_coords": {
+        "category": {
+            "2.D": {
+                "sources": ["2.D.1", "2.D.2", "2.D.3", "2.D.4"],
+                # 'name': 'Non-Energy Products from Fuels and Solvent Use'
+            },
+            "2.G.1": {
+                "sources": ["2.G.1.a"],
+                # 'name': 'Electrical Equipment'
+            },
+            "2.G": {
+                "sources": ["2.G.1", "2.G.2", "2.G.3", "2.G.4"],
+                # 'name': 'Other Product Manufacture and Use'
+            },
+            "2.F": {
+                "sources": ["2.F.1", "2.F.2", "2.F.3", "2.F.4", "2.F.5", "2.F.6"],
+                # 'name': 'Product uses as Substitutes for Ozone Depleting Substances'
+            },  # needed for fgases only
+            "2.H": {
+                "sources": ["2.H.1", "2.H.3"],
+                # 'name': 'Other'
+            },
+            "2": {
+                "sources": ["2.A", "2.B", "2.C", "2.D", "2.E", "2.F", "2.G", "2.H"],
+                # 'name': 'IPPU'
+            },  # needed for fgases only
+            "M.3.C.1.AG": {
+                "sources": ["3.C.1.c"],
+                # 'name': 'Emissions from Biomass Burning (Agriculture)'
+            },
+            "M.3.C.AG": {
+                "sources": ["M.3.C.1.AG", "3.C.3", "3.C.4"],
+                # 'name': 'Aggregate sources and non-CO2 emissions sources on land (Agriculture)'
+            },
+            "M.AG.ELV": {
+                "sources": ["M.3.C.AG"],
+                # 'name': 'Agriculture excluding livestock emissions'
+            },
+            "M.AG": {
+                "sources": ["3.A", "M.AG.ELV"],
+                # 'name': 'Agriculture'
+            },
+            "M.LULUCF": {
+                "sources": ["3.B"],
+                # 'name': 'Land Use, Land Use Change, and Forestry'
+            },
+            "3": {
+                "sources": ["M.AG", "M.LULUCF"],
+                # 'name': 'AFOLU'
+            },
+            "M.0.EL": {
+                "sources": ["1", "2", "M.AG", "4"],
+                # 'name': 'National Total Excluding LULUCF'
+            },
+            "0": {
+                "sources": ["1", "2", "3", "4"],
+                # 'name': 'National Total'
+            },  # needed for fgases only
+        },
+    },
+    "remove_ts": {
+        "2A_NMVOC": {  # should be 0
+            "category": ["2.A"],
+            "entities": ["NMVOC"],
+            "time": ["1990"],
+        },
+        "2D_NMVOC": {  # is 0 needs to be recomputed
+            "category": ["2.D"],
+            "entities": ["NMVOC"],
+            "time": ["2012"],
+        },
+    },
+}

+ 482 - 0
src/unfccc_ghg_data/unfccc_reader/Mauritania/read_MRT_BUR2_from_pdf.py

@@ -0,0 +1,482 @@
+"""
+Read data from Mauritania's BUR2.
+
+Data are read from pdf. The file contains a detailed inventory for
+1990, 1995, 2000, 2010, 2012, 2015, 2018.
+
+"""
+
+
+import camelot
+import numpy as np
+import pandas as pd
+import primap2 as pm2
+
+from unfccc_ghg_data.helper import (
+    compression,
+    downloaded_data_path,
+    extracted_data_path,
+    fix_rows,
+    gas_baskets,
+    process_data_for_country,
+)
+from unfccc_ghg_data.unfccc_reader.Mauritania.config_mrt_bur2 import (
+    area_fgases,
+    cat_code_regexp,
+    cat_codes_manual,
+    cats_remove,
+    cats_remove_fgases,
+    cols_fgases,
+    coords_cols,
+    coords_defaults,
+    coords_terminologies,
+    coords_value_mapping,
+    entities_to_remove_fgases,
+    entity_row,
+    entity_row_fgases,
+    filter_remove,
+    first_ignore_cat_fgases,
+    fix_cat_using_preceeding,
+    fix_cat_values,
+    gwp_to_use,
+    header_long,
+    index_cols,
+    meta_data,
+    pages_fgases,
+    proc_info_country,
+    remove_per_table,
+    rows_to_fix_fgases,
+    table_defs,
+    table_defs_fgases,
+    table_reading_defs,
+    terminology_proc,
+    unit_entity_rows,
+    unit_entity_rows_fgases,
+    unit_info_fgases,
+    unit_row,
+    unit_row_fgases,
+)
+
+if __name__ == "__main__":
+    # ###
+    # configuration
+    # ###
+    input_folder = downloaded_data_path / "UNFCCC" / "Mauritania" / "BUR2"
+    output_folder = extracted_data_path / "UNFCCC" / "Mauritania"
+    if not output_folder.exists():
+        output_folder.mkdir()
+
+    output_filename = "MRT_BUR2_2020_"
+    inventory_file = "Mauritania_BUR_2_-_NIR_Annexes_-_May_2020.pdf"
+
+    # ###
+    # read the tables from pdf
+    # ###
+
+    ## main tables
+    # empty dataframe
+    df_all = None
+    for year in table_defs.keys():
+        print(f"Working on year {year}")
+
+        # join the tables which need combining
+        df_this_year = None
+        for table_parts, cats_remove_this_table in zip(
+            table_defs[year], remove_per_table
+        ):
+            new_table = camelot.read_pdf(
+                str(input_folder / inventory_file),
+                pages=table_reading_defs[table_parts[0]]["page"],
+                table_areas=[
+                    table_reading_defs[table_parts[0]]["page_def"]["area"][
+                        table_reading_defs[table_parts[0]]["table"]
+                    ]
+                ],
+                columns=[
+                    table_reading_defs[table_parts[0]]["page_def"]["cols"][
+                        table_reading_defs[table_parts[0]]["table"]
+                    ]
+                ],
+                flavor="stream",
+                split_text=True,
+            )
+            df_this_table = new_table[0].df
+            if "fix_rows" in table_reading_defs[table_parts[0]].keys():
+                rows_to_fix = table_reading_defs[table_parts[0]]["fix_rows"]
+                for n_rows in rows_to_fix:
+                    df_this_table = fix_rows(
+                        df_this_table,
+                        rows_to_fix=rows_to_fix[n_rows],
+                        col_to_use=0,
+                        n_rows=n_rows,
+                    )
+            if len(table_parts) > 1:
+                parts_remaining = table_parts[1:]
+                for part in parts_remaining:
+                    new_table = camelot.read_pdf(
+                        str(input_folder / inventory_file),
+                        pages=table_reading_defs[part]["page"],
+                        table_areas=[
+                            table_reading_defs[part]["page_def"]["area"][
+                                table_reading_defs[part]["table"]
+                            ]
+                        ],
+                        columns=[
+                            table_reading_defs[part]["page_def"]["cols"][
+                                table_reading_defs[part]["table"]
+                            ]
+                        ],
+                        flavor="stream",
+                        split_text=True,
+                    )
+                    df_new_table_part = new_table[0].df
+                    if "fix_rows" in table_reading_defs[part].keys():
+                        rows_to_fix = table_reading_defs[part]["fix_rows"]
+                        for n_rows in rows_to_fix:
+                            df_new_table_part = fix_rows(
+                                df_new_table_part,
+                                rows_to_fix=rows_to_fix[n_rows],
+                                col_to_use=0,
+                                n_rows=n_rows,
+                            )
+                    df_this_table = pd.concat([df_this_table, df_new_table_part])
+
+            df_this_table = df_this_table.reset_index(drop=True)
+
+            df_this_table = df_this_table.drop(
+                df_this_table.index[
+                    : table_reading_defs[table_parts[0]]["header"]["rows"]
+                ],
+            )
+            df_this_table.columns = [
+                table_reading_defs[table_parts[0]]["header"]["entity"],
+                table_reading_defs[table_parts[0]]["header"]["unit"],
+            ]
+
+            # replace '' by nan for filling
+            df_this_table.iloc[unit_entity_rows] = df_this_table.iloc[
+                unit_entity_rows
+            ].replace("", np.nan)
+            # fill the units to the right as for merged cells the unit is only
+            # in the first cell
+            df_this_table.iloc[unit_row] = df_this_table.iloc[unit_row].ffill(axis=0)
+            # fill entity from unit if empty
+            df_this_table.iloc[unit_entity_rows] = df_this_table.iloc[
+                unit_entity_rows
+            ].ffill()
+
+            # fix values in category col
+            df_this_table.iloc[:, 0] = df_this_table.iloc[:, 0].replace(fix_cat_values)
+
+            # replace line breaks, double, and triple spaces in category names
+            df_this_table.iloc[:, 0] = df_this_table.iloc[:, 0].str.replace("\n", " ")
+            df_this_table.iloc[:, 0] = df_this_table.iloc[:, 0].str.replace("   ", " ")
+            df_this_table.iloc[:, 0] = df_this_table.iloc[:, 0].str.replace("  ", " ")
+
+            # fix category values using preceding categories
+            for cat in fix_cat_using_preceeding:
+                mask = df_this_table.iloc[:, 0] == cat
+                if any(mask):
+                    print(f"Found occurence of category to fix {cat}")
+                    indices = np.where(mask)[0]
+                    for idx in indices:
+                        if (
+                            df_this_table.iloc[idx - 1, 0]
+                            in fix_cat_using_preceeding[cat].keys()
+                        ):
+                            df_this_table.iloc[idx, 0] = fix_cat_using_preceeding[cat][
+                                df_this_table.iloc[idx - 1, 0]
+                            ]
+                            print(
+                                f"Replaced {cat} by {fix_cat_using_preceeding[cat][df_this_table.iloc[idx - 1, 0]]}"  # noqa: E501
+                            )
+
+            # reindex because we have double indices
+            df_this_table = df_this_table.reset_index(drop=True)
+
+            # remove given rows
+            for cat in cats_remove_this_table:
+                # old_len = len(df_this_table)
+                df_this_table = df_this_table.drop(
+                    df_this_table[df_this_table[index_cols[0]] == cat].index
+                )
+                # new_len = len(df_this_table)
+                # print(f"Removed {old_len - new_len} rows from table for year {year}
+                # and category {cat}.")
+
+            # set index and convert to long format
+            df_this_table = df_this_table.set_index(index_cols)
+            # df_before_convert = df_this_table.copy(deep=True)
+            df_this_table_long = pm2.pm2io.nir_convert_df_to_long(
+                df_this_table, year, header_long
+            )
+
+            # combine with tables for other sectors (merge not append)
+            if df_this_year is None:
+                df_this_year = df_this_table_long
+            else:
+                df_this_year = pd.concat([df_this_year, df_this_table_long])
+
+        # aggregate years to df_all
+        if df_all is None:
+            df_all = df_this_year
+        else:
+            df_all = pd.concat([df_all, df_this_year])
+
+    df_all = df_all.reset_index(drop=True)
+
+    ## fgases ##############
+
+    df_all_fgases = None
+    for year in table_defs_fgases.keys():
+        print(f"Working on fgases year {year}")
+
+        # join the tables which need combining
+        table_parts = table_defs_fgases[year]
+        tables_fgases = camelot.read_pdf(
+            str(input_folder / inventory_file),
+            pages=pages_fgases[table_parts[0]],
+            table_areas=[area_fgases[table_parts[0]]],
+            columns=cols_fgases,
+            flavor="stream",
+            split_text=True,
+        )
+        df_this_year = tables_fgases[0].df.copy(deep=True)
+        if len(table_parts) > 1:
+            parts_remaining = table_parts[1:]
+            for part in parts_remaining:
+                tables_fgases = camelot.read_pdf(
+                    str(input_folder / inventory_file),
+                    pages=pages_fgases[part],
+                    table_areas=[area_fgases[part]],
+                    columns=cols_fgases,
+                    flavor="stream",
+                    split_text=True,
+                )
+                df_this_year = pd.concat([df_this_year, tables_fgases[0].df])
+
+        # reindex because we have double indices
+        df_this_year = df_this_year.reset_index(drop=True)
+
+        for n_rows in rows_to_fix_fgases:
+            df_this_year = fix_rows(
+                df_this_year,
+                rows_to_fix=rows_to_fix_fgases[n_rows],
+                col_to_use=0,
+                n_rows=n_rows,
+            )
+
+        # remove additional header rows
+        for cat in cats_remove_fgases:
+            df_this_year = df_this_year.drop(df_this_year[df_this_year[0] == cat].index)
+
+        # add category col label if missing
+        if (df_this_year.iloc[entity_row][0] == "") & (
+            str(df_this_year.iloc[unit_row][0]) == ""
+        ):
+            print(f"Add category header for table {table_parts[0]}")
+            df_this_year.iloc[entity_row][0] = index_cols[0]
+
+        # replace '' by nan for filling
+        df_this_year.iloc[unit_entity_rows_fgases] = df_this_year.iloc[
+            unit_entity_rows_fgases
+        ].replace("", np.nan)
+        # fill the units to the right as for merged cells the unit is only in
+        # the first cell
+        df_this_year.iloc[unit_row_fgases] = df_this_year.iloc[unit_row_fgases].fillna(
+            axis=0, method="ffill"
+        )
+
+        # replace line breaks in units and entities
+        df_this_year.iloc[entity_row_fgases] = df_this_year.iloc[
+            entity_row_fgases
+        ].str.replace("\n", "")
+        df_this_year.iloc[unit_row_fgases] = df_this_year.iloc[
+            unit_row_fgases
+        ].str.replace("\n", "")
+        df_this_year.iloc[entity_row_fgases] = df_this_year.iloc[
+            entity_row_fgases
+        ].str.replace("   ", " ")
+        df_this_year.iloc[unit_row_fgases] = df_this_year.iloc[
+            unit_row_fgases
+        ].str.replace("   ", " ")
+        df_this_year.iloc[entity_row_fgases] = df_this_year.iloc[
+            entity_row_fgases
+        ].str.replace("  ", " ")
+        df_this_year.iloc[unit_row_fgases] = df_this_year.iloc[
+            unit_row_fgases
+        ].str.replace("  ", " ")
+        df_this_year.iloc[entity_row_fgases] = df_this_year.iloc[
+            entity_row_fgases
+        ].str.strip()
+        df_this_year.iloc[unit_row_fgases] = df_this_year.iloc[
+            unit_row_fgases
+        ].str.strip()
+
+        # replace line breaks, double, and triple spaces in category names
+        df_this_year.iloc[:, 0] = df_this_year.iloc[:, 0].str.replace("\n", " ")
+        df_this_year.iloc[:, 0] = df_this_year.iloc[:, 0].str.replace("   ", " ")
+        df_this_year.iloc[:, 0] = df_this_year.iloc[:, 0].str.replace("  ", " ")
+
+        # set unit row cat label to nan
+        df_this_year.iloc[unit_row_fgases, 0] = np.nan
+
+        # remove second part of table with GWP weighted data
+        idx = df_this_year[
+            df_this_year.iloc[:, 0] == first_ignore_cat_fgases
+        ].index.tolist()[0]
+        df_this_year = df_this_year.loc[: idx - 1]
+
+        df_this_year = pm2.pm2io.nir_add_unit_information(
+            df_this_year,
+            unit_row=unit_row_fgases,
+            entity_row=entity_row_fgases,
+            **unit_info_fgases,
+        )
+
+        # remove entities
+        df_this_year = df_this_year.drop(columns=entities_to_remove_fgases)
+
+        # set index and convert to long format
+        df_this_year = df_this_year.set_index(index_cols)
+        df_this_year_long = pm2.pm2io.nir_convert_df_to_long(
+            df_this_year, year, header_long
+        )
+
+        # aggregate years to df_all
+        if df_all_fgases is None:
+            df_all_fgases = df_this_year_long
+        else:
+            df_all_fgases = pd.concat([df_all_fgases, df_this_year_long])
+
+    # combine with other data
+    df_all = pd.concat([df_all, df_all_fgases])
+
+    # drop the rows with memo items etc
+    for cat in cats_remove:
+        df_all = df_all.drop(df_all[df_all["orig_cat_name"] == cat].index)
+
+        # make a copy of the categories row
+    df_all["category"] = df_all["orig_cat_name"]
+
+    # temp: drop NOx and CO as the data is not read properly
+    # df_all = df_all.drop(df_all[df_all["entity"] == "CO"].index)
+    # df_all = df_all.drop(df_all[df_all["entity"] == "NOx"].index)
+
+    # replace cat names by codes in col "category"
+    # first the manual replacements
+    df_all["category"] = df_all["category"].replace(cat_codes_manual)
+
+    # then the regex replacements
+    def repl(m):  # noqa: D103
+        return m.group("code")
+
+    df_all["category"] = df_all["category"].str.replace(
+        cat_code_regexp, repl, regex=True
+    )
+    df_all = df_all.reset_index(drop=True)
+
+    # replace "," with "." in data and remove space in number
+    df_all.loc[:, "data"] = df_all.loc[:, "data"].str.replace(",", ".", regex=False)
+    df_all.loc[:, "data"] = df_all.loc[:, "data"].str.replace(". ", ".", regex=False)
+
+    # make sure all col headers are str
+    df_all.columns = df_all.columns.map(str)
+
+    data_if = pm2.pm2io.convert_long_dataframe_if(
+        df_all,
+        coords_cols=coords_cols,
+        coords_defaults=coords_defaults,
+        coords_terminologies=coords_terminologies,
+        coords_value_mapping=coords_value_mapping,
+        # coords_value_filling=coords_value_filling,
+        filter_remove=filter_remove,
+        # filter_keep=filter_keep,
+        meta_data=meta_data,
+        convert_str=True,
+        time_format="%Y",
+    )
+
+    data_if = data_if.drop(columns="orig_cat_name")
+    data_if.attrs["dimensions"]["*"].remove("orig_cat_name")
+
+    # conversion to PRIMAP2 native format
+    data_pm2 = pm2.pm2io.from_interchange_format(data_if)
+
+    # ###
+    # save data to IF and native format
+    # ###
+    data_if = data_pm2.pr.to_interchange_format()
+    if not output_folder.exists():
+        output_folder.mkdir()
+    pm2.pm2io.write_interchange_format(
+        output_folder / (output_filename + coords_terminologies["category"] + "_raw"),
+        data_if,
+    )
+
+    encoding = {var: compression for var in data_pm2.data_vars}
+    data_pm2.pr.to_netcdf(
+        output_folder
+        / (output_filename + coords_terminologies["category"] + "_raw.nc"),
+        encoding=encoding,
+    )
+
+    ### processing
+    data_proc_pm2 = data_pm2  # no copy needed; data_pm2 is not used afterwards
+    # fix HFC values (code is more general than needed as prep for transfer
+    # into a function)
+    HFC_fix = {  # SAR GWP while rest uses AR4
+        "dim": "category",
+        "source_value": "2",
+        "target_values": ["2.F", "2.F.1", "2.F.1.a"],
+        "filter": {
+            "variable": [f"HFCS ({gwp_to_use})"],
+            "time": ["2000", "2010"],
+        },
+    }
+    filter = HFC_fix["filter"]
+    variables = data_proc_pm2.data_vars
+    if "variable" in filter:
+        filter_vars = filter.pop("variable")
+        variables = [var for var in filter_vars if var in variables]
+
+    filter_source = filter.copy()
+    filter_source[HFC_fix["dim"]] = HFC_fix["source_value"]
+    for var in variables:
+        source_data = data_proc_pm2[var].pr.loc[filter_source]
+        for value in HFC_fix["target_values"]:
+            data_proc_pm2[var] = data_proc_pm2[var].pr.set(
+                HFC_fix["dim"], value, source_data, existing="overwrite"
+            )
+
+    # actual processing
+    data_proc_pm2 = process_data_for_country(
+        data_pm2,
+        entities_to_ignore=[],
+        gas_baskets=gas_baskets,
+        processing_info_country=proc_info_country,
+        cat_terminology_out=terminology_proc,
+    )
+
+    # adapt source and metadata
+    current_source = data_proc_pm2.coords["source"].to_numpy()[0]
+    data_temp = data_proc_pm2.pr.loc[{"source": current_source}]
+    data_proc_pm2 = data_proc_pm2.pr.set("source", "BUR_NIR", data_temp)
+    data_proc_pm2 = data_proc_pm2.pr.loc[{"source": ["BUR_NIR"]}]
+
+    # ###
+    # save data to IF and native format
+    # ###
+    data_proc_if = data_proc_pm2.pr.to_interchange_format()
+    if not output_folder.exists():
+        output_folder.mkdir()
+    pm2.pm2io.write_interchange_format(
+        output_folder / (output_filename + terminology_proc),
+        data_proc_if,
+    )
+
+    encoding = {var: compression for var in data_proc_pm2.data_vars}
+    data_proc_pm2.pr.to_netcdf(
+        output_folder / (output_filename + terminology_proc + ".nc"),
+        encoding=encoding,
+    )