vor 10 Monaten · 16b87f0f0c
--- a/UNFCCC_GHG_data/UNFCCC_reader/Israel/config_ISR_BUR2.py
+++ b/UNFCCC_GHG_data/UNFCCC_reader/Israel/config_ISR_BUR2.py
@@ -388,11 +388,6 @@ cat_conversion = {
 
				         'M.0.EL': {'sources': ['1', '2', 'M.AG', '4', '5'], 'name': 'National total '
			
 
				                                                                     'excluding LULUCF'},
			
 
				     },
			
 
				-    'basket_copy': {
			
 
				-        'GWPs_to_add': ["AR4GWP100", "AR5GWP100", "AR6GWP100"],
			
 
				-        'entities': ["HFCS", "PFCS"],
			
 
				-        'source_GWP': 'SARGWP100',
			
 
				-    },
			
 
				 }
			
 
				 
			
 
				 sectors_to_save = [
			
--- a/UNFCCC_GHG_data/UNFCCC_reader/Taiwan/config_TWN_NIR2022.py
+++ b/UNFCCC_GHG_data/UNFCCC_reader/Taiwan/config_TWN_NIR2022.py
@@ -55,38 +55,6 @@ def fix_rows(data: pd.DataFrame, rows_to_fix: list, col_to_use: str, n_rows: int
 
				         data = data.reset_index(drop=True)
			
 
				     return data
			
 
				 
			
 
				-def make_wide_table(data: pd.DataFrame, keyword: str, col: Union[int, str], index_cols: List[Union[int, str]])->pd.DataFrame:
			
 
				-    index = data.loc[data[col] == keyword].index
			
 
				-    if not list(index):
			
 
				-        print("Keyword for table transformation not found")
			
 
				-        return data
			
 
				-    elif len(index)==1:
			
 
				-        print("Keyword for table transformation found only once")
			
 
				-        return data
			
 
				-    else:
			
 
				-        df_all = None
			
 
				-        for i, item in enumerate(index):
			
 
				-            loc = data.index.get_loc(item)
			
 
				-            if i < len(index) - 1:
			
 
				-                next_loc = data.index.get_loc(index[i + 1])
			
 
				-            else:
			
 
				-                next_loc = data.index[-1] + 1
			
 
				-            df_to_add = data.loc[list(range(loc, next_loc))]
			
 
				-            # select only cols which don't have NaN, Null, or '' as header
			
 
				-            filter_nan = ((~df_to_add.iloc[0].isnull()) & (df_to_add.iloc[0] != 'NaN')& (df_to_add.iloc[0] != ''))
			
 
				-            df_to_add = df_to_add.loc[: , filter_nan]
			
 
				-            df_to_add.columns = df_to_add.iloc[0]
			
 
				-            #print(df_to_add.columns)
			
 
				-            df_to_add = df_to_add.drop(loc)
			
 
				-            df_to_add = df_to_add.set_index(index_cols)
			
 
				-            
			
 
				-            if df_all is None:
			
 
				-                df_all = df_to_add
			
 
				-            else:
			
 
				-                df_all = pd.concat([df_all, df_to_add], axis=1, join='outer')
			
 
				-        return df_all
			
 
				-        
			
 
				-
			
 
				 # page defs tp hold information on reading the table
			
 
				 page_defs = {
			
 
				     '5': { 
			
--- a/UNFCCC_GHG_data/UNFCCC_reader/Taiwan/config_TWN_NIR2023.py
+++ b/UNFCCC_GHG_data/UNFCCC_reader/Taiwan/config_TWN_NIR2023.py
@@ -0,0 +1,447 @@
 
				+# config and functions for Taiwan NIR 2023
			
 
				+
			
 
				+from typing import Union, List
			
 
				+import pandas as pd
			
 
				+import xarray as xr
			
 
				+from typing import Optional, Any
			
 
				+
			
 
				+gwp_to_use = "AR4GWP100"
			
 
				+terminology_proc = 'IPCC2006_PRIMAP'
			
 
				+
			
 
				+##### Table definitions
			
 
				+# page defs to hold information on reading the table
			
 
				+page_defs = {
			
 
				+    '5': { 
			
 
				+        "table_areas": ['36,523,563,68'],
			
 
				+        "split_text": False,
			
 
				+        "flavor": "stream",
			
 
				+    },
			
 
				+    '6': {
			
 
				+        "table_areas": ['34,562,563,53'],
			
 
				+        #"columns": ['195,228,263,295,328,363,395,428,462,495,529'], # works without
			
 
				+        "split_text": True,
			
 
				+        "flavor": "stream",
			
 
				+    },
			
 
				+    '7': {
			
 
				+        "table_areas": ['36,743,531,482', '36,425,564,54'],
			
 
				+        "split_text": True,
			
 
				+        "flavor": "stream",
			
 
				+    },
			
 
				+    '8': {
			
 
				+        "table_areas": ['35,748,534,567'],
			
 
				+        "split_text": True,
			
 
				+        "flavor": "stream",
			
 
				+    },
			
 
				+    '9': {
			
 
				+        "table_areas": ['34,753,565,286', '34,235,565,63'],
			
 
				+        "split_text": False,
			
 
				+        "flavor": "stream",
			
 
				+    },
			
 
				+    '10': {
			
 
				+        "table_areas": ['34,753,565,449'],
			
 
				+        "split_text": False,
			
 
				+        "flavor": "stream",
			
 
				+    },
			
 
				+    '11': {
			
 
				+        "table_areas": ['32,522,566,208'],
			
 
				+        "split_text": True,
			
 
				+        "flavor": "stream",
			
 
				+    },
			
 
				+    '12': {
			
 
				+        "table_areas": ['33,549,562,64'],
			
 
				+        "split_text": True,
			
 
				+        "flavor": "stream",
			
 
				+    },
			
 
				+    '13': {
			
 
				+        "table_areas": ['31,761,532,517'],
			
 
				+        "split_text": True,
			
 
				+        "flavor": "stream",
			
 
				+    },
			
 
				+    '14': {
			
 
				+        "table_areas": ['32,751,563,70'],
			
 
				+        "columns": ['217,250,282,313,344,374,406,437,468,501,531'],
			
 
				+        "split_text": True,
			
 
				+        "flavor": "stream",
			
 
				+    },
			
 
				+    '15': {
			
 
				+        "table_areas": ['32,345,565,53'],
			
 
				+        "split_text": True,
			
 
				+        "flavor": "stream",
			
 
				+    },
			
 
				+    '16': {
			
 
				+        "table_areas": ['32,745,532,597'],
			
 
				+        "split_text": True,
			
 
				+        "flavor": "stream",
			
 
				+    },
			
 
				+    '18': {
			
 
				+        "table_areas": ['30,747,564,260'],
			
 
				+        "columns": ['188,232,263,298,331,362,398,432,464,497,530'],
			
 
				+        "split_text": True,
			
 
				+        "flavor": "stream",
			
 
				+    }, # correct mistakes later
			
 
				+}
			
 
				+
			
 
				+# table defs to hold information on how to process the tables
			
 
				+table_defs = {
			
 
				+    'ES2.2': { # 1990-2021 Carbon Dioxide Emissions and Sequestration in Taiwan
			
 
				+        "tables": [1, 2],
			
 
				+        "rows_to_fix": {
			
 
				+            0: { 
			
 
				+                3: ['1.A.4.c Agriculture, Forestry, Fishery, and',
			
 
				+                    '2.D Non-Energy Products from Fuels and', 
			
 
				+                    '4. Land Use, Land Use Change and Forestry'],
			
 
				+            },
			
 
				+        },
			
 
				+        "index_cols": ['GHG Emission Source and Sinks'],
			
 
				+        "wide_keyword": 'GHG Emission Source and Sinks',
			
 
				+        "col_wide_kwd": 0, 
			
 
				+        "entity": "CO2",
			
 
				+        "unit": "kt",
			
 
				+        "cat_codes_manual": {
			
 
				+            'Net GHG Emission (including LULUCF)': '0',
			
 
				+            'Total GHG Emission (excluding LULUCF)': 'M.0.EL',
			
 
				+        },            
			
 
				+    },
			
 
				+    'ES2.3': { # 1990-2021 Methane Emissions in Taiwan
			
 
				+        "tables": [3, 4],
			
 
				+        "rows_to_fix": {},
			
 
				+        "index_cols": ['GHG Emission Sources and Sinks'],
			
 
				+        "wide_keyword": 'GHG Emission Sources and Sinks',
			
 
				+        "col_wide_kwd": 0, 
			
 
				+        "entity": f"CH4 ({gwp_to_use})",
			
 
				+        "unit": "ktCO2eq",
			
 
				+        "cat_codes_manual": {
			
 
				+            'Total Methane Emissions': '0',
			
 
				+        },
			
 
				+        "drop_rows": [
			
 
				+            "5.B Garbage Biological Treatment", # has lower significant digits than in table ES3.6
			
 
				+            "2. Industrial Process and Product Use Sector",  # inconsistent with subsector sum (rounding)
			
 
				+        ],
			
 
				+    },
			
 
				+    'ES2.4': { # 1990-2021 Nitrous Oxide Emissions in Taiwan
			
 
				+        "tables": [5],
			
 
				+        "fix_cats": {
			
 
				+            0: {
			
 
				+                "Total Nitrous Oxide Emissionsl": "Total Nitrous Oxide Emissions",
			
 
				+            },
			
 
				+        },            
			
 
				+        "rows_to_fix": {},
			
 
				+        "index_cols": ['GHG Emission Sources and Sinks'],
			
 
				+        "wide_keyword": 'GHG Emission Sources and Sinks',
			
 
				+        "col_wide_kwd": 0, 
			
 
				+        "entity": f"N2O ({gwp_to_use})",
			
 
				+        "unit": "ktCO2eq",
			
 
				+        "cat_codes_manual": {
			
 
				+            'Total Nitrous Oxide Emissions': '0',
			
 
				+        },
			
 
				+        "drop_rows": [
			
 
				+            "3.F Field Burning of Agricultural Residues", # has lower significant digits than in table ES3.4
			
 
				+            "5. Waste Sector", # error in 1996 data
			
 
				+        ],
			
 
				+    },
			
 
				+    'ES2.5': { # 1990-2021 Fluoride-Containing Gas Emissions in Taiwan
			
 
				+        "tables": [6,7],
			
 
				+        "fix_cats": {},
			
 
				+        "rows_to_fix": {
			
 
				+            0: {
			
 
				+                -2: ['Total PFCs Emissions (2.E Electronics Industry)',
			
 
				+                    'Total SF6 Emissions',
			
 
				+                    'Total NF3 Emissions (2.E Electronics Industry)'],
			
 
				+            },
			
 
				+        },
			
 
				+        "index_cols": ['GHG Emission Sources and Sinks'],
			
 
				+        "wide_keyword": 'GHG Emission Sources and Sinks',
			
 
				+        "col_wide_kwd": 0,
			
 
				+        "gas_splitting": {
			
 
				+            "Total HFCs Emissions": f"HFCS ({gwp_to_use})",
			
 
				+            "Total PFCs Emissions (2.E Electronics Industry)": f"PFCS ({gwp_to_use})",
			
 
				+            "Total SF6 Emissions": f"SF6 ({gwp_to_use})",
			
 
				+            "Total NF3 Emissions (2.E Electronics Industry)": f"NF3 ({gwp_to_use})",
			
 
				+            "Total Fluoride-Containing Gas Emissions": f"FGASES ({gwp_to_use})",
			
 
				+            "GHG Emission Sources and Sinks": "entity",
			
 
				+        },
			
 
				+        "unit": "ktCO2eq",
			
 
				+        "cat_codes_manual": {
			
 
				+            "Total HFCs Emissions": "2",
			
 
				+            "Total PFCs Emissions (2.E Electronics Industry)": "2.E",
			
 
				+            "Total SF6 Emissions": "2",
			
 
				+            "Total NF3 Emissions (2.E Electronics Industry)": "2.E",
			
 
				+            "Total Fluoride-Containing Gas Emissions": "2",
			
 
				+        },
			
 
				+    },
			
 
				+    'ES3.1': { # 1990-2021 Greenhouse Gas Emission in Taiwan by Sector
			
 
				+        "tables": [8],
			
 
				+        "rows_to_fix": {},
			
 
				+        "index_cols": ['GHG Emission Sources and Sinks'],
			
 
				+        "wide_keyword": 'GHG Emission Sources and Sinks',
			
 
				+        "col_wide_kwd": 0, 
			
 
				+        "entity": f"KYOTOGHG ({gwp_to_use})",
			
 
				+        "unit": "ktCO2eq",
			
 
				+        "cat_codes_manual": {
			
 
				+            'Net GHG Emission (including LULUCF)': '0',
			
 
				+            'Total GHG Emission (excluding LULUCF)': 'M.0.EL',
			
 
				+        },
			
 
				+    },
			
 
				+    'ES3.2': { # 1990-2021 Greenhouse Gas Emissions Produced by Energy Sector in Taiwan
			
 
				+        "tables": [9,10],
			
 
				+        "rows_to_fix": {},
			
 
				+        "index_cols": ['GHG Emission Sources and Sinks'],
			
 
				+        "wide_keyword": 'GHG Emission Sources and Sinks',
			
 
				+        "col_wide_kwd": 0, 
			
 
				+        "gas_splitting": {
			
 
				+            "Total CO2 Emission": "CO2",
			
 
				+            "Total CH4 Emission": f"CH4 ({gwp_to_use})",
			
 
				+            "Total N2O Emission": f"N2O ({gwp_to_use})",
			
 
				+            "Total Emission from Energy Sector": f"KYOTOGHG ({gwp_to_use})",
			
 
				+            "GHG Emission Sources and Sinks": "entity",
			
 
				+        },
			
 
				+        "unit": "ktCO2eq",
			
 
				+        "cat_codes_manual": {
			
 
				+            'Total CO2 Emission': '1',
			
 
				+            'Total CH4 Emission': '1',
			
 
				+            'Total N2O Emission': '1',
			
 
				+            'Total Emission from Energy Sector': '1',
			
 
				+        },
			
 
				+    },
			
 
				+    'ES3.3': { # 1990-2021 Greenhouse Gas Emissions Produced by Industrial Process and Product Use Sector (IPPU) in Taiwan
			
 
				+        "tables": [11],
			
 
				+        "rows_to_fix": {},
			
 
				+        "index_cols": ['GHG Emission Sources and Sinks'],
			
 
				+        "wide_keyword": 'GHG Emission Sources and Sinks',
			
 
				+        "col_wide_kwd": 0, 
			
 
				+        "gas_splitting": {
			
 
				+            "Total CO2 Emission": "CO2",
			
 
				+            "Total CH4 Emission": f"CH4 ({gwp_to_use})",
			
 
				+            "Total N2O Emission": f"N2O ({gwp_to_use})",
			
 
				+            "Total HFCs Emission": f"HFCS ({gwp_to_use})",
			
 
				+            "Total PFCs Emission (2.E Electronics Industry)": f"PFCS ({gwp_to_use})",
			
 
				+            "Total SF6 Emission": f"SF6 ({gwp_to_use})",
			
 
				+            "Total NF3 Emission (2.E Electronics Industry)": f"NF3 ({gwp_to_use})",
			
 
				+            "Total Emission from IPPU Sector": f"KYOTOGHG ({gwp_to_use})",
			
 
				+            "GHG Emission Sources and Sinks": "entity",
			
 
				+        },
			
 
				+        "unit": "ktCO2eq",
			
 
				+        "cat_codes_manual": {
			
 
				+            'Total CO2 Emission': '2',
			
 
				+            'Total CH4 Emission': '2',
			
 
				+            'Total N2O Emission': '2',
			
 
				+            'Total HFCs Emission': '2',
			
 
				+            'Total PFCs Emission (2.E Electronics Industry)': '2.E',
			
 
				+            'Total SF6 Emission': '2',
			
 
				+            'Total NF3 Emission (2.E Electronics Industry)': '2.E',
			
 
				+            'Total Emission from IPPU Sector': '2',
			
 
				+        },
			
 
				+        "drop_rows": [
			
 
				+        #     ("2.D Non-Energy Products from Fuels and Solvent Use", "CO2"), # has lower significant digits than in table ES2.2
			
 
				+            "Total CH4 Emission",  # inconsistent with subsectors (rounding)
			
 
				+        ]
			
 
				+    }, 
			
 
				+    'ES3.4': { # 1990-2021 Greenhouse Gas Emissions Produced by Agriculture Sector in Taiwan
			
 
				+        "tables": [12,13],
			
 
				+        "rows_to_fix": {},
			
 
				+        "index_cols": ['GHG Emission Sources and Sinks'],
			
 
				+        "wide_keyword": 'GHG Emission Sources and Sinks',
			
 
				+        "col_wide_kwd": 0, 
			
 
				+        "gas_splitting": {
			
 
				+            "Total CO2 Emission (3.H Urea applied)": "CO2",
			
 
				+            "Total CH4 Emission": f"CH4 ({gwp_to_use})",
			
 
				+            "Total N2O Emission": f"N2O ({gwp_to_use})",
			
 
				+            "Total Emission From Agriculture Sector": f"KYOTOGHG ({gwp_to_use})",
			
 
				+            "GHG Emission Sources and Sinks": "entity",
			
 
				+        },
			
 
				+        "unit": "ktCO2eq",
			
 
				+        "cat_codes_manual": {
			
 
				+            'Total CO2 Emission (3.H Urea applied)': '3.H',
			
 
				+            'Total CH4 Emission': '3',
			
 
				+            'Total N2O Emission': '3',
			
 
				+            'Total Emission From Agriculture Sector': '3',
			
 
				+        },
			
 
				+    }, 
			
 
				+    'ES3.6': { # 1990-2020 Greenhouse Gas Emissions in Taiwan by Waste Sector
			
 
				+        "tables": [14],
			
 
				+        "rows_to_fix": {
			
 
				+            0: {
			
 
				+                3: ["Total CO2 Emission"],
			
 
				+            },
			
 
				+        }, 
			
 
				+        "index_cols": ['GHG Emission Sources and Sinks'], 
			
 
				+        "wide_keyword": 'GHG Emission Sources and Sinks',
			
 
				+        "col_wide_kwd": 0, # two column header
			
 
				+        "gas_splitting": {
			
 
				+            "Total CO2 Emission (5.C Incineration and Open Burning of Waste)": "CO2",
			
 
				+            "Total CH4 Emission": f"CH4 ({gwp_to_use})",
			
 
				+            "Total N2O Emission": f"N2O ({gwp_to_use})",
			
 
				+            "Total Emission from Waste Sector": f"KYOTOGHG ({gwp_to_use})",
			
 
				+            "GHG Emission Sources and Sinks": "entity",
			
 
				+        },
			
 
				+        "unit": "ktCO2eq",
			
 
				+        "cat_codes_manual": {
			
 
				+            'Total CO2 Emission (5.C Incineration and Open Burning of Waste)': '5.C',
			
 
				+            'Total CH4 Emission': '5',
			
 
				+            'Total N2O Emission': '5',
			
 
				+            'Total Emission from Waste Sector': '5',
			
 
				+        },
			
 
				+    }, 
			
 
				+}
			
 
				+
			
 
				+table_defs_skip = {
			
 
				+    'ES2.1': { # 1990-2020 Greenhouse Gas Emissions and Sequestration in Taiwan by Type
			
 
				+        "tables": [0],
			
 
				+        "rows_to_fix": {
			
 
				+            0: { 
			
 
				+                3: ['CO2'],
			
 
				+            },
			
 
				+            1: {  # where col 0 is empty
			
 
				+                3: ['Net GHG Emission', 'Total GHG Emission'],
			
 
				+            },
			
 
				+        },
			
 
				+        "index_cols": ['GHG', 'GWP'],
			
 
				+        "wide_keyword": 'GHG',
			
 
				+        "col_wide_kwd": 0, 
			
 
				+        "unit": "ktCO2eq",
			
 
				+    },
			
 
				+    'ES2.5': { # 1990-2020 Fluoride-Containing Gas Emissions in Taiwan
			
 
				+        "tables": [6],
			
 
				+        "rows_to_fix": {
			
 
				+            0: {
			
 
				+                -2: ['Total SF6 Emissions', 
			
 
				+                     'Total NF3 Emissions'],
			
 
				+            },
			
 
				+        },
			
 
				+        "index_cols": ['GHG Emission Sources and Sinks'],
			
 
				+        "wide_keyword": 'GHG Emission Sources and Sinks',
			
 
				+        "col_wide_kwd": 0, 
			
 
				+        #"entity": "CO2",
			
 
				+        "unit": "ktCO2eq",
			
 
				+    },
			
 
				+    'ES3.5': { # skip for now: 1990-2020 Changes in Carbon Sequestration by LULUCF Sector in Taiwan
			
 
				+        "tables": [12],
			
 
				+        "rows_to_fix": {}, 
			
 
				+        "index_cols": ['GHG Emission Sources and Sinks'], #header is merged col :-(
			
 
				+        "wide_keyword": 'GHG Emission Sources and Sinks',
			
 
				+        "col_wide_kwd": 0, # two column header
			
 
				+        "unit": "kt",
			
 
				+        "entity": "CO2",
			
 
				+    }, # need to consider the two columns specially (merge?)
			
 
				+}
			
 
				+
			
 
				+
			
 
				+##### primap2 metadata
			
 
				+cat_code_regexp = r'(?P<UNFCCC_GHG_data>^[a-zA-Z0-9\.]{1,7})\s.*'
			
 
				+
			
 
				+time_format = "%Y"
			
 
				+
			
 
				+coords_cols = {
			
 
				+    "category": "category",
			
 
				+    "entity": "entity",
			
 
				+    "unit": "unit",
			
 
				+    # "area": "Geo_code",
			
 
				+}
			
 
				+
			
 
				+add_coords_cols = {
			
 
				+    #    "orig_cat_name": ["orig_cat_name", "category"],
			
 
				+}
			
 
				+
			
 
				+coords_terminologies = {
			
 
				+    "area": "ISO3",
			
 
				+    "category": "IPCC2006_1996_Taiwan_Inv",
			
 
				+    "scenario": "PRIMAP",
			
 
				+}
			
 
				+
			
 
				+coords_defaults = {
			
 
				+    "source": "TWN-GHG-Inventory",
			
 
				+    "provenance": "measured",
			
 
				+    "scenario": "2023NIR",
			
 
				+    "area": "TWN",
			
 
				+    # unit fill by table
			
 
				+}
			
 
				+
			
 
				+coords_value_mapping = {
			
 
				+    "unit": "PRIMAP1",
			
 
				+    "category": "PRIMAP1",
			
 
				+}
			
 
				+
			
 
				+coords_value_filling = {}
			
 
				+
			
 
				+#
			
 
				+filter_remove = {}
			
 
				+
			
 
				+filter_keep = {}
			
 
				+
			
 
				+meta_data = {
			
 
				+    "references": "https://www.cca.gov.tw/information-service/publications/national-ghg-inventory-report/1851.html",
			
 
				+    "rights": "",
			
 
				+    "contact": "mail@johannes-guetschow.de",
			
 
				+    "title": "2023 Republic of China - National Greenhouse Gas Report",
			
 
				+    "comment": "Read fom pdf file and converted to PRIMAP2 format by Johannes Gütschow",
			
 
				+    "institution": "Republic of China - Environmental Protection Administration",
			
 
				+}
			
 
				+
			
 
				+##### processing information
			
 
				+cat_conversion = {
			
 
				+    'mapping': {
			
 
				+        '0': '0',
			
 
				+        'M.0.EL': 'M.0.EL',
			
 
				+        '1': '1',
			
 
				+        '1.A.1': '1.A.1',
			
 
				+        '1.A.2': '1.A.2',
			
 
				+        '1.A.3': '1.A.3',
			
 
				+        '1.A.4': '1.A.4',
			
 
				+        '1.A.4.a': '1.A.4.a',
			
 
				+        '1.A.4.b': '1.A.4.b',
			
 
				+        '1.A.4.c': '1.A.4.c',
			
 
				+        '1.B.1': '1.B.1',
			
 
				+        '1.B.2': '1.B.2',
			
 
				+        '2': '2',
			
 
				+        '2.A': '2.A',
			
 
				+        '2.B': '2.B',
			
 
				+        '2.C': '2.C',
			
 
				+        '2.D': '2.D',
			
 
				+        '2.E': '2.E',
			
 
				+        '2.F': '2.F',
			
 
				+        '2.G': '2.G',
			
 
				+        '2.H': '2.H',
			
 
				+        '3': 'M.AG',
			
 
				+        '3.A': '3.A.1',
			
 
				+        '3.B': '3.A.2',
			
 
				+        '3.C': '3.C.7',
			
 
				+        '3.D': 'M.3.AS',
			
 
				+        '3.F': '3.C.1.b',
			
 
				+        '3.H': '3.C.3',
			
 
				+        '4': 'M.LULUCF',
			
 
				+        '5': '4',
			
 
				+        '5.A': '4.A',
			
 
				+        '5.B': '4.B',
			
 
				+        '5.C': '4.C',
			
 
				+        '5.D': '4.D',
			
 
				+        '5.D.1': '4.D.1',
			
 
				+        '5.D.2': '4.D.2',
			
 
				+    },
			
 
				+    'aggregate': {
			
 
				+        '1.A': {'sources': ['1.A.1', '1.A.2', '1.A.3', '1.A.4'],
			
 
				+                'name': 'Fuel Combustion Activities'},
			
 
				+        '1.B': {'sources': ['1.B.1', '1.B.2'], 'name': 'Fugitive Emissions from Fuels'},
			
 
				+        '2': {'sources': ['2.A', '2.B', '2.C', '2.D', '2.E', '2.F', '2.G', '2.H'],
			
 
				+              'name': 'Industrial Process and Product Use Sector'},
			
 
				+        '3.A': {'sources': ['3.A.1', '3.A.2'], 'name': 'Livestock'},
			
 
				+        '3.B': {'sources': ['M.LULUCF'], 'name': 'Land'},
			
 
				+        '3.C.1': {'sources': ['3.C.1.b'], 'name': 'Emissions from Biomass Burning'},
			
 
				+        '3.C.5': {'sources': ['3.C.5.a', '3.C.5.b'],
			
 
				+                  'name': 'Indirect N2O Emissions from Managed Soils'},
			
 
				+        '3.C': {'sources': ['3.C.1', '3.C.3', 'M.3.AS', '3.C.7'],
			
 
				+                'name': 'Aggregate sources and non-CO2 emissions sources on land'},
			
 
				+        'M.AG.ELV': {'sources': ['3.C'],
			
 
				+                     'name': 'Agriculture excluding livestock emissions'},
			
 
				+        'M.AG': {'sources': ['3.A', '3.C'], 'name': 'Agriculture'},
			
 
				+        '3': {'sources': ['M.AG', 'M.LULUCF'], 'name': 'AFOLU'},  # consistency check
			
 
				+        'M.0.EL': {'sources': ['1', '2', 'M.AG', '4']}, # consistency check
			
 
				+        '0': {'sources': ['1', '2', '3', '4']},  # consistency check
			
 
				+    },
			
 
				+}
			
 
				+
			
 
				+basket_copy = {
			
 
				+    'GWPs_to_add': ["SARGWP100", "AR5GWP100", "AR6GWP100"],
			
 
				+    'entities': ["HFCS", "PFCS"],
			
 
				+    'source_GWP': gwp_to_use,
			
 
				+}
			
 
				+
			
--- a/UNFCCC_GHG_data/UNFCCC_reader/Taiwan/read_TWN_2023-Inventory_from_pdf.py
+++ b/UNFCCC_GHG_data/UNFCCC_reader/Taiwan/read_TWN_2023-Inventory_from_pdf.py
@@ -0,0 +1,228 @@
 
				+# this script reads data from Taiwan's 2023 national inventory
			
 
				+# Data is read from the English summary pdf
			
 
				+# TODO: add further GWPs and gas baskets
			
 
				+
			
 
				+import pandas as pd
			
 
				+import primap2 as pm2
			
 
				+import camelot
			
 
				+import copy
			
 
				+
			
 
				+from UNFCCC_GHG_data.helper import downloaded_data_path, extracted_data_path
			
 
				+from UNFCCC_GHG_data.helper import compression, make_wide_table
			
 
				+from UNFCCC_GHG_data.helper import process_data_for_country, gas_baskets
			
 
				+from primap2.pm2io._data_reading import matches_time_format
			
 
				+
			
 
				+from config_TWN_NIR2022 import fix_rows
			
 
				+from config_TWN_NIR2023 import table_defs, page_defs, cat_code_regexp
			
 
				+from config_TWN_NIR2023 import terminology_proc
			
 
				+from config_TWN_NIR2023 import gwp_to_use, basket_copy
			
 
				+from config_TWN_NIR2023 import coords_cols, add_coords_cols, coords_defaults
			
 
				+from config_TWN_NIR2023 import coords_terminologies, coords_value_mapping
			
 
				+from config_TWN_NIR2023 import meta_data, cat_conversion
			
 
				+
			
 
				+
			
 
				+# ###
			
 
				+# configuration
			
 
				+# ###
			
 
				+input_folder = downloaded_data_path / 'non-UNFCCC' / 'Taiwan' / '2023_NIR'
			
 
				+output_folder = extracted_data_path / 'non-UNFCCC' / 'Taiwan'
			
 
				+if not output_folder.exists():
			
 
				+    output_folder.mkdir()
			
 
				+
			
 
				+output_filename = 'TWN_inventory_2023_'
			
 
				+inventory_file = '2023_NIR_executive_summary_english.pdf'
			
 
				+
			
 
				+# ###
			
 
				+# read the tables from pdf
			
 
				+# ###
			
 
				+
			
 
				+all_tables = []
			
 
				+for page in page_defs:
			
 
				+    print(f"Reading from page {page}")
			
 
				+    new_tables = camelot.read_pdf(
			
 
				+        str(input_folder / inventory_file),
			
 
				+        pages=page,
			
 
				+        **page_defs[page],
			
 
				+        )
			
 
				+    for table in new_tables:
			
 
				+        all_tables.append(table.df)
			
 
				+
			
 
				+
			
 
				+# ###
			
 
				+# convert tables to primap2 format
			
 
				+# ###
			
 
				+data_pm2 = None
			
 
				+for table_name in table_defs.keys():
			
 
				+    print(f"Working on table: {table_name}")
			
 
				+
			
 
				+    table_def = copy.deepcopy(table_defs[table_name])
			
 
				+    # combine all raw tables
			
 
				+    df_this_table = all_tables[table_def["tables"][0]].copy(deep=True)
			
 
				+    if len(table_def["tables"]) > 1:
			
 
				+        for table in table_def["tables"][1:]:
			
 
				+            df_this_table = pd.concat(
			
 
				+                [df_this_table, all_tables[table]],
			
 
				+                axis=0,
			
 
				+                join='outer')
			
 
				+
			
 
				+    # fix for table ES3.6
			
 
				+    if table_name == 'ES3.6':
			
 
				+        col_idx = df_this_table[0] == "Total CO Emission"
			
 
				+        df_this_table.loc[col_idx, 1:] = ''
			
 
				+        df_this_table.loc[col_idx, 0] = 'Total CO2 Emission'
			
 
				+
			
 
				+    df_this_table = df_this_table.reset_index(drop=True)
			
 
				+
			
 
				+    # fix categories if necessary
			
 
				+    if "fix_cats" in table_def.keys():
			
 
				+        for col in table_def["fix_cats"]:
			
 
				+            df_this_table[col] = df_this_table[col].replace(table_def["fix_cats"][col])
			
 
				+
			
 
				+    # fix rows
			
 
				+    for col in table_def["rows_to_fix"].keys():
			
 
				+        for n_rows in table_def["rows_to_fix"][col].keys():
			
 
				+            print(f"Fixing {col}, {n_rows}")
			
 
				+            # replace line breaks, long hyphens, double, and triple spaces in category names
			
 
				+            df_this_table.iloc[:, 0] = df_this_table.iloc[:, 0].str.replace("\n", " ")
			
 
				+            df_this_table.iloc[:, 0] = df_this_table.iloc[:, 0].str.replace("   ", " ")
			
 
				+            df_this_table.iloc[:, 0] = df_this_table.iloc[:, 0].str.replace("  ", " ")
			
 
				+            df_this_table.iloc[:, 0] = df_this_table.iloc[:, 0].str.replace("–", "-")
			
 
				+            df_this_table = fix_rows(df_this_table,
			
 
				+                                     table_def["rows_to_fix"][col][n_rows], col, n_rows)
			
 
				+
			
 
				+    # split by entity
			
 
				+    if "gas_splitting" in table_def.keys():
			
 
				+        col_entity = [''] * len(df_this_table)
			
 
				+        last_entity = ''
			
 
				+        for i in range(0, len(df_this_table)):
			
 
				+            current_header = df_this_table[table_def["col_wide_kwd"]].iloc[i]
			
 
				+            if current_header in table_def["gas_splitting"].keys():
			
 
				+                last_entity = table_def["gas_splitting"][current_header]
			
 
				+            col_entity[i] = last_entity
			
 
				+
			
 
				+        df_this_table["entity"] = col_entity
			
 
				+        table_def["index_cols"].append("entity")
			
 
				+
			
 
				+    # make a wide table
			
 
				+    df_this_table = make_wide_table(df_this_table, table_def["wide_keyword"],
			
 
				+                                    table_def["col_wide_kwd"], table_def["index_cols"])
			
 
				+
			
 
				+    if "drop_rows" in table_def.keys():
			
 
				+        df_this_table = df_this_table.drop(table_def["drop_rows"], axis=0)
			
 
				+
			
 
				+    # reset row index
			
 
				+    df_this_table = df_this_table.reset_index(drop=False)
			
 
				+
			
 
				+    # add entity
			
 
				+    if "entity" in table_def.keys():
			
 
				+        df_this_table["entity"] = table_def["entity"]
			
 
				+
			
 
				+    # add unit
			
 
				+    df_this_table["unit"] = table_def["unit"]
			
 
				+
			
 
				+    df_this_table = df_this_table.rename({table_def["index_cols"][0]: "orig_cat_name"},
			
 
				+                                         axis=1)
			
 
				+
			
 
				+    # print(table_def["index_cols"][0])
			
 
				+    # print(df_this_table.columns.values)
			
 
				+
			
 
				+    # make a copy of the categories row
			
 
				+    df_this_table["category"] = df_this_table["orig_cat_name"]
			
 
				+
			
 
				+    # replace cat names by codes in col "category"
			
 
				+    # first the manual replacements
			
 
				+    df_this_table["category"] = df_this_table["category"].replace(
			
 
				+        table_def["cat_codes_manual"])
			
 
				+    # then the regex replacements
			
 
				+    repl = lambda m: m.group('UNFCCC_GHG_data')
			
 
				+    df_this_table["category"] = df_this_table["category"].str.replace(cat_code_regexp,
			
 
				+                                                                      repl, regex=True)
			
 
				+
			
 
				+    ### convert to PRIMAP2 IF
			
 
				+    # remove ','
			
 
				+    time_format = '%Y'
			
 
				+    time_columns = [
			
 
				+        col
			
 
				+        for col in df_this_table.columns.values
			
 
				+        if matches_time_format(col, time_format)
			
 
				+    ]
			
 
				+
			
 
				+    for col in time_columns:
			
 
				+        df_this_table.loc[:, col] = df_this_table.loc[:, col].str.replace(',', '',
			
 
				+                                                                          regex=False)
			
 
				+
			
 
				+    # drop orig_cat_name as it's not unique per category
			
 
				+    df_this_table = df_this_table.drop(columns="orig_cat_name")
			
 
				+
			
 
				+    # coords_defaults_this_table = coords_defaults.copy()
			
 
				+    # coords_defaults_this_table["unit"] = table_def["unit"]
			
 
				+    df_this_table_if = pm2.pm2io.convert_wide_dataframe_if(
			
 
				+        df_this_table,
			
 
				+        coords_cols=coords_cols,
			
 
				+        add_coords_cols=add_coords_cols,
			
 
				+        coords_defaults=coords_defaults,
			
 
				+        coords_terminologies=coords_terminologies,
			
 
				+        coords_value_mapping=coords_value_mapping,
			
 
				+        # coords_value_filling=coords_value_filling,
			
 
				+        # filter_remove=filter_remove,
			
 
				+        # filter_keep=filter_keep,
			
 
				+        meta_data=meta_data
			
 
				+    )
			
 
				+
			
 
				+    this_table_pm2 = pm2.pm2io.from_interchange_format(df_this_table_if)
			
 
				+
			
 
				+    if data_pm2 is None:
			
 
				+        data_pm2 = this_table_pm2
			
 
				+    else:
			
 
				+        data_pm2 = data_pm2.pr.merge(this_table_pm2)
			
 
				+
			
 
				+# convert back to IF to have units in the fixed format
			
 
				+data_if = data_pm2.pr.to_interchange_format()
			
 
				+
			
 
				+# ###
			
 
				+# save data
			
 
				+# ###
			
 
				+# data in original categories
			
 
				+pm2.pm2io.write_interchange_format(output_folder /
			
 
				+                                   (output_filename + coords_terminologies["category"]),
			
 
				+                                   data_if)
			
 
				+encoding = {var: compression for var in data_pm2.data_vars}
			
 
				+data_pm2.pr.to_netcdf((output_folder /
			
 
				+                       (output_filename + coords_terminologies[
			
 
				+                           "category"])).with_suffix(".nc"),
			
 
				+                      encoding=encoding)
			
 
				+
			
 
				+
			
 
				+# ###
			
 
				+# convert to IPCC2006 categories
			
 
				+# ###
			
 
				+data_proc_pm2 = data_pm2.copy(deep=True)
			
 
				+
			
 
				+
			
 
				+country_processing = {
			
 
				+    'basket_copy': basket_copy,
			
 
				+}
			
 
				+
			
 
				+data_proc_pm2 = process_data_for_country(
			
 
				+    data_proc_pm2,
			
 
				+    entities_to_ignore=[],
			
 
				+    gas_baskets=gas_baskets,
			
 
				+    processing_info_country=country_processing,
			
 
				+    cat_terminology_out = terminology_proc,
			
 
				+    category_conversion = cat_conversion,
			
 
				+)
			
 
				+
			
 
				+# convert to IF
			
 
				+data_proc_if = data_proc_pm2.pr.to_interchange_format()
			
 
				+
			
 
				+# ###
			
 
				+# save data
			
 
				+# ###
			
 
				+# data in 2006 categories
			
 
				+pm2.pm2io.write_interchange_format(output_folder /
			
 
				+                                   (output_filename + "IPCC2006_PRIMAP"),
			
 
				+                                   data_proc_if)
			
 
				+encoding = {var: compression for var in data_proc_pm2.data_vars}
			
 
				+data_proc_pm2.pr.to_netcdf((output_folder /
			
 
				+                            (output_filename + "IPCC2006_PRIMAP")).with_suffix(".nc"),
			
 
				+                           encoding=encoding)
			
--- a/UNFCCC_GHG_data/helper/__init__.py
+++ b/UNFCCC_GHG_data/helper/__init__.py
@@ -8,7 +8,7 @@ from .definitions import GWP_factors, gas_baskets
 
				 from .definitions import compression
			
 
				 from .functions import get_country_code, get_country_name, convert_categories
			
 
				 from .functions import create_folder_mapping, process_data_for_country, get_code_file
			
 
				-from .functions import fix_rows
			
 
				+from .functions import fix_rows, make_wide_table
			
 
				 
			
 
				 __all__ = [
			
 
				     "root_path",
			
@@ -31,5 +31,6 @@ __all__ = [
 
				     "create_folder_mapping",
			
 
				     "process_data_for_country",
			
 
				     "fix_rows",
			
 
				+    "make_wide_table",
			
 
				     "compression",
			
 
				 ]
			
--- a/UNFCCC_GHG_data/helper/functions.py
+++ b/UNFCCC_GHG_data/helper/functions.py
@@ -8,7 +8,7 @@ import pandas as pd
 
				 import numpy as np
			
 
				 from datetime import date
			
 
				 from copy import deepcopy
			
 
				-from typing import Dict, List, Optional
			
 
				+from typing import Dict, List, Optional, Union
			
 
				 from pathlib import Path
			
 
				 from .definitions import custom_country_mapping, custom_folders
			
 
				 from .definitions import root_path, downloaded_data_path, extracted_data_path
			
@@ -378,6 +378,7 @@ def convert_categories(
 
				 ) -> xr.Dataset:
			
 
				     """
			
 
				     convert data from one category terminology to another
			
 
				+    # TODO rewrite to use aggregate_coordinates functions
			
 
				     """
			
 
				     print(f"converting categories to {terminology_to}")
			
 
				 
			
@@ -980,3 +981,40 @@ def fix_rows(
 
				         data.loc[indices_to_merge[0]] = new_row
			
 
				         data = data.drop(indices_to_merge[1:])
			
 
				     return data
			
 
				+
			
 
				+
			
 
				+def make_wide_table(
			
 
				+        data: pd.DataFrame,
			
 
				+        keyword: str,
			
 
				+        col: Union[int, str],
			
 
				+        index_cols: List[Union[int, str]]
			
 
				+) -> pd.DataFrame:
			
 
				+    index = data.loc[data[col] == keyword].index
			
 
				+    if not list(index):
			
 
				+        print("Keyword for table transformation not found")
			
 
				+        return data
			
 
				+    elif len(index)==1:
			
 
				+        print("Keyword for table transformation found only once")
			
 
				+        return data
			
 
				+    else:
			
 
				+        df_all = None
			
 
				+        for i, item in enumerate(index):
			
 
				+            loc = data.index.get_loc(item)
			
 
				+            if i < len(index) - 1:
			
 
				+                next_loc = data.index.get_loc(index[i + 1])
			
 
				+            else:
			
 
				+                next_loc = data.index[-1] + 1
			
 
				+            df_to_add = data.loc[list(range(loc, next_loc))]
			
 
				+            # select only cols which don't have NaN, Null, or '' as header
			
 
				+            filter_nan = ((~df_to_add.iloc[0].isnull()) & (df_to_add.iloc[0] != 'NaN')& (df_to_add.iloc[0] != ''))
			
 
				+            df_to_add = df_to_add.loc[: , filter_nan]
			
 
				+            df_to_add.columns = df_to_add.iloc[0]
			
 
				+            #print(df_to_add.columns)
			
 
				+            df_to_add = df_to_add.drop(loc)
			
 
				+            df_to_add = df_to_add.set_index(index_cols)
			
 
				+
			
 
				+            if df_all is None:
			
 
				+                df_all = df_to_add
			
 
				+            else:
			
 
				+                df_all = pd.concat([df_all, df_to_add], axis=1, join='outer')
			
 
				+        return df_all