jguetschow
/
UNFCCC_non-AnnexI_data


			
				
					
						
						
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361
							# config and functions for Taiwan NIR 2022

from typing import Union, List
import pandas as pd

# Label that is embedded in the entity names of the table definitions in this
# file, e.g. "CH4 (AR4GWP100)".
# NOTE(review): presumably the name of the GWP set used for the CO2-equivalent
# values (AR4, 100 year horizon) - confirm against the report.
gwp_to_use = "AR4GWP100"

def fix_rows(data: pd.DataFrame, rows_to_fix: list, col_to_use: Union[int, str], n_rows: int)->pd.DataFrame:
    """Merge table rows whose content was split over several rows.

    For every value in ``rows_to_fix`` all rows where ``data[col_to_use]``
    equals that value are looked up. For each hit a group of neighbouring
    rows is merged into one row by joining the cell texts column by column
    with a single space; the merged row replaces the first row of the group
    and the remaining rows of the group are dropped.

    Which rows form the group depends on ``n_rows``:

    * ``n_rows == -2``: the row *before* the hit and the hit itself. The
      merge only happens if ``col_to_use`` of the row before is ``''``.
    * otherwise: the hit and the following rows, ``n_rows`` rows in total.
      The merge only happens if ``col_to_use`` of the row directly after
      the hit is ``''``.

    Args:
        data: table to fix. It is not modified; all cells of the merged rows
            must be strings or NaN.
        rows_to_fix: values of ``col_to_use`` that mark split rows.
        col_to_use: label of the column in which the markers are searched.
        n_rows: number of rows to merge (``-2`` for the backward merge).

    Returns:
        The fixed table with a fresh ``0..n-1`` index. If ``rows_to_fix`` is
        empty the input object itself is returned unchanged.
    """
    if not rows_to_fix:
        # nothing to do, hand the input back untouched
        return data

    # Work on a copy with a fresh 0..n-1 index. This keeps the caller's frame
    # intact and guarantees that row positions and row labels are identical,
    # which the position arithmetic below relies on.
    data = data.reset_index(drop=True)

    for row in rows_to_fix:
        index = data.loc[data[col_to_use] == row].index
        if index.empty:
            print(f"Can't merge split row {row}")
            print(data[col_to_use])
            continue
        print(f"Merging split row {row}")

        n_total = len(data)
        indices_to_drop = []
        for loc in index:
            if n_rows == -2:
                # marker is the last part of the split row
                locs_to_merge = [loc - 1, loc]
                loc_to_check = loc - 1
            else:
                # marker is the first part of the split row
                locs_to_merge = list(range(loc, loc + n_rows))
                loc_to_check = loc + 1

            in_bounds = (
                bool(locs_to_merge)
                and locs_to_merge[0] >= 0
                and locs_to_merge[-1] < n_total
                and loc_to_check < n_total
            )
            if not in_bounds:
                # hit too close to the table edge (or no rows selected)
                print(f"Can't merge split row {row}: rows to merge lie outside the table")
                continue

            if data[col_to_use].loc[loc_to_check] != '':
                # neighbouring row has its own label, so the row is not split
                continue

            # replace numerical NaN values so that the cells can be joined
            rows_to_merge = data.iloc[locs_to_merge].fillna('')
            new_row = rows_to_merge.agg(' '.join)
            # Replace the double spaces created by joining empty cells. This
            # must be done here and not at the end as splits are not always
            # the same and join would produce different col values.
            new_row = new_row.str.replace("  ", " ", regex=False)
            new_row = new_row.str.strip()

            data.loc[locs_to_merge[0]] = new_row
            indices_to_drop.extend(locs_to_merge[1:])

        # drop only after all hits are processed so positions stay valid
        data = data.drop(indices_to_drop)
        data = data.reset_index(drop=True)
    return data

def make_wide_table(data: pd.DataFrame, keyword: str, col: Union[int, str], index_cols: List[Union[int, str]])->pd.DataFrame:
    """Turn vertically stacked sub-tables into one wide table.

    Every row where ``data[col]`` equals ``keyword`` is treated as the header
    row of a sub-table that extends up to the next header row (or the end of
    ``data``). For each sub-table the columns with a missing, ``'NaN'`` or
    ``''`` header are removed, the header row becomes the column labels, and
    ``index_cols`` become the index. The sub-tables are then concatenated
    side by side with an outer join on the index.

    All row handling is positional, so ``data`` may carry any index.

    Args:
        data: table containing the stacked sub-tables.
        keyword: cell value that identifies a header row.
        col: label of the column in which ``keyword`` is searched.
        index_cols: header names to use as the index of every sub-table.

    Returns:
        The wide table. If the keyword is not found, or found only once, a
        message is printed and ``data`` itself is returned unchanged.
    """
    # positions (not labels) of all header rows
    is_header = (data[col] == keyword).tolist()
    starts = [pos for pos, hit in enumerate(is_header) if hit]

    if not starts:
        print("Keyword for table transformation not found")
        return data
    if len(starts) == 1:
        print("Keyword for table transformation found only once")
        return data

    # each sub-table ends where the next one starts, the last one at the end
    stops = starts[1:] + [len(data)]

    df_all = None
    for start, stop in zip(starts, stops):
        df_to_add = data.iloc[start:stop]
        # select only cols which don't have NaN, Null, or '' as header
        header = df_to_add.iloc[0]
        filter_nan = (~header.isnull()) & (header != 'NaN') & (header != '')
        df_to_add = df_to_add.loc[:, filter_nan]
        df_to_add.columns = df_to_add.iloc[0]
        # remove the header row itself, it now lives in the column labels
        df_to_add = df_to_add.iloc[1:]
        df_to_add = df_to_add.set_index(index_cols)

        if df_all is None:
            df_all = df_to_add
        else:
            df_all = pd.concat([df_all, df_to_add], axis=1, join='outer')
    return df_all
        

# Page defs to hold information on reading the tables.
# Keys are strings holding a number, one entry per page.
# NOTE(review): the option names look like keyword arguments of
# camelot.read_pdf - confirm against the reading script.
page_defs = {
    "5": {
        "table_areas": ["36,523,563,68"],
        "split_text": False,
        "flavor": "stream",
    },
    "6": {
        "table_areas": ["34,562,563,53"],
        # explicit column separators are not needed here (works without):
        # "columns": ['195,228,263,295,328,363,395,428,462,495,529'],
        "split_text": True,
        "flavor": "stream",
    },
    "7": {
        "table_areas": [
            "36,740,499,482",
            "36,430,564,53",
        ],
        "split_text": True,
        "flavor": "stream",
    },
    "8": {
        "table_areas": ["35,748,503,567"],
        "split_text": True,
        "flavor": "stream",
    },
    "9": {
        "table_areas": [
            "35,747,565,315",
            "36,273,565,50",
        ],
        "split_text": False,
        "flavor": "stream",
    },
    "11": {
        "table_areas": ["35,744,563,434"],
        "split_text": True,
        "flavor": "stream",
    },
    "12": {
        "table_areas": ["33,747,562,86"],
        "split_text": True,
        "flavor": "stream",
    },
    "13": {
        "table_areas": ["34,303,564,54"],
        "split_text": True,
        "flavor": "stream",
    },
    "14": {
        "table_areas": ["34,754,564,256"],
        "columns": ["220,251,283,314,344,371,406,438,470,500,530"],
        "split_text": True,
        "flavor": "stream",
    },
    "15": {
        "table_areas": ["34,487,564,42"],
        "split_text": True,
        "flavor": "stream",
    },
    # With stream the row index is messed up, with lattice the column index.
    # Read with lattice and fix the column header manually.
    "16": {
        "table_areas": ["34,418,564,125"],
        # "columns": ['107,209,241,273,306,338,369,402,433,466,498,533'],
        "split_text": True,
        "flavor": "lattice",
    },
    "17": {
        "table_areas": ["34,534,564,49"],
        "columns": ["188,232,263,298,331,362,398,432,464,497,530"],
        "split_text": True,
        "flavor": "stream",
    },
}

# Table defs to hold information on how to process the tables.
# One entry per table of the report, keyed by the table number.
# NOTE(review): "rows_to_fix" is nested as {column: {n_rows: [row labels]}} and
# presumably feeds fix_rows; "wide_keyword", "col_wide_kwd" and "index_cols"
# presumably feed make_wide_table - confirm against the reading script.
table_defs = {
    # 1990-2020 Carbon Dioxide Emissions and Sequestration in Taiwan
    "ES2.2": {
        "tables": [1, 2],
        "rows_to_fix": {
            0: {
                3: [
                    "1.A.4.c Agriculture, Forestry, Fishery, and",
                    "2.D Non-Energy Products from Fuels and",
                    "4. Land Use, Land Use Change and Forestry",
                ],
            },
        },
        "index_cols": ["GHG Emission Source and Sinks"],
        "wide_keyword": "GHG Emission Source and Sinks",
        "col_wide_kwd": 0,
        "entity": "CO2",
        "unit": "kt",
        "cat_codes_manual": {
            "Net GHG Emission (including LULUCF)": "0",
            "Total GHG Emission (excluding LULUCF)": "M.0.EL",
        },
    },
    # 1990-2020 Methane Emissions in Taiwan
    "ES2.3": {
        "tables": [3, 4],
        "rows_to_fix": {},
        "index_cols": ["GHG Emission Sources and Sinks"],
        "wide_keyword": "GHG Emission Sources and Sinks",
        "col_wide_kwd": 0,
        "entity": f"CH4 ({gwp_to_use})",
        "unit": "ktCO2eq",
        "cat_codes_manual": {
            "Total Methane Emissions": "0",
        },
    },
    # 1990-2020 Nitrous Oxide Emissions in Taiwan
    "ES2.4": {
        "tables": [5],
        "fix_cats": {
            0: {
                # category label with a stray trailing "l" -> clean label
                "Total Nitrous Oxide Emissionsl": "Total Nitrous Oxide Emissions",
            },
        },
        "rows_to_fix": {},
        "index_cols": ["GHG Emission Sources and Sinks"],
        "wide_keyword": "GHG Emission Sources and Sinks",
        "col_wide_kwd": 0,
        "entity": f"N2O ({gwp_to_use})",
        "unit": "ktCO2eq",
        "cat_codes_manual": {
            "Total Nitrous Oxide Emissions": "0",
        },
    },
    # 1990-2020 Greenhouse Gas Emission in Taiwan by Sector
    "ES3.1": {
        "tables": [7],
        "rows_to_fix": {},
        "index_cols": ["GHG Emission Sources and Sinks"],
        "wide_keyword": "GHG Emission Sources and Sinks",
        "col_wide_kwd": 0,
        "entity": f"KYOTOGHG ({gwp_to_use})",
        "unit": "ktCO2eq",
        "cat_codes_manual": {
            "Net GHG Emission (including LULUCF)": "0",
            "Total GHG Emission (excluding LULUCF)": "M.0.EL",
        },
    },
    # 1990-2020 Greenhouse Gas Emissions Produced by Energy Sector in Taiwan
    "ES3.2": {
        "tables": [8],
        "rows_to_fix": {},
        "index_cols": ["GHG Emission Sources and Sinks"],
        "wide_keyword": "GHG Emission Sources and Sinks",
        "col_wide_kwd": 0,
        "gas_splitting": {
            "Total CO2 Emission": "CO2",
            "Total CH4 Emission": f"CH4 ({gwp_to_use})",
            "Total N2O Emission": f"N2O ({gwp_to_use})",
            "Total Emission from Energy Sector": f"KYOTOGHG ({gwp_to_use})",
            "GHG Emission Sources and Sinks": "entity",
        },
        "unit": "ktCO2eq",
        "cat_codes_manual": {
            "Total CO2 Emission": "1",
            "Total CH4 Emission": "1",
            "Total N2O Emission": "1",
            "Total Emission from Energy Sector": "1",
        },
    },
    # 1990-2020 Greenhouse Gas Emissions Produced by Industrial Process and
    # Product Use Sector (IPPU) in Taiwan
    "ES3.3": {
        "tables": [9, 10],
        "rows_to_fix": {},
        "index_cols": ["GHG Emission Sources and Sinks"],
        "wide_keyword": "GHG Emission Sources and Sinks",
        "col_wide_kwd": 0,
        "gas_splitting": {
            "Total CO2 Emission": "CO2",
            "Total CH4 Emission": f"CH4 ({gwp_to_use})",
            "Total N2O Emission": f"N2O ({gwp_to_use})",
            "Total HFCs Emission": f"HFCS ({gwp_to_use})",
            "Total PFCs Emission (2.E Electronics Industry)": f"PFCS ({gwp_to_use})",
            "Total SF6 Emission": f"SF6 ({gwp_to_use})",
            "Total NF3 Emission (2.E Electronics Industry)": f"NF3 ({gwp_to_use})",
            "Total Emission from IPPU Sector": f"KYOTOGHG ({gwp_to_use})",
            "GHG Emission Sources and Sinks": "entity",
        },
        "unit": "ktCO2eq",
        "cat_codes_manual": {
            "Total CO2 Emission": "2",
            "Total CH4 Emission": "2",
            "Total N2O Emission": "2",
            "Total HFCs Emission": "2",
            "Total PFCs Emission (2.E Electronics Industry)": "2.E",
            "Total SF6 Emission": "2",
            "Total NF3 Emission (2.E Electronics Industry)": "2.E",
            "Total Emission from IPPU Sector": "2",
        },
        "drop_rows": [
            # has fewer significant digits than in table ES2.2
            ("2.D Non-Energy Products from Fuels and Solvent Use", "CO2"),
        ],
    },
    # 1990-2020 Greenhouse Gas Emissions Produced by Agriculture Sector in Taiwan
    "ES3.4": {
        "tables": [11],
        "rows_to_fix": {},
        "index_cols": ["GHG Emission Sources and Sinks"],
        "wide_keyword": "GHG Emission Sources and Sinks",
        "col_wide_kwd": 0,
        "gas_splitting": {
            "Total CO2 Emission (3.H Urea applied)": "CO2",
            "Total CH4 Emission": f"CH4 ({gwp_to_use})",
            "Total N2O Emission": f"N2O ({gwp_to_use})",
            "Total Emission From Agriculture Sector": f"KYOTOGHG ({gwp_to_use})",
            "GHG Emission Sources and Sinks": "entity",
        },
        "unit": "ktCO2eq",
        "cat_codes_manual": {
            "Total CO2 Emission (3.H Urea applied)": "3.H",
            "Total CH4 Emission": "3",
            "Total N2O Emission": "3",
            "Total Emission From Agriculture Sector": "3",
        },
    },
    # 1990-2020 Greenhouse Gas Emissions in Taiwan by Waste Sector
    "ES3.6": {
        "tables": [13],
        "rows_to_fix": {
            0: {
                3: ["Total CO2 Emission"],
            },
        },
        "index_cols": ["GHG Emission Sources and Sinks"],
        "wide_keyword": "GHG Emission Sources and Sinks",
        # two column header
        "col_wide_kwd": 0,
        "gas_splitting": {
            "Total CO2 Emission (5.C Incineration and Open Burning of Waste)": "CO2",
            "Total CH4 Emission": f"CH4 ({gwp_to_use})",
            "Total N2O Emission": f"N2O ({gwp_to_use})",
            "Total Emission from Waste Sector": f"KYOTOGHG ({gwp_to_use})",
            "GHG Emission Sources and Sinks": "entity",
        },
        "unit": "ktCO2eq",
        "cat_codes_manual": {
            "Total CO2 Emission (5.C Incineration and Open Burning of Waste)": "5.C",
            "Total CH4 Emission": "5",
            "Total N2O Emission": "5",
            "Total Emission from Waste Sector": "5",
        },
    },
}

# Definitions for tables that are kept apart from table_defs.
# NOTE(review): the name and the "skip for now" remark on ES3.5 suggest these
# tables are currently not processed - confirm against the reading script.
table_defs_skip = {
    # 1990-2020 Greenhouse Gas Emissions and Sequestration in Taiwan by Type
    "ES2.1": {
        "tables": [0],
        "rows_to_fix": {
            0: {
                3: ["CO2"],
            },
            # where col 0 is empty
            1: {
                3: ["Net GHG Emission", "Total GHG Emission"],
            },
        },
        "index_cols": ["GHG", "GWP"],
        "wide_keyword": "GHG",
        "col_wide_kwd": 0,
        "unit": "ktCO2eq",
    },
    # 1990-2020 Fluoride-Containing Gas Emissions in Taiwan
    "ES2.5": {
        "tables": [6],
        "rows_to_fix": {
            0: {
                -2: [
                    "Total SF6 Emissions",
                    "Total NF3 Emissions",
                ],
            },
        },
        "index_cols": ["GHG Emission Sources and Sinks"],
        "wide_keyword": "GHG Emission Sources and Sinks",
        "col_wide_kwd": 0,
        # no entity configured for this table yet:
        # "entity": "CO2",
        "unit": "ktCO2eq",
    },
    # skip for now:
    # 1990-2020 Changes in Carbon Sequestration by LULUCF Sector in Taiwan
    # need to consider the two columns specially (merge?)
    "ES3.5": {
        "tables": [12],
        "rows_to_fix": {},
        # header is a merged column :-(
        "index_cols": ["GHG Emission Sources and Sinks"],
        "wide_keyword": "GHG Emission Sources and Sinks",
        # two column header
        "col_wide_kwd": 0,
        "unit": "kt",
        "entity": "CO2",
    },
}