Browse Source

Add code to read Chile's BUR4

Johannes Gütschow 3 years ago
parent
commit
5c3e7b8b34

+ 186 - 0
code/UNFCCC_reader/Chile/config_CHL_BUR4.py

@@ -0,0 +1,186 @@
+## parameters for conversion to IPCC2006 categories
+filter_remove_IPCC2006 = {
+    "filter_cats": { # categories (still in Chile's original codes) that have no 1:1 match in IPCC2006 or are additional subsectors
+        "category (IPCC2006_PRIMAP)": [
+            # refrigeration and air conditioning subsectors don't match IPCC2006
+            '2.F.1.a', '2.F.1.b', '2.F.1.c', '2.F.1.d', '2.F.1.e', '2.F.1.f',
+            # additional subsectors for other cattle in enteric fermentation
+            '3.A.1.b.i', '3.A.1.b.ii', '3.A.1.b.iii', '3.A.1.b.iv', '3.A.1.b.v',
+            # additional subcategories for swine in enteric fermentation
+            '3.A.3.a', '3.A.3.b', '3.A.3.c',
+            # other animals in enteric fermentation not fitting the IPCC2006 other animals
+            '3.A.4',
+            # would need to be summed to IPCC2006 '3.A.1.j' (see the notes below cat_mapping) - TODO confirm target code
+            '3.A.4.f', '3.A.4.g', '3.A.4.g.i', '3.A.4.g.ii',
+            # additional subsectors for other cattle in manure management (3.B is mapped to IPCC2006 3.A.2)
+            '3.B.1.b.i', '3.B.1.b.ii', '3.B.1.b.iii', '3.B.1.b.iv', '3.B.1.b.v',
+            # additional subcategories for swine in manure management
+            '3.B.3.a', '3.B.3.b', '3.B.3.c',
+            # other animals in manure management not fitting the IPCC2006 other animals
+            '3.B.4',
+            # would need to be summed to the IPCC2006 "other" animal category of 3.A.2 (presumably '3.A.2.j') - TODO confirm
+            '3.B.4.f', '3.B.4.g', '3.B.4.g.i', '3.B.4.g.ii',
+            # subsectors of indirect N2O from manure management
+            '3.B.5.a', '3.B.5.b', '3.B.5.c', '3.B.5.d', '3.B.5.d.i', '3.B.5.d.ii',
+            '3.B.5.d.iii', '3.B.5.d.iv', '3.B.5.d.v', '3.B.5.d.vi', '3.B.5.d.vii',
+            # subsectors of rice cultivation
+            '3.C.1', '3.C.2', '3.C.3', '3.C.4',
+            # no direct representation of "agricultural soils" in IPCC 2006
+            '3.D',
+            # subsectors of 3.D.1 not matching the subsectors of 3.C.4 (direct emissions from managed soils)
+            # unused mapping draft: '3.D.1.a.': '3.C.1.a', '3.D.1.b.': '3.C.1.b', '3.D.1.c.': '3.A.4.c', '3.D.1.d.': '3.C.4.d',
+            '3.D.1.a', '3.D.1.b', '3.D.1.b.i', '3.D.1.b.ii', '3.D.1.b.iii', '3.D.1.c',
+            '3.D.1.d', '3.D.1.e', '3.D.1.f', '3.D.1.g',
+            # additional subsector level of 3.D.2.a (3.C.5.a Atmospheric deposition)
+            '3.D.2.a.i', '3.D.2.a.ii', '3.D.2.a.ii.1', '3.D.2.a.ii.2', '3.D.2.a.ii.3', '3.D.2.a.iii',
+            # additional subsector level of 3.D.2.b (3.C.5.b Nitrogen leaching and runoff)
+            '3.D.2.b.i', '3.D.2.b.ii', '3.D.2.b.ii.1', '3.D.2.b.ii.2', '3.D.2.b.ii.3', '3.D.2.b.iii',
+            '3.D.2.b.iv', '3.D.2.b.v',
+            # additional subsector level of 3.F (3.C.1.b Biomass burning in cropland)
+            '3.F.1', '3.F.2', '3.F.3',
+            # additional subsector level of 3.G (3.C.2 Liming)
+            '3.G.1', '3.G.2',
+            # additional subsector levels of 4.A.1 (mapped to 3.B.1.a, Forest land remaining forest land)
+            '4.A.1.a', '4.A.1.a.i', '4.A.1.a.i.1', '4.A.1.a.i.1.a', '4.A.1.a.i.1.b', '4.A.1.a.i.1.c',
+            '4.A.1.a.i.1.d', '4.A.1.a.i.1.e', '4.A.1.a.i.1.f', '4.A.1.a.i.1.g', '4.A.1.a.i.1.h', 
+            '4.A.1.a.i.1.i', '4.A.1.a.i.1.j', '4.A.1.a.i.1.k', '4.A.1.a.i.1.l', '4.A.1.a.i.2', 
+            '4.A.1.a.i.2.a', '4.A.1.a.i.2.b', '4.A.1.a.i.2.c', '4.A.1.a.i.2.d', '4.A.1.a.i.2.e',
+            '4.A.1.a.i.2.f', '4.A.1.a.i.2.g', '4.A.1.a.i.2.h', '4.A.1.a.i.2.i', '4.A.1.a.i.2.j',
+            '4.A.1.a.i.2.k', '4.A.1.a.i.2.l', '4.A.1.a.i.3', '4.A.1.a.i.3.a', '4.A.1.a.i.3.b',
+            '4.A.1.a.i.3.c', '4.A.1.a.i.3.d', '4.A.1.a.i.3.e', '4.A.1.a.i.3.f', '4.A.1.a.i.3.g',
+            '4.A.1.a.i.3.h', '4.A.1.a.i.3.i', '4.A.1.a.i.3.j', '4.A.1.a.i.3.k', '4.A.1.a.i.3.l',
+            '4.A.1.a.ii', '4.A.1.a.ii.1', '4.A.1.a.ii.2', '4.A.1.a.ii.3', '4.A.1.a.ii.4',
+            '4.A.1.a.ii.5', '4.A.1.a.ii.6', '4.A.1.a.ii.7', '4.A.1.b', '4.A.1.b.i', '4.A.1.b.i.1',
+            '4.A.1.b.i.2', '4.A.1.b.i.3', '4.A.1.b.i.4', '4.A.1.b.ii', '4.A.1.b.ii.1', '4.A.1.b.ii.2',
+            '4.A.1.b.iii', '4.A.1.b.iii.1', '4.A.1.b.iii.1.a', '4.A.1.b.iii.1.b', '4.A.1.b.iii.2',
+            '4.A.1.b.iv', '4.A.1.c', '4.A.1.c.i', '4.A.1.c.ii',
+            # additional subsector level in land converted to forest land
+            '4.A.2.a.i', '4.A.2.a.ii', '4.A.2.b.i', '4.A.2.b.ii', '4.A.2.c.i', '4.A.2.c.ii',
+            '4.A.2.d.i', '4.A.2.d.ii', '4.A.2.e.i', '4.A.2.e.ii',
+            # subsectors of solid waste disposal might not match
+            '5.A.1', '5.A.2', '5.A.3',
+        ],
+    },
+}
+
+
+cat_mapping = { # Chile's category code -> IPCC 2006 code; categories not listed here have the same code as in IPCC 2006 specifications
+    '3': 'M.AG',  # IPCC2006 '3' (AFOLU) is rebuilt afterwards via aggregate_cats
+    '3.A': '3.A.1',
+    '3.A.1': '3.A.1.a',
+    '3.A.1.a': '3.A.1.a.i',
+    '3.A.1.b': '3.A.1.a.ii',
+    '3.A.2': '3.A.1.c',
+    '3.A.3': '3.A.1.h',
+    '3.A.4.a': '3.A.1.b',
+    '3.A.4.b': '3.A.1.d',
+    '3.A.4.c': '3.A.1.f',
+    '3.A.4.d': '3.A.1.g',
+    '3.A.4.e': '3.A.1.i',
+    '3.B': '3.A.2',
+    '3.B.1': '3.A.2.a',
+    '3.B.1.a': '3.A.2.a.i',
+    '3.B.1.b': '3.A.2.a.ii',
+    '3.B.2': '3.A.2.c',
+    '3.B.3': '3.A.2.h',
+    '3.B.4.a': '3.A.2.b',
+    '3.B.4.b': '3.A.2.d',
+    '3.B.4.c': '3.A.2.f',
+    '3.B.4.d': '3.A.2.g',
+    '3.B.4.e': '3.A.2.i',
+    '3.B.5': '3.C.6',
+    '3.C': '3.C.7',
+    '3.D.1': '3.C.4', 
+    '3.D.2': '3.C.5',
+    '3.D.2.a': '3.C.5.a', # target code is not in climate_categories
+    '3.D.2.b': '3.C.5.b', # target code is not in climate_categories
+    '3.E': '3.C.1.c',
+    '3.F': '3.C.1.b',
+    '3.G': '3.C.2',
+    '3.H': '3.C.3',
+    '3.I': '3.C.8.a', # summed with '3.C.8.b' into '3.C.8' via aggregate_cats
+    '3.J': '3.C.8.b', # summed with '3.C.8.a' into '3.C.8' via aggregate_cats
+    '4': 'M.LULUCF',
+    '4.A': '3.B.1',
+    '4.A.1': '3.B.1.a',
+    '4.A.2': '3.B.1.b',
+    '4.A.2.a': '3.B.1.b.i',
+    '4.A.2.b': '3.B.1.b.ii',
+    '4.A.2.c': '3.B.1.b.iii',
+    '4.A.2.d': '3.B.1.b.iv',
+    '4.A.2.e': '3.B.1.b.v',
+    '4.B': '3.B.2',
+    '4.B.1': '3.B.2.a',
+    '4.B.2': '3.B.2.b',
+    '4.B.2.a': '3.B.2.b.i',
+    '4.B.2.b': '3.B.2.b.ii',
+    '4.B.2.c': '3.B.2.b.iii',
+    '4.B.2.d': '3.B.2.b.iv',
+    '4.B.2.e': '3.B.2.b.v',
+    '4.C': '3.B.3',
+    '4.C.1': '3.B.3.a',
+    '4.C.2': '3.B.3.b',
+    '4.C.2.a': '3.B.3.b.i',
+    '4.C.2.b': '3.B.3.b.ii',
+    '4.C.2.c': '3.B.3.b.iii',
+    '4.C.2.d': '3.B.3.b.iv',
+    '4.C.2.e': '3.B.3.b.v',
+    '4.D': '3.B.4',
+    '4.D.1': '3.B.4.a',
+    '4.D.2': '3.B.4.b',
+    '4.D.2.a': '3.B.4.b.i',
+    '4.D.2.b': '3.B.4.b.ii',
+    '4.D.2.c': '3.B.4.b.iii',
+    '4.D.2.d': '3.B.4.b.iv',
+    '4.D.2.e': '3.B.4.b.v',
+    '4.E': '3.B.5',
+    '4.E.1': '3.B.5.a',
+    '4.E.2': '3.B.5.b',
+    '4.E.2.a': '3.B.5.b.i',
+    '4.E.2.b': '3.B.5.b.ii',
+    '4.E.2.c': '3.B.5.b.iii',
+    '4.E.2.d': '3.B.5.b.iv',
+    '4.E.2.e': '3.B.5.b.v',
+    '4.F': '3.B.6',
+    '4.F.1': '3.B.6.a',
+    '4.F.2': '3.B.6.b',
+    '4.F.2.a': '3.B.6.b.i',
+    '4.F.2.b': '3.B.6.b.ii',
+    '4.F.2.c': '3.B.6.b.iii',
+    '4.F.2.d': '3.B.6.b.iv',
+    '4.F.2.e': '3.B.6.b.v',
+    '4.G': '3.D.1',
+    '4.H': '3.D.2',
+    '5': '4',  # Chile's sector 5 and its subcategories map onto IPCC2006 sector 4
+    '5.A': '4.A',
+    '5.B': '4.B',
+    '5.C': '4.C',
+    '5.C.1': '4.C.1',
+    '5.C.2': '4.C.2',
+    '5.D': '4.D',
+    '5.D.1': '4.D.1',
+    '5.D.2': '4.D.2',
+    '5.E': '4.E',
+}
+
+# notes on categories that are removed by filter_remove_IPCC2006 instead of being mapped
+# '2.F.1.a.': included in '2.F.1.a.3' (not in climate categories)
+# '2.F.1.b.': included in '2.F.1.a.2' (not in climate categories)
+# '2.F.1.c.': included in '2.F.1.a.1' (not in climate categories)
+# '2.F.1.d.': included in '2.F.1.a' (transport refrigeration)
+# '2.F.1.e.': included in '2.F.1.a' (stationary air conditioning)
+# '2.F.1.f.': corresponds to '2.F.1.b' (mobile air conditioning)
+# '3.A.4.f.': included in '3.A.1.j'
+# '3.A.4.g.': included in '3.A.1.j'
+# '3.A.4.g.i.': subcategory of '3.A.4.g.'
+# '3.A.4.g.ii.': subcategory of '3.A.4.g.'
+
+aggregate_cats = {  # IPCC2006 category -> source categories to sum; processed in this order, so sources must come before the totals using them
+    '3.A': {'sources': ['3.A.1', '3.A.2'], 'name': 'Livestock'},
+    '3.B': {'sources': ['3.B.1', '3.B.2', '3.B.3', '3.B.4', '3.B.5', '3.B.6'], 'name': 'Land'},
+    '3.C.1': {'sources': ['3.C.1.b','3.C.1.c'], 'name': 'Emissions from Biomass Burning'},  # must precede '3.C'
+    '3.C.8': {'sources': ['3.C.8.a', '3.C.8.b'], 'name': 'Other'},  # must precede '3.C'
+    '3.C': {'sources': ['3.C.1', '3.C.2', '3.C.3', '3.C.4', '3.C.5', '3.C.6', '3.C.7', '3.C.8'], 'name': 'Aggregate sources and non-CO2 emissions sources on land'},
+    '3.D': {'sources': ['3.D.1', '3.D.2'], 'name': 'Other'},
+    '3': {'sources': ['3.A', '3.B', '3.C', '3.D'], 'name': 'AFOLU'},  # built last from the four aggregates above
+}

+ 261 - 0
code/UNFCCC_reader/Chile/read_CHL_BUR4_from_xlsx.py

@@ -0,0 +1,261 @@
+# This script reads data from Chile's 2020 national inventory, which underlies BUR4.
+# The data is read from the xlsx file.
+
+import pandas as pd
+import primap2 as pm2
+import re
+from pathlib import Path
+
+from config_CHL_BUR4 import cat_mapping, filter_remove_IPCC2006, aggregate_cats
+from primap2.pm2io._data_reading import matches_time_format
+from primap2.pm2io._data_reading import filter_data
+
+# ###
+# configuration: input/output locations and all parameters for reading and converting the data
+# ###
+
+input_folder = Path('..') / '..' / '..' / 'downloaded_data' / 'UNFCCC' / 'Chile' / 'BUR4'  # relative path; presumably resolved from this script's folder - TODO confirm
+output_folder = Path('..') / '..' / '..' / 'extracted_data' / 'Chile'
+
+output_filename = 'CHL_BUR4_2021_'  # file name prefix; the category terminology name is appended when writing
+
+inventory_file = 'Inventario_Nacional_de_GEI-1990-2018.xlsx'
+years_to_read = range(1990, 2018 + 1)  # 1990-2018 inclusive; one sheet named after each year is read
+
+# configuration for conversion to PRIMAP2 data format (passed to nir_add_unit_information)
+unit_row = "header"
+unit_info = {
+    'regexp_entity': r'(.*)\s\(.*\)$',  # captures the text before a trailing "(...)"
+    'regexp_unit': r'.*\s\((.*)\)$',  # captures the text inside the trailing "(...)"
+    'default_unit': 'kt',
+    'manual_repl_unit': {
+        'kt CO₂ eq': 'ktCO2eq',
+        'HFC (kt CO₂ eq)': 'ktCO2eq',
+        'PFC (kt CO₂ eq)': 'ktCO2eq',
+        'SF₆ (kt CO₂ eq)': 'ktCO2eq',
+    },
+    'manual_repl_entity': {
+        'kt CO₂ eq': 'KYOTOGHG (AR4GWP100)',
+        'HFC (kt CO₂ eq)': 'HFCS (AR4GWP100)',
+        'PFC (kt CO₂ eq)': 'PFCS (AR4GWP100)',
+        'SF₆ (kt CO₂ eq)': 'SF6 (AR4GWP100)',
+    }
+}
+cols_to_drop = ['Unnamed: 14', 'Unnamed: 16', 'Código IPCC.1',
+                'Categorías de fuente y sumidero de gases de efecto invernadero.1']  # dropped from every sheet right after reading
+# columns for category code and original category name (used as the row index)
+index_cols = ['Código IPCC', 'Categorías de fuente y sumidero de gases de efecto invernadero']
+
+# operations on long format DF: columns whose leading/trailing whitespace is stripped
+cols_for_space_stripping = ['category', 'orig_cat_name', 'entity']
+
+time_format = "%Y"  # the same value is assigned again inside the aggregation loop
+
+coords_cols = {
+    "category": "category",
+    "entity": "entity",
+    "unit": "unit",
+}
+
+add_coords_cols = {
+    "orig_cat_name": ["orig_cat_name", "category"],  # presumably [column name, coordinate it belongs to] - TODO confirm
+}
+
+coords_terminologies = {  # terminologies for the data in Chile's original categories
+    "area": "ISO3",
+    "category": "IPCC2006_1996_Chile_NIR",
+    "scenario": "PRIMAP",
+}
+
+coords_terminologies_2006 = {  # as above, but with the category terminology used after the IPCC2006 conversion
+    "area": "ISO3",
+    "category": "IPCC2006_PRIMAP",
+    "scenario": "PRIMAP",
+}
+
+coords_defaults = {
+    "source": "CHL-GHG-Inventory",
+    "provenance": "measured",
+    "area": "CHL",
+    "scenario": "BUR4"
+}
+
+coords_value_mapping = {
+    "entity": {
+        "COVDM": "NMVOC",
+        "CO₂ neto": "CO2",
+        "CH₄": "CH4",
+        # "HFC": "HFCS",  (disabled; presumably covered by 'manual_repl_entity' in unit_info)
+        "HFC-125": "HFC125",
+        "HFC-134a": "HFC134a",
+        "HFC-143a": "HFC143a",
+        "HFC-152a": "HFC152a",
+        "HFC-227ea": "HFC227ea",
+        "HFC-23": "HFC23",
+        "HFC-236fa": "HFC236fa",
+        "HFC-245fa": "HFC245fa",
+        "HFC-32": "HFC32",
+        "HFC-365mfc": "HFC365mfc",
+        "HFC-43-10mee": "HFC4310mee",
+        "N₂O": "N2O",
+        # "PFC": "PFCS",  (disabled; presumably covered by 'manual_repl_entity' in unit_info)
+        "PFC-116": "C2F6",
+        "PFC-14": "CF4",
+        "PFC-218": "C3F8",
+        # "SF₆": "SF6",  (disabled; presumably covered by 'manual_repl_entity' in unit_info)
+        "SO₂": "SO2",
+    },
+    "unit": "PRIMAP1",
+}
+
+coords_value_filling = {
+    'category': {  # col to fill
+        'orig_cat_name': {  # col to fill from
+            'Todas las emisiones y las absorciones nacionales': '0',  # from value: to value
+            'Tanque internacional': 'M.BK',
+            'Aviación internacional': 'M.BK.A',
+            'Navegación internacional': 'M.BK.M',
+            'Operaciones multilaterales': 'M.MULTIOP',
+            'Emisiones de CO2 de la biomasa': 'M.BIO',
+        }
+    }
+}
+
+filter_remove = {  # passed as filter_remove to convert_long_dataframe_if; presumably matching rows are dropped
+    "f1": {
+        "entity": ["Absorciones CO₂", "Emisiones CO₂"],
+    },
+    "f2": {
+        "orig_cat_name": ["Partidas informativas"],
+    },
+}
+
+filter_keep = {}
+
+meta_data = {
+    "references": "https://unfccc.int/documents/267936, https://snichile.mma.gob.cl/wp-content/uploads/2021/03/Inventario_Nacional_de_GEI-1990-2018.xlsx",
+    "rights": "",
+    "contact": "mail@johannes-guetschow.de.de",
+    "title": "Chile: BUR4",
+    "comment": "Read fom xlsx file by Johannes Gütschow",
+    "institution": "United Nations Framework Convention on Climate Change (UNFCCC)",
+}
+
+compression = dict(zlib=True, complevel=9)
+
+# ###
+# start data reading
+# ###
+df_all = None
+
+for year in years_to_read:
+    # read sheet for the year. Each sheet contains several tables,
+    # we only read the upper row as the other tables are summary tables
+    df_current = pd.read_excel(input_folder / inventory_file, sheet_name=str(year), skiprows=2, nrows=442, engine="openpyxl")
+    # drop the columns which are empty and repetition of the metadata for the second block
+    df_current.drop(cols_to_drop, axis=1, inplace=True)
+    # drop all rows where the index cols (category code and name) are both NaN
+    # as without one of them there is no category information
+    df_current.dropna(axis=0, how='all', subset=index_cols, inplace=True)
+    # set multi-index. necessary for the stack operation in the conversion to long format
+    df_current = df_current.set_index(index_cols)
+    # add unit row using information from entity row and add to index
+    df_current = pm2.pm2io.nir_add_unit_information(df_current, unit_row=unit_row, **unit_info)
+    # actual conversion to long format
+    df_current = pm2.pm2io.nir_convert_df_to_long(df_current, year)
+    # aggregate to one df
+    if df_all is None:
+        df_all = df_current
+    else:
+        df_all = pd.concat([df_all, df_current])
+
+df_all = df_all.reset_index(drop=True)
+
+# ###
+# postprocessing
+# ###
+# strip trailing and leading spaces
+for col in cols_for_space_stripping:
+    df_all[col] = df_all[col].str.strip()
+
+df_all["category"] = df_all["category"].str.rstrip('.')
+
+data_if = pm2.pm2io.convert_long_dataframe_if(
+    df_all,
+    coords_cols=coords_cols,
+    add_coords_cols=add_coords_cols,
+    coords_defaults=coords_defaults,
+    coords_terminologies=coords_terminologies,
+    coords_value_mapping=coords_value_mapping,
+    coords_value_filling=coords_value_filling,
+    filter_remove=filter_remove,
+    filter_keep=filter_keep,
+    meta_data=meta_data
+)
+
+# ###
+# save data to IF and native format
+# ###
+pm2.pm2io.write_interchange_format(output_folder / (output_filename + coords_terminologies["category"]), data_if)
+
+data_pm2 = pm2.pm2io.from_interchange_format(data_if)
+encoding = {var: compression for var in data_pm2.data_vars}
+data_pm2.pr.to_netcdf(output_folder / (output_filename + coords_terminologies["category"] + ".nc"), encoding=encoding)
+
+# ###
+# conversion to IPCC 2006 categories: convert again with the IPCC2006_PRIMAP label, then filter and rename category codes
+# ###
+
+data_if_2006 = pm2.pm2io.convert_long_dataframe_if(
+    df_all,
+    coords_cols=coords_cols,
+    add_coords_cols=add_coords_cols,
+    coords_defaults=coords_defaults,
+    coords_terminologies=coords_terminologies_2006,  # only difference to the first conversion call
+    coords_value_mapping=coords_value_mapping,
+    coords_value_filling=coords_value_filling,
+    filter_remove=filter_remove,
+    filter_keep=filter_keep,
+    meta_data=meta_data
+)
+
+cat_label = 'category (' + coords_terminologies_2006["category"] + ')'  # evaluates to 'category (IPCC2006_PRIMAP)'
+filter_data(data_if_2006, filter_remove=filter_remove_IPCC2006)  # return value unused; presumably filters data_if_2006 in place - TODO confirm
+data_if_2006 = data_if_2006.replace({cat_label: cat_mapping})  # rename the remaining Chile codes to IPCC2006 codes
+
+# aggregate categories
+for cat_to_agg in aggregate_cats:
+    mask = data_if_2006[cat_label].isin(aggregate_cats[cat_to_agg]["sources"])
+    df_test = data_if_2006[mask]
+
+    if len(df_test) > 0:
+        print(f"Aggregating category {cat_to_agg}")
+        df_combine = df_test.copy(deep=True)
+
+        time_format = '%Y'
+        time_columns = [
+            col
+            for col in df_combine.columns.values
+            if matches_time_format(col, time_format)
+        ]
+
+        for col in time_columns:
+            df_combine[col] = pd.to_numeric(df_combine[col], errors="coerce")
+
+        df_combine = df_combine.groupby(
+            by=['source', 'scenario (PRIMAP)', 'provenance', 'area (ISO3)', 'entity', 'unit']).sum()
+
+        df_combine.insert(0, cat_label, cat_to_agg)
+        df_combine.insert(1, "orig_cat_name", aggregate_cats[cat_to_agg]["name"])
+
+        df_combine = df_combine.reset_index()
+
+        data_if_2006 = pd.concat([data_if_2006, df_combine])
+    else:
+        print(f"no data to aggregate category {cat_to_agg}")
+
+pm2.pm2io.write_interchange_format(output_folder / (output_filename + coords_terminologies_2006["category"]), data_if_2006)
+
+data_pm2_2006 = pm2.pm2io.from_interchange_format(data_if)
+encoding = {var: compression for var in data_pm2_2006.data_vars}
+data_pm2_2006.pr.to_netcdf(output_folder / (output_filename + coords_terminologies_2006["category"] + ".nc"), encoding=encoding)