
Add code for Montenegro BUR3, fix for Thailand BUR3

Johannes Gütschow 2 years ago
commit f6f54f85ec

+ 65 - 0
code/UNFCCC_reader/Montenegro/config_MNE_BUR3.py

@@ -0,0 +1,65 @@
+# most time series are contained twice in the pdf and the 2005 data is also duplicated. Some
+# of the duplicated data is inconsistent, so we remove the erroneous time series here.
+# The keys of drop_data are the zero-based indices of the tables read by camelot.
+drop_data = {
+    2: { # individual sector time series are (mostly) wrong, keep only the 0.EL time series
+        "cats": ["1", "1.A", "1.A.1", "1.A.2", "1.A.3", "1.A.4", "1.A.5", "1.B", "1.B.1", "1.B.2",
+                 "2", "2.A", "2.B", "2.C", "2.D", "2.E", "2.F", "2.G", "2.H",
+                 "3", "3.A", "3.B"],
+        #"years": ["2005"], # 2005 data copy of 2019
+    },
+    3: { # individual sector time series are (mostly) wrong, keep only the 0.EL time series
+        "cats": ["3.C", "3.D", "3.E", "3.F", "3.G", "5", "5.A", "5.B", "5.C", "5.D", "6"]
+        #"years": ["2005"],
+    },
+    6: { #2005 data copy of 2019
+        "years": ["2005"],
+    },
+    7: { # 2005 data copy of 2019 for 3.G
+        "years": ["2005"],
+    },
+    25: { # 2005 data copy of 2019 (CO2, 2005-2019, first table)
+        "years": ["2005"],
+    },
+    26: { # 2005 data copy of 2019 (CO2, 2005-2019, second table)
+        "years": ["2005"],
+    },
+}
+
+cat_mapping = {
+    '3': 'M.AG',
+    '3.A': '3.A.1',
+    '3.B': '3.A.2',
+    '3.C': '3.C.7', # rice
+    '3.D': 'M.3.C.45AG', # Agricultural soils
+    '3.E': '3.C.1.c', # prescribed burning of savanna
+    '3.F': '3.C.1.b', # field burning of agricultural residues
+    '3.G': '3.C.3', # urea application
+    '4': 'M.LULUCF',
+    '4.A': '3.B.1', # forest
+    '4.B': '3.B.2', # cropland
+    '4.C': '3.B.3', # grassland
+    '4.D': '3.B.4', # wetland
+    '4.E': '3.B.5', # Settlements
+    '4.F': '3.B.6', # other land
+    '4.G': '3.D.1', # HWP
+    '5': '4',
+    '5.A': '4.A',
+    '5.B': '4.B',
+    '5.C': '4.C',
+    '5.D': '4.D',
+    '6': '5',
+}
+
+aggregate_cats = {
+    '3.A': {'sources': ['3.A.1', '3.A.2'], 'name': 'Livestock'},
+    '3.B': {'sources': ['3.B.1', '3.B.2', '3.B.3', '3.B.4', '3.B.5', '3.B.6'], 'name': 'Land'},
+    '3.C.1': {'sources': ['3.C.1.c', '3.C.1.b'], 'name': 'Emissions from Biomass Burning'},
+    '3.C': {'sources': ['3.C.1', '3.C.3', 'M.3.C.45AG', '3.C.7'],
+            'name': 'Aggregate sources and non-CO2 emissions sources on land'},
+    'M.3.C.AG': {'sources': ['3.C.1.b', '3.C.3', 'M.3.C.45AG', '3.C.7'],
+            'name': 'Aggregate sources and non-CO2 emissions sources on land (Agriculture)'},
+    'M.3.C.LU': {'sources': ['3.C.1.c'],
+            'name': 'Aggregate sources and non-CO2 emissions sources on land (Land use)'},
+    '3': {'sources': ['M.AG', 'M.LULUCF'], 'name': 'AFOLU'},
+    'M.AG.ELV': {'sources': ['M.3.C.AG'], 'name': 'Agriculture excluding livestock emissions'},
+}

+ 283 - 0
code/UNFCCC_reader/Montenegro/read_MNE_BUR3_from_pdf.py

@@ -0,0 +1,283 @@
+# Montenegro BUR 3
+# Code to read the emissions inventory contained in Montenegro's third BUR from pdf
+# and convert into PRIMAP2 format
+
+# ###
+# imports
+# ###
+import camelot
+import primap2 as pm2
+import pandas as pd
+from pathlib import Path
+import re
+import copy
+
+from config_MNE_BUR3 import drop_data, cat_mapping, aggregate_cats
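+# note: matches_time_format is imported from a private primap2 module and might change in future versions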
+from primap2.pm2io._data_reading import matches_time_format
+
+# ###
+# configuration
+# ###
+
+# folders and files
+root_path = Path(__file__).parents[3].absolute()
+root_path = root_path.resolve()
+downloaded_data_path = root_path / "downloaded_data"
+extracted_data_path = root_path / "extracted_data"
+
+input_folder = downloaded_data_path / 'UNFCCC' / 'Montenegro' / 'BUR3'
+output_folder = extracted_data_path / 'UNFCCC' / 'Montenegro'
+output_filename = 'MNE_BUR3_2022_'
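+# netcdf compression settings, used below as the per-variable encoding when writing the output files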
+compression = dict(zlib=True, complevel=9)
+
+inventory_file_pdf = 'NIR-2021_MNE_Finalversion.pdf'
+
+# reading and processing
+years_to_read = range(1990, 2018 + 1)
+pages_to_read = range(535,583)
+
+pos_entity = [0, 0]
+cat_code_col = 0
+cat_name_col = 1
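+# the first cell of each table contains "<entity> (<unit>)"; these regexes separate entity and unit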
+regex_unit = r"\((.*)\)"
+regex_entity = r"^(.*)\s\("
+
+gwp_to_use = 'AR4GWP100'
+
+# conversion to PRIMAP2 format
+# manual category codes
+cat_codes_manual = { # transform to PRIMAP1 form. PRIMAP2 form in next step with other codes
+    'International bunkers': 'MBK',
+    'Marine': 'MBKM',
+    'Aviation': 'MBKA',
+    'Multilateral operations': 'MMULTIOP',
+}
+
+coords_terminologies = {
+    "area": "ISO3",
+    "category": "IPCC1996_2006_MNE_Inv",
+    "scenario": "PRIMAP",
+}
+
+coords_defaults = {
+    "source": "MNE-GHG-inventory ",
+    "provenance": "measured",
+    "area": "MNE",
+    "scenario": "BUR3",
+}
+
+coords_value_mapping = {
+    'unit': 'PRIMAP1',
+    'entity': {
+        f"GHG {gwp_to_use}": f"KYOTOGHG {gwp_to_use}",
+        f"HFC {gwp_to_use}": f"HFCS {gwp_to_use}",
+        f"PFC {gwp_to_use}": f"PFCS {gwp_to_use}",
+    },
+    'category': {
+        'Total national GHG emissions (with LULUCF)': '0',
+        'Total national GHG emissions (without LULUCF)': 'M.0.EL',
+        'International Bunkers': 'M.BK',
+        '1.A.3.a.i': 'M.BK.A',
+        '1.A.3.d.i': 'M.BK.M',
+        'CO2 from Biomass Combustion for Energy Production': 'M.BIO',
+    },
+}
+
+coords_cols = {
+    "category": "category",
+    "entity": "entity",
+    "unit": "unit",
+}
+
+filter_remove = {
+    "f1": {
+        "category": ["Memo items"],
+    },
+}
+
+meta_data = {
+    "references": "https://unfccc.int/documents/461972",
+    "rights": "",
+    "contact": "mail@johannes-guetschow.de",
+    "title": "Montenegro. Biennial update report (BUR). BUR 3. National inventory report.",
+    "comment": "Read fom pdf file by Johannes Gütschow",
+    "institution": "United Nations Framework Convention on Climate Change (UNFCCC)",
+}
+
+# ###
+# Read all time series tables from the pdf
+# ###
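+# camelot's 'lattice' flavor parses tables demarcated by ruled lines, which matches the inventory tables here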
+tables = camelot.read_pdf(
+    str(input_folder / inventory_file_pdf),
+    pages=','.join([str(page) for page in pages_to_read]),
+    flavor='lattice')
+
+# ###
+# process tables and combine them using the pm2 pr.merge function
+# ###
+data_all = None
+for i, table in enumerate(tables):
+    df_current_table = table.df.copy(deep=True)
+    # get entity and unit
+    entity_unit = df_current_table.iloc[0, 0]
+    match = re.search(regex_unit, entity_unit)
+    unit = match.group(1)
+    match = re.search(regex_entity, entity_unit)
+    entity = match.group(1)
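+    # for CO2-equivalent series, tag the entity with the GWP metric and shorten the unit string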
+    if "CO2 equivalent" in unit:
+        entity = f"{entity} ({gwp_to_use})"
+        unit_parts = unit.split(" ")
+        unit = f"{unit_parts[0]} CO2eq"
+
+    # remove "\n" from the category code and name columns
+    df_current_table.iloc[:, 0] = df_current_table.iloc[:, 0].str.replace("\n", "")
+    df_current_table.iloc[:, 1] = df_current_table.iloc[:, 1].str.replace("\n", "")
+
+    # fix header
+    df_current_table.iloc[0, 0] = "category"
+    df_current_table.iloc[0, 1] = "orig_cat_name"
+    df_current_table.columns = df_current_table.iloc[0]
+    df_current_table = df_current_table.drop(0, axis=0)
+
+    # remove ',' in numbers
+    years = df_current_table.columns[2:]
+    repl = lambda m: m.group('part1') + m.group('part2')
+    for year in years:
+        df_current_table.loc[:, year] = df_current_table.loc[:, year].str.replace(
+            r'(?P<part1>[0-9]+),(?P<part2>[0-9.]+)$', repl, regex=True)
+
+    # add entity and unit cols
+    df_current_table["entity"] = entity
+    df_current_table["unit"] = unit
+
+    if i in drop_data:
+        to_drop = drop_data[i]
+        if "cats" in to_drop.keys():
+            mask = df_current_table["category"].isin(to_drop["cats"])
+            df_current_table = df_current_table.drop(df_current_table[mask].index,
+                                                     axis=0)
+        if "years" in to_drop.keys():
+            df_current_table = df_current_table.drop(columns=to_drop["years"])
+
+    df_current_table["category"] = df_current_table["category"].fillna(
+        value=df_current_table["orig_cat_name"])
+
+    df_current_table = df_current_table.drop(columns="orig_cat_name")
+
+    df_current_table_IF = pm2.pm2io.convert_wide_dataframe_if(
+        df_current_table,
+        coords_cols=coords_cols,
+        coords_defaults=coords_defaults,
+        coords_terminologies=coords_terminologies,
+        coords_value_mapping=coords_value_mapping,
+        filter_remove=filter_remove,
+        meta_data=meta_data,
+        convert_str=True,
+    )
+
+    current_table_pm2 = pm2.pm2io.from_interchange_format(df_current_table_IF)
+
+    if data_all is None:
+        data_all = current_table_pm2
+    else:
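+        # tolerance=0.001 allows small relative deviations between overlapping values
+        # from different tables instead of raising an error on merge conflicts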
+        data_all = data_all.pr.merge(current_table_pm2, tolerance=0.001)
+
+    print(f"{entity}, {unit}: {years[0]}-{years[-1]}")
+
+# ###
+# postprocessing
+# ###
+
+# convert to mass units from CO2eq
+entities_to_convert = ['N2O', 'SF6', 'CH4']
+entities_to_convert = [f"{entity} ({gwp_to_use})" for entity in entities_to_convert]
+
+for entity in entities_to_convert:
+    converted = data_all[entity].pr.convert_to_mass()
+    basic_entity = entity.split(" ")[0]
+    converted = converted.to_dataset(name=basic_entity)
+    data_all = data_all.pr.merge(converted)
+    data_all[basic_entity].attrs["entity"] = basic_entity
+
+# drop the GWP data
+data_all = data_all.drop_vars(entities_to_convert)
+
+# convert back to IF
+data_if = data_all.pr.to_interchange_format()
+
+# ###
+# convert to IPCC2006 categories
+# ###
+data_if_2006 = copy.deepcopy(data_if)
+data_if_2006.attrs = copy.deepcopy(data_if.attrs)
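+# attrs are deep-copied explicitly so that the metadata changes below do not affect data_if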
+
+# map categories
+data_if_2006 = data_if_2006.replace(
+    {f"category ({coords_terminologies['category']})": cat_mapping})
+data_if_2006[f"category ({coords_terminologies['category']})"].unique()
+
+# rename the category col
+data_if_2006.rename(columns={
+    f"category ({coords_terminologies['category']})": 'category (IPCC2006_PRIMAP)'},
+                    inplace=True)
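+# update the interchange format metadata so it is consistent with the renamed column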
+data_if_2006.attrs['attrs']['cat'] = 'category (IPCC2006_PRIMAP)'
+data_if_2006.attrs['dimensions']['*'] = [
+    'category (IPCC2006_PRIMAP)' if item == f"category ({coords_terminologies['category']})"
+    else item for item in data_if_2006.attrs['dimensions']['*']]
+# aggregate categories
+for cat_to_agg in aggregate_cats:
+    mask = data_if_2006["category (IPCC2006_PRIMAP)"].isin(
+        aggregate_cats[cat_to_agg]["sources"])
+    df_test = data_if_2006[mask]
+    # print(df_test)
+
+    if len(df_test) > 0:
+        print(f"Aggregating category {cat_to_agg}")
+        df_combine = df_test.copy(deep=True)
+
+        time_format = '%Y'
+        time_columns = [
+            col
+            for col in df_combine.columns.values
+            if matches_time_format(col, time_format)
+        ]
+
+        for col in time_columns:
+            df_combine[col] = pd.to_numeric(df_combine[col], errors="coerce")
+
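+        # sum over the source categories; min_count=1 keeps the result NaN
+        # where all inputs are NaN instead of returning 0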
+        df_combine = df_combine.groupby(
+            by=['source', 'scenario (PRIMAP)', 'provenance', 'area (ISO3)', 'entity',
+                'unit']).sum(min_count=1)
+
+        df_combine.insert(0, "category (IPCC2006_PRIMAP)", cat_to_agg)
+        # df_combine.insert(1, "cat_name_translation", aggregate_cats[cat_to_agg]["name"])
+        # df_combine.insert(2, "orig_cat_name", "computed")
+
+        df_combine = df_combine.reset_index()
+
+        data_if_2006 = pd.concat([data_if_2006, df_combine], axis=0, join='outer')
+        data_if_2006 = data_if_2006.reset_index(drop=True)
+    else:
+        print(f"no data to aggregate category {cat_to_agg}")
+
+# conversion to PRIMAP2 native format
+data_pm2_2006 = pm2.pm2io.from_interchange_format(data_if_2006)
+
+# convert back to IF to have units in the fixed format
+data_if_2006 = data_pm2_2006.pr.to_interchange_format()
+
+
+# ###
+# save data to IF and native format
+# ###
+if not output_folder.exists():
+    output_folder.mkdir()
+
+# data in original categories
+pm2.pm2io.write_interchange_format(output_folder / (output_filename + coords_terminologies["category"]), data_if)
+
+encoding = {var: compression for var in data_all.data_vars}
+data_all.pr.to_netcdf(output_folder / (output_filename + coords_terminologies["category"] + ".nc"), encoding=encoding)
+
+# data in 2006 categories
+pm2.pm2io.write_interchange_format(output_folder / (output_filename + "IPCC2006_PRIMAP"), data_if_2006)
+
+encoding = {var: compression for var in data_pm2_2006.data_vars}
+data_pm2_2006.pr.to_netcdf(output_folder / (output_filename + "IPCC2006_PRIMAP" + ".nc"), encoding=encoding)

+ 4 - 2
code/UNFCCC_reader/Thailand/read_THA_BUR3_from_pdf.py

@@ -5,6 +5,7 @@ import pandas as pd
 import primap2 as pm2
 from pathlib import Path
 import camelot
+import copy
 
 from primap2.pm2io._data_reading import matches_time_format
 
@@ -337,6 +338,7 @@ cat_mapping = {
 aggregate_cats = {
     '2.A.4': {'sources': ['2.A.4.b', '2.A.4.d'],
               'name': 'Other Process uses of Carbonates'},
+    '3.A': {'sources': ['3.A.1', '3.A.2'], 'name': 'Livestock'},
     '3.C.1': {'sources': ['M.3.C.1.AG', 'M.3.C.1.LU'],
               'name': 'Emissions from Biomass Burning'},
     '3.C': {'sources': ['3.C.1', '3.C.2', '3.C.3', '3.C.4', '3.C.5', '3.C.6', '3.C.7'],
@@ -355,8 +357,8 @@ aggregate_cats = {
                  'name': 'Agriculture excluding livestock emissions'},
 }
 
-data_if_2006 = data_all_if.copy(deep=True)
-data_if_2006
+data_if_2006 = copy.deepcopy(data_all_if)
+data_if_2006.attrs = copy.deepcopy(data_all_if.attrs)
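+# the attrs dict is deep-copied explicitly so that later changes to data_if_2006.attrs
+# do not leak back into data_all_if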
 
 # map categories
 data_if_2006 = data_if_2006.replace({'category (IPCC1996_2006_THA_Inv)': cat_mapping})

+ 1 - 0
code/UNFCCC_reader/folder_mapping.json

@@ -7,5 +7,6 @@
     "MAR": "Morocco",
     "COL": "Colombia",
     "CHL": "Chile",
+    "MNE": "Montenegro",
     "IDN": "Indonesia"
 }

+ 1 - 0
extracted_data/UNFCCC/folder_mapping.json

@@ -40,6 +40,7 @@
     "AUS": "Australia",
     "POL": "Poland",
     "EUA": "European_Union",
+    "MNE": "Montenegro",
     "HUN": "Hungary",
     "SVK": "Slovakia",
     "EST": "Estonia",