Browse Source

Merge remote-tracking branch 'refs/remotes/origin/main' into fix_folder_mapping

Johannes 11 months ago
parent
commit
1b1a611b0a

+ 2 - 0
.gitignore

@@ -1,9 +1,11 @@
 .idea
+.DS_Store
 venv
 geckodriver.log
 __pycache__
 /JG_test_code/
 .doit.db
+.doit.db.db
 log/*
 UNFCCC_GHG_data/datasets
 UNFCCC_GHG_data/UNFCCC_DI_reader/test_UNFCCC_DI_reader.ipynb

+ 211 - 0
UNFCCC_GHG_data/UNFCCC_reader/Burundi/config_BDI_BUR1.py

@@ -0,0 +1,211 @@
+# define config dict
+inv_conf = {
+    "entity_row": 0,
+    "unit_row": 1,
+    "index_cols": "Greenhouse gas source and sink categories",
+    "header_long": ["orig_cat_name", "entity", "unit", "time", "data"],
+    "cat_code_regexp": r"^(?P<code>[a-zA-Z0-9\.]{1,11})[\s\.].*",
+    "header": [
+        "Greenhouse gas source and sink categories",
+        "CO2",
+        "CH4",
+        "N2O",
+        "HFCs",
+        "PFCs",
+        "SF6",
+        "other halogenated gases",
+        "Other halogenated gases without CO2 equivalent conversion factors",
+        "NOx",
+        "CO",
+        "NMVOCs",
+        "SO2",
+    ],
+    "unit": [
+        "-",
+        "Gg",
+        "Gg",
+        "Gg",
+        "GgCO2eq",
+        "GgCO2eq",
+        "GgCO2eq",
+        "GgCO2eq",
+        "Gg",
+        "Gg",
+        "Gg",
+        "Gg",
+        "Gg",
+    ],
+    "cat_codes_manual": {
+        "Memo Items (5)": "MEMO",
+        "International Bunkers": "M.BK",
+        "1.A.3.a.i - International Aviation (International Bunkers) (1)": "M.BK.A",
+        "1.A.3.d.i - International water-borne navigation (International bunkers) (1)": "M.BK.M",
+        # TODO: Handle with regex instead of explicitly adding all options.
+        "1.A.3.d.i - International water-borne navigation (International                      bunkers) (1)": "M.BK.M",
+        "1.A.3.d.i - International water-borne navigation (International bunkers)                      (1)": "M.BK.M",
+        "1.A.5.c - Multilateral Operations (1)(2)": "M.MULTIOP",
+        "Total National Emissions and Removals": "0",
+    },
+}
+
+inv_conf_per_year = {
+    "2005": {
+        "pages_to_read": ["197", "198", "199", "200"],
+    },
+    "2006": {
+        "pages_to_read": ["201", "202", "203", "204"],
+    },
+    "2007": {
+        "pages_to_read": ["205", "206", "207", "208"],
+    },
+    "2008": {
+        "pages_to_read": ["209", "210", "211", "212"],
+    },
+    "2009": {
+        "pages_to_read": ["213", "214", "215", "216"],
+    },
+    "2010": {
+        "pages_to_read": ["221", "222", "223", "224"],
+    },
+    "2011": {
+        "pages_to_read": ["225", "226", "227", "228"],
+    },
+    "2012": {
+        "pages_to_read": ["229", "230", "231", "232"],
+    },
+    "2013": {
+        "pages_to_read": ["233", "234", "235", "236"],
+    },
+    "2014": {
+        "pages_to_read": ["237", "238", "239", "240"],
+    },
+    "2015": {
+        "pages_to_read": ["241", "242", "243", "244"],
+        # Some values were extracted into the wrong columns
+        "fix_values": [
+            (2, 10, "21,529"),
+            (1, 12, "NMVOCs"),
+            (2, 12, "0"),
+        ],
+        # for this table an additional column is created
+        # that needs to be deleted
+        "delete_columns": [11],
+    },
+    "2016": {
+        "pages_to_read": ["245", "246", "247", "248"],
+    },
+    "2017": {
+        "pages_to_read": ["249", "250", "251", "252"],
+    },
+    "2018": {
+        "pages_to_read": ["253", "254", "255", "256"],
+    },
+    "2019": {
+        "pages_to_read": ["257", "258", "259", "260"],
+    },
+}
+
+# primap2 format conversion
+coords_cols = {
+    "category": "category",
+    "entity": "entity",
+    "unit": "unit",
+}
+
+coords_defaults = {
+    "source": "BDI-GHG-Inventory",
+    "provenance": "measured",
+    "area": "BDI",
+    "scenario": "BUR1",
+}
+
+coords_terminologies = {
+    "area": "ISO3",
+    "category": "IPCC2006_PRIMAP",
+    "scenario": "PRIMAP",
+}
+
+# Page 64: The global warming potentials (GWPs) recommended by the IPCC Fifth Assessment Report (AR5)
+# and based on the annex to Decision 18/CMA.1 have been used to convert GHGs other than CO2
+# into their equivalent. These GWPs provide a consistent basis for comparing the relative effect
+# of emissions of all GHGs standardized over a 100-year period by converting emissions of other
+# GHGs into those of CO2. The values adopted for the three direct GHGs are 1 for CO2, 28 for CH4
+# and 265 for N2O.
+gwp_to_use = "AR5GWP100"
+coords_value_mapping = {
+    "unit": "PRIMAP1",
+    "category": "PRIMAP1",
+    "entity": {
+        "HFCs": f"HFCS ({gwp_to_use})",
+        "PFCs": f"PFCS ({gwp_to_use})",
+        "SF6": f"SF6 ({gwp_to_use})",
+        "NMVOCs": "NMVOC",
+    },
+}
+
+filter_remove = {
+    "f_memo": {"category": "MEMO"},
+    "f_empty": {"category": ""},
+    # "f1": {
+    #     "entity": ["Other halogenated gases with CO2 equivalent conversion factors"],
+    # },
+    "f2": {
+        "entity": ["Other halogenated gases without CO2 equivalent conversion factors"],
+    },
+}
+
+meta_data = {
+    "references": "https://unfccc.int/documents/611668",
+    "rights": "",  # unknown
+    "contact": "daniel-busch@climate-resource.de",
+    "title": "Burundi. Biennial update report (BUR). BUR1",
+    "comment": "Read fom pdf by Daniel Busch",
+    "institution": "UNFCCC",
+}
+
+country_processing_step1 = {
+    "aggregate_cats": {
+        "M.3.C.AG": {
+            "sources": [
+                "3.C.1",
+                "3.C.2",
+                "3.C.3",
+                "3.C.4",
+                "3.C.5",
+                "3.C.6",
+                "3.C.7",
+                "3.C.8",
+            ],
+            "name": "Aggregate sources and non-CO2 emissions sources on land "
+            "(Agriculture)",
+        },
+        "M.3.D.AG": {"sources": ["3.D.2"], "name": "Other (Agriculture)"},
+        "M.AG.ELV": {
+            "sources": ["M.3.C.AG", "M.3.D.AG"],
+            "name": "Agriculture excluding livestock",
+        },
+        "M.AG": {"sources": ["3.A", "M.AG.ELV"], "name": "Agriculture"},
+        "M.3.D.LU": {"sources": ["3.D.1"], "name": "Other (LULUCF)"},
+        "M.LULUCF": {"sources": ["3.B", "M.3.D.LU"], "name": "LULUCF"},
+        "M.0.EL": {
+            "sources": ["1", "2", "M.AG", "4", "5"],
+            "name": "National total emissions excluding LULUCF",
+        },
+    },
+    "basket_copy": {
+        "GWPs_to_add": ["SARGWP100", "AR4GWP100", "AR6GWP100"],
+        "entities": ["HFCS", "PFCS"],
+        "source_GWP": gwp_to_use,
+    },
+}
+
+gas_baskets = {
+    "FGASES (SARGWP100)": ["HFCS (SARGWP100)", "PFCS (SARGWP100)", "SF6", "NF3"],
+    "FGASES (AR4GWP100)": ["HFCS (AR4GWP100)", "PFCS (AR4GWP100)", "SF6", "NF3"],
+    "FGASES (AR5GWP100)": ["HFCS (AR5GWP100)", "PFCS (AR5GWP100)", "SF6", "NF3"],
+    "FGASES (AR6GWP100)": ["HFCS (AR6GWP100)", "PFCS (AR6GWP100)", "SF6", "NF3"],
+    "KYOTOGHG (SARGWP100)": ["CO2", "CH4", "N2O", "FGASES (SARGWP100)"],
+    "KYOTOGHG (AR4GWP100)": ["CO2", "CH4", "N2O", "FGASES (AR4GWP100)"],
+    "KYOTOGHG (AR5GWP100)": ["CO2", "CH4", "N2O", "FGASES (AR5GWP100)"],
+    "KYOTOGHG (AR6GWP100)": ["CO2", "CH4", "N2O", "FGASES (AR6GWP100)"],
+}

+ 226 - 0
UNFCCC_GHG_data/UNFCCC_reader/Burundi/read_BDI_BUR1_from_pdf.py

@@ -0,0 +1,226 @@
+import camelot
+import primap2 as pm2
+import pandas as pd
+
+from UNFCCC_GHG_data.helper import downloaded_data_path, extracted_data_path
+from UNFCCC_GHG_data.helper.functions import process_data_for_country
+
+from config_BDI_BUR1 import (
+    inv_conf,
+    meta_data,
+    filter_remove,
+    coords_value_mapping,
+    coords_terminologies,
+    coords_defaults,
+    coords_cols,
+    gas_baskets,
+    country_processing_step1,
+    inv_conf_per_year,
+)
+
+# ###
+# configuration
+# ###
+
+input_folder = downloaded_data_path / "UNFCCC" / "Burundi" / "BUR1"
+output_folder = extracted_data_path / "UNFCCC" / "Burundi"
+
+if not output_folder.exists():
+    output_folder.mkdir()
+
+pdf_file = "Burundi_BUR_1_Report__Francais.pdf"
+output_filename = "BDI_BUR1_2023_"
+category_column = f"category ({coords_terminologies['category']})"
+compression = dict(zlib=True, complevel=9)
+
+# ###
+# 1. Read in tables
+# ###
+
+df_all = None
+for year in inv_conf_per_year.keys():
+    print("-" * 60)
+    print(f"Reading year {year}.")
+    print("-" * 60)
+    df_year = None
+    for page in inv_conf_per_year[year]["pages_to_read"]:
+        print(f"Reading table from page {page}.")
+        tables_inventory_original = camelot.read_pdf(
+            str(input_folder / pdf_file),
+            pages=page,
+            flavor="lattice",
+            split_text=True,
+        )
+        print("Reading complete.")
+
+        df_page = tables_inventory_original[0].df
+
+        if df_year is None:
+            df_year = df_page
+        else:
+            df_year = pd.concat(
+                [df_year, df_page],
+                axis=0,
+                join="outer",
+            ).reset_index(drop=True)
+
+    print(f"Concatenating all tables for {year}.")
+    # remove line breaks
+    for column in df_year.columns:
+        df_year[column] = df_year[column].str.replace("\n", "")
+
+    # fix broken values in cells
+    if "fix_values" in inv_conf_per_year[year].keys():
+        for index, column, value in inv_conf_per_year[year]["fix_values"]:
+            df_year.at[index, column] = value
+
+    # delete extra columns
+    if "delete_columns" in inv_conf_per_year[year].keys():
+        for column in inv_conf_per_year[year]["delete_columns"]:
+            df_year = df_year.drop(columns=column)
+        df_year.columns = range(df_year.columns.size)
+
+    df_header = pd.DataFrame([inv_conf["header"], inv_conf["unit"]])
+
+    df_year = pd.concat([df_header, df_year[2:]], axis=0, join="outer").reset_index(
+        drop=True
+    )
+
+    df_year = pm2.pm2io.nir_add_unit_information(
+        df_year,
+        unit_row=inv_conf["unit_row"],
+        entity_row=inv_conf["entity_row"],
+        regexp_entity=".*",
+        regexp_unit=".*",
+        default_unit="Gg",
+    )
+
+    print("Added unit information.")
+
+    # set index
+    df_year = df_year.set_index(inv_conf["index_cols"])
+
+    # convert to long format
+    df_year_long = pm2.pm2io.nir_convert_df_to_long(
+        df_year, year, inv_conf["header_long"]
+    )
+
+    # extract from tuple
+    df_year_long["orig_cat_name"] = df_year_long["orig_cat_name"].str[0]
+
+    # prep for conversion to PM2 IF and native format
+    # make a copy of the categories row
+    df_year_long["category"] = df_year_long["orig_cat_name"]
+
+    # replace cat names by codes in col "category"
+    # first the manual replacements
+    df_year_long["category"] = df_year_long["category"].str.replace("\n", "")
+
+    df_year_long["category"] = df_year_long["category"].replace(
+        inv_conf["cat_codes_manual"]
+    )
+
+    df_year_long["category"] = df_year_long["category"].str.replace(".", "")
+
+    # then the regex replacements
+    def repl(m):
+        return m.group("code")
+
+    df_year_long["category"] = df_year_long["category"].str.replace(
+        inv_conf["cat_code_regexp"], repl, regex=True
+    )
+
+    df_year_long = df_year_long.reset_index(drop=True)
+
+    df_year_long["data"] = df_year_long["data"].str.replace(",", ".")
+
+    # TODO: I don't think there are 'NE1' values in these tables.
+    # df_year_long["data"] = df_year_long["data"].str.replace("NE1", "NE")
+
+    # make sure all col headers are str
+    df_year_long.columns = df_year_long.columns.map(str)
+
+    df_year_long = df_year_long.drop(columns=["orig_cat_name"])
+
+    if df_all is None:
+        df_all = df_year_long
+    else:
+        df_all = pd.concat(
+            [df_all, df_year_long],
+            axis=0,
+            join="outer",
+        ).reset_index(drop=True)
+
+### convert to interchange format ###
+print("Converting to interchange format.")
+df_all_IF = pm2.pm2io.convert_long_dataframe_if(
+    df_all,
+    coords_cols=coords_cols,
+    coords_defaults=coords_defaults,
+    coords_terminologies=coords_terminologies,
+    coords_value_mapping=coords_value_mapping,
+    filter_remove=filter_remove,
+    meta_data=meta_data,
+    convert_str=True,
+    time_format="%Y",
+)
+
+
+### convert to primap2 format ###
+print("Converting to primap2 format.")
+data_pm2 = pm2.pm2io.from_interchange_format(df_all_IF)
+
+
+# ###
+# Save raw data to IF and native format.
+# ###
+
+data_if = data_pm2.pr.to_interchange_format()
+
+pm2.pm2io.write_interchange_format(
+    output_folder / (output_filename + coords_terminologies["category"] + "_raw"),
+    data_if,
+)
+
+encoding = {var: compression for var in data_pm2.data_vars}
+data_pm2.pr.to_netcdf(
+    output_folder / (output_filename + coords_terminologies["category"] + "_raw.nc"),
+    encoding=encoding,
+)
+
+
+# ###
+# Processing
+# ###
+
+data_proc_pm2 = process_data_for_country(
+    data_country=data_pm2,
+    entities_to_ignore=[],
+    gas_baskets=gas_baskets,
+    filter_dims=None,
+    cat_terminology_out=None,
+    category_conversion=None,
+    sectors_out=None,
+    processing_info_country=country_processing_step1,
+)
+
+# ###
+# save processed data to IF and native format
+# ###
+
+terminology_proc = coords_terminologies["category"]
+
+data_proc_if = data_proc_pm2.pr.to_interchange_format()
+
+if not output_folder.exists():
+    output_folder.mkdir()
+pm2.pm2io.write_interchange_format(
+    output_folder / (output_filename + terminology_proc), data_proc_if
+)
+
+encoding = {var: compression for var in data_proc_pm2.data_vars}
+data_proc_pm2.pr.to_netcdf(
+    output_folder / (output_filename + terminology_proc + ".nc"), encoding=encoding
+)
+
+print("Saved processed data.")

+ 457 - 0
UNFCCC_GHG_data/UNFCCC_reader/Guinea/config_GIN_BUR1.py

@@ -0,0 +1,457 @@
+# primap2 format conversion
+coords_cols = {
+    "category": "category",
+    "entity": "entity",
+    "unit": "unit",
+}
+
+coords_defaults = {
+    "source": "GIN-GHG-Inventory",
+    "provenance": "measured",
+    "area": "GIN",
+    "scenario": "BUR1",
+}
+
+coords_terminologies = {
+    "area": "ISO3",
+    "category": "IPCC2006_PRIMAP",
+    "scenario": "PRIMAP",
+}
+
+# gwp conversion is mentioned on page 20 in the report
+gwp_to_use = "AR4GWP100"
+coords_value_mapping = {
+    "main": {
+        "unit": "PRIMAP1",
+        "category": "PRIMAP1",
+        "entity": {
+            "HFCs": f"HFCS ({gwp_to_use})",
+            "PFCs": f"PFCS ({gwp_to_use})",
+            "SF6": f"SF6 ({gwp_to_use})",
+            "NMVOCs": "NMVOC",
+        },
+    },
+    "energy": {
+        "unit": "PRIMAP1",
+        "category": "PRIMAP1",
+        "entity": {
+            "NMVOCs": "NMVOC",
+        },
+    },
+    "afolu": {
+        "unit": "PRIMAP1",
+        "category": "PRIMAP1",
+        "entity": {
+            "NMVOCs": "NMVOC",
+        },
+    },
+    "waste": {
+        "unit": "PRIMAP1",
+        "category": "PRIMAP1",
+        "entity": {
+            "NMVOCs": "NMVOC",
+        },
+    },
+    "trend": {
+        "unit": "PRIMAP1",
+        "category": "PRIMAP1",
+        "entity": {
+            "NMVOCs": "NMVOC",
+        },
+    },
+}
+
+filter_remove = {
+    "f_memo": {"category": "MEMO"},
+}
+
+meta_data = {
+    "references": "https://unfccc.int/documents/629549",
+    "rights": "",  # unknown
+    "contact": "daniel-busch@climate-resource.de",
+    "title": "Guinea. Biennial update report (BUR). BUR1",
+    "comment": "Read fom pdf by Daniel Busch",
+    "institution": "UNFCCC",
+}
+
+page_def_templates = {
+    "110": {
+        "area": ["36,718,589,87"],
+        "cols": ["290,340,368,392,425,445,465,497,535,564"],
+    },
+    "111": {
+        "area": ["36,736,587,107"],
+        "cols": ["293,335,369,399,424,445,468,497,535,565"],
+    },
+    "112": {
+        "area": ["35,733,588,106"],
+        "cols": ["293,335,369,399,424,445,468,497,535,565"],
+    },
+    "113": {
+        "area": ["35,733,588,106"],
+        "cols": ["293,335,365,399,424,445,468,497,535,565"],
+    },
+    "131": {
+        "area": ["36,718,590,83"],
+        "cols": ["293,332,370,406,442,480,516,554"],
+    },
+}
+
+# for main table
+header_inventory = [
+    "Greenhouse gas source and sink categories",
+    "CO2",
+    "CH4",
+    "N2O",
+    "HFCs",
+    "PFCs",
+    "SF6",
+    "NOx",
+    "CO",
+    "NMVOCs",
+    "SO2",
+]
+
+unit_inventory = ["-"] + ["Gg"] * (
+    len(header_inventory) - 1
+)  # "-" is the unit for the category column
+unit_inventory[4] = "GgCO2eq"
+unit_inventory[5] = "GgCO2eq"
+unit_inventory[6] = "GgCO2eq"
+
+# for energy tables
+header_energy = [
+    "Greenhouse gas source and sink categories",
+    "CO2",
+    "CH4",
+    "N2O",
+    "NOx",
+    "CO",
+    "NMVOCs",
+    "SO2",
+]
+unit_energy = ["-"] + ["Gg"] * (len(header_energy) - 1)  # "-" for the category column
+
+# for afolu tables
+header_afolu = [
+    "Greenhouse gas source and sink categories",
+    "CO2",
+    "CH4",
+    "N2O",
+    "NOx",
+    "CO",
+    "NMVOCs",
+]
+unit_afolu = ["-"] + ["Gg"] * (len(header_afolu) - 1)
+
+# for waste table
+header_waste = [
+    "Greenhouse gas source and sink categories",
+    "CO2",
+    "CH4",
+    "N2O",
+    "NOx",
+    "CO",
+    "NMVOCs",
+    "SO2",
+]
+unit_waste = ["-"] + ["Gg"] * (len(header_waste) - 1)
+
+# for trend table (unit is always Gg for this table)
+# 'data' prefix is needed for pd.wide_to_long() later
+header_trend = [
+    "orig_cat_name",
+    "data1990",
+    "data1995",
+    "data2000",
+    "data2005",
+    "data2010",
+    "data2015",
+    "data2018",
+    "data2019",
+]
+
+set_value = {
+    "main": {
+        "110": [
+            (4, 0, "1.A.1 - Industries énergétiques"),
+            (8, 0, "1.A.4 - Autres secteurs"),
+        ],
+        "111": [
+            (4, 0, "1.A.1 - Industries énergétiques"),
+            (8, 0, "1.A.4 - Autres secteurs"),
+        ],
+        "112": [
+            (4, 0, "1.A.1 - Industries énergétiques"),
+            (8, 0, "1.A.4 - Autres secteurs"),
+        ],
+    }
+}
+
+delete_row = {"main": {"110": [3, 7], "111": [3, 7], "112": [3, 7]}}
+
+delete_rows_by_category = {
+    "energy": {
+        "116": [
+            "1.A.3.a.i - Aviation internationale (Soutes internationales)",
+            "Éléments pour information",
+            "1.A.3.d.i - Navigation internationale (soutes internationales)",
+            "1.A.5.c - Opérations multilatérales (Éléments pour information)",
+        ],
+        "117": [
+            "1.A.3.a.i - Aviation internationale (Soutes internationales)",
+            "Éléments pour information",
+            "1.A.3.d.i - Navigation internationale (soutes internationales)",
+            "1.A.5.c - Opérations multilatérales (Éléments pour information)",
+        ],
+        "118": [
+            "1.A.3.a.i - Aviation internationale (Soutes internationales)",
+            "Éléments pour information",
+            "1.A.3.d.i - Navigation internationale (soutes internationales)",
+            "1.A.5.c - Opérations multilatérales (Éléments pour information)",
+        ],
+        "119": [
+            "1.A.3.a.i - Aviation internationale (Soutes internationales)",
+            "Information Items",
+            "1.A.3.d.i - Navigation internationale (soutes internationales)",
+            "1.A.5.c - Opérations multilatérales (Éléments pour information)",
+        ],
+    },
+    "trend": {
+        # The categories 3.D / 3.D.1 / 3.D.2 contain values different to the main table
+        # They should also not contain negative values according to IPCC methodology:
+        # https://www.ipcc-nggip.iges.or.jp/public/2006gl/
+        # Therefore, the rows are deleted from the table.
+        "131": [
+            "3.D - Autres",
+            "3.D.1 - Produits ligneux récoltés",
+            "3.D.2 - Autres (veuillez spécifier)",
+        ],
+        # Delete empty line for pages 132-137.
+        "132": [""],
+        "133": [""],
+        "134": [""],
+        "135": [""],
+        "136": [""],
+        "137": [""],
+    },
+}
+
+# define config dict
+inv_conf = {
+    "header": header_inventory,
+    "unit": unit_inventory,
+    "header_energy": header_energy,
+    "unit_energy": unit_energy,
+    "header_afolu": header_afolu,
+    "unit_afolu": unit_afolu,
+    "header_waste": header_waste,
+    "unit_waste": unit_waste,
+    "header_trend": header_trend,
+    "entity_row": 0,
+    "unit_row": 1,
+    "index_cols": "Greenhouse gas source and sink categories",
+    "pages_to_read": {
+        "main": ["110", "111", "112", "113"],
+        "energy": ["116", "117", "118", "119"],
+        "afolu": ["124", "125", "126", "127"],
+        "waste": ["128", "130"],
+        # The table for CO (page 135) seems completely mixed up and should not be considered.
+        # The total CO values for 1990 equal the values in the main table.
+        # The total CO values for 1995 equal the values for 2000 in the main table.
+        # The total CO values for 2000 equal the values for 2010 in the main table.
+        # The total CO values for 2005 are identical to the 2019 values in the same table.
+        # The total CO values for 2010 are identical to the 1990 values in the same table.
+        # The total CO values for 2019 are identical to the 1995 values in the same table.
+        # And so on.
+        "trend": ["131", "132", "133", "134", "136", "137"],
+    },
+    "entity_for_page": {"trend": ["CO2", "CH4", "N2O", "NOx", "NMVOCs", "SO2"]},
+    "year": {
+        "110": 1990,
+        "111": 2000,
+        "112": 2010,
+        "113": 2019,
+        "116": 1990,
+        "117": 2000,
+        "118": 2010,
+        "119": 2019,
+        "124": 1990,
+        "125": 2000,
+        "126": 2010,
+        "127": 2019,
+    },
+    "header_long": ["orig_cat_name", "entity", "unit", "time", "data"],
+    "cat_code_regexp": r"^(?P<code>[a-zA-Z0-9\.]{1,11})[\s\.].*",
+    "cat_codes_manual": {
+        "main": {
+            "Éléments pour mémoire": "MEMO",
+            "Soutes internationales": "M.BK",
+            "1.A.3.a.i - Aviation internationale (soutes internationales)": "M.BK.A",
+            "1.A.3.d.i - Navigation internationale (soutes internationales)": "M.BK.M",
+            "1.A.5.c - Opérations multilatérales": "M.MULTIOP",
+            "Total des émissions et absorptions nationales": "0",
+            "2A5: Autre": "2A5",
+        },
+        "energy": {
+            "International Bunkers": "M.BK",
+            "1.A.3.a.i - Aviation internationale (soutes internationales)": "M.BK.A",
+            "1.A.3.d.i - Navigation internationale (soutes internationales)": "M.BK.M",
+            "1.A.5.c - Opérations multilatérales": "M.MULTIOP",
+            "CO2 from Biomass Combustion for Energy Production": "M.BIO",
+        },
+        "trend": {
+            "Total des émissions et absorptions nationales": "0",
+            "2A5: Autre": "2A5",
+            "Éléments pour mémoire": "MEMO",
+            "Soutes internationales": "M.BK",
+            "1.A.3.a.i - Aviation internationale (soutes internationales)": "M.BK.A",
+            "1.A.3.d.i - Navigation internationale (soutes internationales)": "M.BK.M",
+            "1.A.5.c - Opérations multilatérales": "M.MULTIOP",
+        },
+    },
+}
+
+country_processing_step1 = {
+    "aggregate_cats": {
+        "M.3.C.AG": {
+            "sources": [
+                "3.C.1",
+                "3.C.2",
+                "3.C.3",
+                "3.C.4",
+                "3.C.5",
+                "3.C.6",
+                "3.C.7",
+                "3.C.8",
+            ],
+            "name": "Aggregate sources and non-CO2 emissions sources on land "
+            "(Agriculture)",
+        },
+        "M.3.D.AG": {"sources": ["3.D.2"], "name": "Other (Agriculture)"},
+        "M.AG.ELV": {
+            "sources": ["M.3.C.AG", "M.3.D.AG"],
+            "name": "Agriculture excluding livestock",
+        },
+        "M.AG": {"sources": ["3.A", "M.AG.ELV"], "name": "Agriculture"},
+        "M.3.D.LU": {"sources": ["3.D.1"], "name": "Other (LULUCF)"},
+        "M.LULUCF": {"sources": ["3.B", "M.3.D.LU"], "name": "LULUCF"},
+        "M.0.EL": {
+            "sources": ["1", "2", "M.AG", "4"],
+            "name": "National total emissions excluding LULUCF",
+        },
+    },
+    "basket_copy": {
+        "GWPs_to_add": ["SARGWP100", "AR5GWP100", "AR6GWP100"],
+        "entities": ["HFCS", "PFCS"],
+        "source_GWP": gwp_to_use,
+    },
+}
+
+gas_baskets = {
+    "FGASES (SARGWP100)": ["HFCS (SARGWP100)", "PFCS (SARGWP100)", "SF6", "NF3"],
+    "FGASES (AR4GWP100)": ["HFCS (AR4GWP100)", "PFCS (AR4GWP100)", "SF6", "NF3"],
+    "FGASES (AR5GWP100)": ["HFCS (AR5GWP100)", "PFCS (AR5GWP100)", "SF6", "NF3"],
+    "FGASES (AR6GWP100)": ["HFCS (AR6GWP100)", "PFCS (AR6GWP100)", "SF6", "NF3"],
+    "KYOTOGHG (SARGWP100)": ["CO2", "CH4", "N2O", "FGASES (SARGWP100)"],
+    "KYOTOGHG (AR4GWP100)": ["CO2", "CH4", "N2O", "FGASES (AR4GWP100)"],
+    "KYOTOGHG (AR5GWP100)": ["CO2", "CH4", "N2O", "FGASES (AR5GWP100)"],
+    "KYOTOGHG (AR6GWP100)": ["CO2", "CH4", "N2O", "FGASES (AR6GWP100)"],
+}
+
+replace_info = {
+    "main": [
+        ("3", "CO", "2019", 27.406),
+        ("3.C", "CO", "2019", 27.406),
+        ("3.C.1", "CO", "2019", 27.406),
+        ("3", "N2O", "1990", 2.190),
+        ("3", "NOx", "2019", 1.644),
+        ("3.C", "NOx", "2019", 1.644),
+        ("3.C.1", "NOx", "2019", 1.644),
+        ("M.BK", "NOx", "1990", 0.001),
+        ("M.BK", "NOx", "2000", 0.003),
+        ("M.BK", "NOx", "2010", 0.052),
+        ("M.BK", "CO", "1990", 0.0002),
+        ("M.BK", "CO", "2000", 0.0006),
+        ("M.BK", "CO", "2010", 0.01),
+        ("M.BK", "NMVOC", "1990", 0.0001),
+        ("M.BK", "NMVOC", "2000", 0.0002),
+        ("M.BK", "NMVOC", "2010", 0.003),
+    ],
+    "trend": [
+        ("M.BK", "CH4", "1990"),
+        ("M.BK.A", "CH4", "1990"),
+        ("M.BK", "CH4", "2000"),
+        ("M.BK.A", "CH4", "2000"),
+        ("M.BK", "CH4", "2010"),
+        ("M.BK.A", "CH4", "2010"),
+        ("1.A.2", "N2O", "1990"),
+        ("M.BK", "N2O", "1990"),
+        ("M.BK.A", "N2O", "1990"),
+        ("M.BK", "N2O", "2000"),
+        ("M.BK.A", "N2O", "2000"),
+        ("M.BK", "N2O", "2010"),
+        ("M.BK.A", "N2O", "2010"),
+        ("M.BK", "N2O", "2019"),
+        ("M.BK.A", "N2O", "2019"),
+        ("M.BK", "NOx", "1990"),
+        ("M.BK", "NOx", "2000"),
+        ("M.BK", "NOx", "2010"),
+        ("3.C", "NOx", "2019"),
+        ("3.C.1", "NOx", "2019"),
+        ("3", "NOx", "2019"),
+        ("1.A.2", "NMVOC", "1990"),
+        ("M.BK", "NMVOC", "1990"),
+        ("0", "NMVOC", "2000"),
+        ("1", "NMVOC", "2000"),
+        ("1.A", "NMVOC", "2000"),
+        ("1.A.1", "NMVOC", "2000"),
+        ("1.A.2", "NMVOC", "2000"),
+        ("1.A.3", "NMVOC", "2000"),
+        ("1.A.4", "NMVOC", "2000"),
+        ("2", "NMVOC", "2000"),
+        ("2.H", "NMVOC", "2000"),
+        ("2.H.2", "NMVOC", "2000"),
+        ("M.BK", "NMVOC", "2000"),
+        ("0", "NMVOC", "2010"),
+        ("1", "NMVOC", "2010"),
+        ("1.A", "NMVOC", "2010"),
+        ("1.A.1", "NMVOC", "2010"),
+        ("1.A.2", "NMVOC", "2010"),
+        ("1.A.3", "NMVOC", "2010"),
+        ("1.A.4", "NMVOC", "2010"),
+        ("2", "NMVOC", "2010"),
+        ("M.BK", "NMVOC", "2010"),
+        ("1.A.2", "NMVOC", "2019"),
+    ],
+}
+
+replace_categories = {
+    "afolu": {
+        "124-126": [
+            (17, "3.A.2.a.i - Vaches laitières"),
+            (18, "3.A.2.a.ii - Autres bovins"),
+            (19, "3.A.2.b - Buffle"),
+            (20, "3.A.2.c - Ovins"),
+            (21, "3.A.2.d - Caprins"),
+            (22, "3.A.2.e - Chameaux"),
+            (23, "3.A.2.f - Chevaux"),
+            (24, "3.A.2.g - Mules et ânes"),
+            (25, "3.A.2.h - Porcins"),
+            (26, "3.A.2.i - Volailles"),
+        ],
+        "127": [
+            (19, "3.A.2.a.i - Vaches laitières"),
+            (20, "3.A.2.a.ii - Autres bovins"),
+            (21, "3.A.2.b - Buffle"),
+            (22, "3.A.2.c - Ovins"),
+            (23, "3.A.2.d - Caprins"),
+            (24, "3.A.2.e - Chameaux"),
+            (25, "3.A.2.f - Chevaux"),
+            (26, "3.A.2.g - Mules et ânes"),
+            (27, "3.A.2.h - Porcins"),
+            (28, "3.A.2.i - Volailles"),
+            (29, "3.A.2.j - Autres (préciser)"),
+        ],
+    }
+}
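
`header_trend` above prefixes the year columns with "data" because pd.wide_to_long(), used for the trend tables in the reading script below, selects columns by that stub name. A self-contained illustration with invented numbers:

import pandas as pd

df = pd.DataFrame(
    {
        "category": ["1.A", "1.B"],
        "data1990": [1.2, 0.4],  # invented values
        "data2019": [2.3, 0.7],
    }
)

df_long = pd.wide_to_long(df, stubnames="data", i="category", j="time").reset_index()
# -> columns: category, time, data; one row per (category, year) pair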

+ 679 - 0
UNFCCC_GHG_data/UNFCCC_reader/Guinea/read_GIN_BUR1_from_pdf.py

@@ -0,0 +1,679 @@
+import camelot
+import primap2 as pm2
+import pandas as pd
+
+from UNFCCC_GHG_data.helper import downloaded_data_path, extracted_data_path
+from UNFCCC_GHG_data.helper.functions import process_data_for_country
+from UNFCCC_GHG_data.helper.functions_temp import find_and_replace_values
+from config_GIN_BUR1 import coords_cols, coords_defaults, coords_terminologies
+from config_GIN_BUR1 import (
+    coords_value_mapping,
+    filter_remove,
+    meta_data,
+    page_def_templates,
+    delete_rows_by_category,
+)
+from config_GIN_BUR1 import (
+    inv_conf,
+    country_processing_step1,
+    gas_baskets,
+    replace_info,
+    replace_categories,
+    set_value,
+    delete_row,
+)
+
+# ###
+# configuration
+# ###
+
+input_folder = downloaded_data_path / "UNFCCC" / "Guinea" / "BUR1"
+output_folder = extracted_data_path / "UNFCCC" / "Guinea"
+if not output_folder.exists():
+    output_folder.mkdir()
+
+pdf_file = "Rapport_IGES-Guinee-BUR1_VF.pdf"
+output_filename = "GIN_BUR1_2023_"
+category_column = f"category ({coords_terminologies['category']})"
+compression = dict(zlib=True, complevel=9)
+
+# ###
+# 1. Read in main tables
+# ###
+
+df_main = None
+for page in inv_conf["pages_to_read"]["main"]:
+    print("-" * 45)
+    print(f"Reading table from page {page}.")
+
+    tables_inventory_original = camelot.read_pdf(
+        str(input_folder / pdf_file),
+        pages=page,
+        table_areas=page_def_templates[page]["area"],
+        columns=page_def_templates[page]["cols"],
+        flavor="stream",
+        split_text=True,
+    )
+
+    print("Reading complete.")
+
+    df_inventory = tables_inventory_original[0].df.copy()
+
+    # set category names (they moved one row up)
+    if page in set_value["main"].keys():
+        for idx, col, value in set_value["main"][page]:
+            df_inventory.at[idx, col] = value
+    # delete empty row
+    if page in delete_row["main"].keys():
+        for idx in delete_row["main"][page]:
+            df_inventory = df_inventory.drop(index=idx)
+
+    # add header and unit
+    df_header = pd.DataFrame([inv_conf["header"], inv_conf["unit"]])
+    df_inventory = pd.concat(
+        [df_header, df_inventory], axis=0, join="outer"
+    ).reset_index(drop=True)
+    df_inventory = pm2.pm2io.nir_add_unit_information(
+        df_inventory,
+        unit_row=inv_conf["unit_row"],
+        entity_row=inv_conf["entity_row"],
+        regexp_entity=".*",
+        regexp_unit=".*",
+        default_unit="Gg",
+    )
+
+    print("Added unit information.")
+
+    # set index
+    df_inventory = df_inventory.set_index(inv_conf["index_cols"])
+
+    # convert to long format
+    df_inventory_long = pm2.pm2io.nir_convert_df_to_long(
+        df_inventory, inv_conf["year"][page], inv_conf["header_long"]
+    )
+
+    # extract category from tuple
+    df_inventory_long["orig_cat_name"] = df_inventory_long["orig_cat_name"].str[0]
+
+    # prep for conversion to PM2 IF and native format
+    df_inventory_long["category"] = df_inventory_long["orig_cat_name"]
+
+    df_inventory_long["category"] = df_inventory_long["category"].replace(
+        inv_conf["cat_codes_manual"]["main"]
+    )
+
+    df_inventory_long["category"] = df_inventory_long["category"].str.replace(".", "")
+
+    # regex replacements
+    def repl(m):
+        return m.group("code")
+
+    df_inventory_long["category"] = df_inventory_long["category"].str.replace(
+        inv_conf["cat_code_regexp"], repl, regex=True
+    )
+
+    df_inventory_long = df_inventory_long.reset_index(drop=True)
+
+    df_inventory_long["data"] = df_inventory_long["data"].str.replace(",", ".")
+    df_inventory_long["data"] = df_inventory_long["data"].str.replace("NE1", "NE")
+
+    # make sure all col headers are str
+    df_inventory_long.columns = df_inventory_long.columns.map(str)
+    df_inventory_long = df_inventory_long.drop(columns=["orig_cat_name"])
+
+    if df_main is None:
+        df_main = df_inventory_long
+    else:
+        df_main = pd.concat(
+            [df_main, df_inventory_long],
+            axis=0,
+            join="outer",
+        ).reset_index(drop=True)
+
+print("Converting to interchange format.")
+df_all_IF = pm2.pm2io.convert_long_dataframe_if(
+    df_main,
+    coords_cols=coords_cols,
+    coords_defaults=coords_defaults,
+    coords_terminologies=coords_terminologies,
+    coords_value_mapping=coords_value_mapping["main"],
+    filter_remove=filter_remove,
+    meta_data=meta_data,
+    convert_str=True,
+    time_format="%Y",
+)
+
+df_all_IF = find_and_replace_values(
+    df=df_all_IF, replace_info=replace_info["main"], category_column=category_column
+)
+
+### convert to primap2 format ###
+data_pm2_main = pm2.pm2io.from_interchange_format(df_all_IF)
+
+# ###
+# 2. Read energy sector tables
+# ###
+
+df_energy = None
+for page in inv_conf["pages_to_read"]["energy"]:
+    print("-" * 45)
+    print(f"Reading table from page {page}.")
+
+    tables_inventory_original = camelot.read_pdf(
+        str(input_folder / pdf_file), pages=page, flavor="lattice", split_text=True
+    )
+
+    print("Reading complete.")
+
+    df_energy_year = pd.concat(
+        [tables_inventory_original[0].df[2:], tables_inventory_original[1].df[3:]],
+        axis=0,
+        join="outer",
+    ).reset_index(drop=True)
+
+    # TODO This step should be done in pm2.pm2io.convert_long_dataframe_if()
+    for row in delete_rows_by_category["energy"][page]:
+        row_to_delete = df_energy_year.index[df_energy_year[0] == row][0]
+        df_energy_year = df_energy_year.drop(index=row_to_delete)
+
+    # add header and unit
+    df_header = pd.DataFrame([inv_conf["header_energy"], inv_conf["unit_energy"]])
+
+    df_energy_year = pd.concat(
+        [df_header, df_energy_year], axis=0, join="outer"
+    ).reset_index(drop=True)
+
+    df_energy_year = pm2.pm2io.nir_add_unit_information(
+        df_energy_year,
+        unit_row=inv_conf["unit_row"],
+        entity_row=inv_conf["entity_row"],
+        regexp_entity=".*",
+        regexp_unit=".*",
+        default_unit="Gg",
+    )
+
+    print("Added unit information.")
+    # set index
+    df_energy_year = df_energy_year.set_index(inv_conf["index_cols"])
+
+    # convert to long format
+    df_energy_year_long = pm2.pm2io.nir_convert_df_to_long(
+        df_energy_year, inv_conf["year"][page], inv_conf["header_long"]
+    )
+
+    # extract from tuple
+    df_energy_year_long["orig_cat_name"] = df_energy_year_long["orig_cat_name"].str[0]
+
+    # prep for conversion to PM2 IF and native format
+    # make a copy of the categories row
+    df_energy_year_long["category"] = df_energy_year_long["orig_cat_name"]
+
+    # replace cat names by codes in col "category"
+    # first the manual replacements
+    df_energy_year_long["category"] = df_energy_year_long["category"].str.replace(
+        "\n", ""
+    )
+    df_energy_year_long["category"] = df_energy_year_long["category"].replace(
+        inv_conf["cat_codes_manual"]["energy"]
+    )
+
+    df_energy_year_long["category"] = df_energy_year_long["category"].str.replace(
+        ".", ""
+    )
+
+    # then the regex replacements
+    def repl(m):
+        return m.group("code")
+
+    df_energy_year_long["category"] = df_energy_year_long["category"].str.replace(
+        inv_conf["cat_code_regexp"], repl, regex=True
+    )
+
+    df_energy_year_long = df_energy_year_long.reset_index(drop=True)
+
+    df_energy_year_long["data"] = df_energy_year_long["data"].str.replace(",", ".")
+    df_energy_year_long["data"] = df_energy_year_long["data"].str.replace("NE1", "NE")
+
+    # make sure all col headers are str
+    df_energy_year_long.columns = df_energy_year_long.columns.map(str)
+    df_energy_year_long = df_energy_year_long.drop(columns=["orig_cat_name"])
+
+    if df_energy is None:
+        df_energy = df_energy_year_long
+    else:
+        df_energy = pd.concat(
+            [df_energy, df_energy_year_long],
+            axis=0,
+            join="outer",
+        ).reset_index(drop=True)
+
+print("Converting to interchange format.")
+df_energy_IF = pm2.pm2io.convert_long_dataframe_if(
+    df_energy,
+    coords_cols=coords_cols,
+    coords_defaults=coords_defaults,
+    coords_terminologies=coords_terminologies,
+    coords_value_mapping=coords_value_mapping["energy"],
+    filter_remove=filter_remove,
+    meta_data=meta_data,
+    convert_str=True,
+    time_format="%Y",
+)
+
+### convert to primap2 format ###
+data_pm2_energy = pm2.pm2io.from_interchange_format(df_energy_IF)
+
+# ###
+# 3. Read in afolu table
+# ###
+
+df_afolu = None
+for page in inv_conf["pages_to_read"]["afolu"]:
+    print("-" * 45)
+    print(f"Reading table from page {page}.")
+
+    tables_inventory_original = camelot.read_pdf(
+        str(input_folder / pdf_file), pages=page, flavor="lattice", split_text=True
+    )
+    print("Reading complete.")
+
+    if page == "127":
+        # table on page 127 has one extra row at the top
+        # and one extra category 3.A.1.j
+        df_afolu_year = tables_inventory_original[0].df[3:]
+        # 3.A.1.a.i to 3.A.1.j exist twice.
+        # Rename duplicate categories in tables.
+        for index, category_name in replace_categories["afolu"]["127"]:
+            df_afolu_year.at[index, 0] = category_name
+    else:
+        # cut first two lines
+        df_afolu_year = tables_inventory_original[0].df[2:]
+        # On pages 124-126 the wrong categories are slightly different
+        for index, category_name in replace_categories["afolu"]["124-126"]:
+            df_afolu_year.at[index, 0] = category_name
+
+    # add header and unit
+    df_header = pd.DataFrame([inv_conf["header_afolu"], inv_conf["unit_afolu"]])
+
+    df_afolu_year = pd.concat(
+        [df_header, df_afolu_year], axis=0, join="outer"
+    ).reset_index(drop=True)
+
+    df_afolu_year = pm2.pm2io.nir_add_unit_information(
+        df_afolu_year,
+        unit_row=inv_conf["unit_row"],
+        entity_row=inv_conf["entity_row"],
+        regexp_entity=".*",
+        regexp_unit=".*",
+        default_unit="Gg",
+    )
+
+    print("Added unit information.")
+
+    # set index
+    df_afolu_year = df_afolu_year.set_index(inv_conf["index_cols"])
+
+    # convert to long format
+    df_afolu_year_long = pm2.pm2io.nir_convert_df_to_long(
+        df_afolu_year, inv_conf["year"][page], inv_conf["header_long"]
+    )
+
+    df_afolu_year_long["orig_cat_name"] = df_afolu_year_long["orig_cat_name"].str[0]
+
+    # prep for conversion to PM2 IF and native format
+    # make a copy of the categories row
+    df_afolu_year_long["category"] = df_afolu_year_long["orig_cat_name"]
+
+    # regex replacements
+    def repl(m):
+        return m.group("code")
+
+    df_afolu_year_long["category"] = df_afolu_year_long["category"].str.replace(
+        inv_conf["cat_code_regexp"], repl, regex=True
+    )
+
+    df_afolu_year_long = df_afolu_year_long.reset_index(drop=True)
+
+    df_afolu_year_long["data"] = df_afolu_year_long["data"].str.replace(",", ".")
+    df_afolu_year_long["data"] = df_afolu_year_long["data"].str.replace("NE1", "NE")
+
+    # make sure all col headers are str
+    df_afolu_year_long.columns = df_afolu_year_long.columns.map(str)
+    df_afolu_year_long = df_afolu_year_long.drop(columns=["orig_cat_name"])
+
+    if df_afolu is None:
+        df_afolu = df_afolu_year_long
+    else:
+        df_afolu = pd.concat(
+            [df_afolu, df_afolu_year_long],
+            axis=0,
+            join="outer",
+        ).reset_index(drop=True)
+
+print("Converting to interchange format.")
+df_afolu_IF = pm2.pm2io.convert_long_dataframe_if(
+    df_afolu,
+    coords_cols=coords_cols,
+    coords_defaults=coords_defaults,
+    coords_terminologies=coords_terminologies,
+    coords_value_mapping=coords_value_mapping["afolu"],
+    filter_remove=filter_remove,
+    meta_data=meta_data,
+    convert_str=True,
+    time_format="%Y",
+)
+
+### convert to primap2 format ###
+data_pm2_afolu = pm2.pm2io.from_interchange_format(df_afolu_IF)
+
+# ###
+# 4. Read in Waste tables - pages 128, 130
+# ###
+
+# There are three tables for three years on page 128
+# and another table for the last year on page 130
+
+# read the first three tables
+page = inv_conf["pages_to_read"]["waste"][0]
+tables_inventory_original_128 = camelot.read_pdf(
+    str(input_folder / pdf_file), pages=page, flavor="lattice", split_text=True
+)
+
+# read last table
+page = inv_conf["pages_to_read"]["waste"][1]
+tables_inventory_original_130 = camelot.read_pdf(
+    str(input_folder / pdf_file), pages=page, flavor="lattice", split_text=True
+)
+
+# combine in a dict
+df_waste_years = {
+    "1990": tables_inventory_original_128[0].df,
+    "2000": tables_inventory_original_128[1].df,
+    "2010": tables_inventory_original_128[2].df,
+    "2019": tables_inventory_original_130[0].df,
+}
+
+df_waste = None
+for year in df_waste_years.keys():
+    print("-" * 45)
+    print(f"Processing table for {year}.")
+
+    df_waste_year = df_waste_years[year][2:]
+
+    # add header and unit
+    df_header = pd.DataFrame([inv_conf["header_waste"], inv_conf["unit_waste"]])
+
+    df_waste_year = pd.concat(
+        [df_header, df_waste_year], axis=0, join="outer"
+    ).reset_index(drop=True)
+
+    df_waste_year = pm2.pm2io.nir_add_unit_information(
+        df_waste_year,
+        unit_row=inv_conf["unit_row"],
+        entity_row=inv_conf["entity_row"],
+        regexp_entity=".*",
+        regexp_unit=".*",
+        default_unit="Gg",
+    )
+
+    print("Added unit information.")
+
+    # set index
+    df_waste_year = df_waste_year.set_index(inv_conf["index_cols"])
+
+    # convert to long format
+    df_waste_year_long = pm2.pm2io.nir_convert_df_to_long(
+        df_waste_year, year, inv_conf["header_long"]
+    )
+
+    df_waste_year_long["orig_cat_name"] = df_waste_year_long["orig_cat_name"].str[0]
+
+    # prep for conversion to PM2 IF and native format
+    # make a copy of the categories row
+    df_waste_year_long["category"] = df_waste_year_long["orig_cat_name"]
+
+    # regex replacements
+    def repl(m):
+        return m.group("code")
+
+    df_waste_year_long["category"] = df_waste_year_long["category"].str.replace(
+        inv_conf["cat_code_regexp"], repl, regex=True
+    )
+
+    df_waste_year_long = df_waste_year_long.reset_index(drop=True)
+
+    df_waste_year_long["category"] = df_waste_year_long["category"].str.replace(".", "")
+    df_waste_year_long["data"] = df_waste_year_long["data"].str.replace(",", ".")
+    df_waste_year_long["data"] = df_waste_year_long["data"].str.replace("NE1", "NE")
+
+    # make sure all col headers are str
+    df_waste_year_long.columns = df_waste_year_long.columns.map(str)
+    df_waste_year_long = df_waste_year_long.drop(columns=["orig_cat_name"])
+
+    if df_waste is None:
+        df_waste = df_waste_year_long
+    else:
+        df_waste = pd.concat(
+            [df_waste, df_waste_year_long],
+            axis=0,
+            join="outer",
+        ).reset_index(drop=True)
+
+print("Converting to interchange format.")
+df_waste_IF = pm2.pm2io.convert_long_dataframe_if(
+    df_waste,
+    coords_cols=coords_cols,
+    coords_defaults=coords_defaults,
+    coords_terminologies=coords_terminologies,
+    coords_value_mapping=coords_value_mapping["waste"],
+    filter_remove=filter_remove,
+    meta_data=meta_data,
+    convert_str=True,
+    time_format="%Y",
+)
+
+### convert to primap2 format ###
+data_pm2_waste = pm2.pm2io.from_interchange_format(df_waste_IF)
+
+# ###
+# 5. Read in trend tables - pages 131 - 137
+# ###
+
+df_trend = None
+pages = inv_conf["pages_to_read"]["trend"]
+entities = inv_conf["entity_for_page"]["trend"]
+
+# for this set of tables every page is a different entity
+for page, entity in zip(pages, entities):
+    print("-" * 45)
+    print(f"Reading table for page {page} and entity {entity}.")
+
+    # The first table must be read with flavor="stream", as
+    # flavor="lattice" raises an error. This may be a camelot
+    # issue (see https://github.com/atlanhq/camelot/issues/306)
+    # or happen because the characters in the first row almost
+    # touch the table grid.
+    if page == "131":
+        tables_inventory_original = camelot.read_pdf(
+            str(input_folder / pdf_file),
+            pages=page,
+            table_areas=page_def_templates[page]["area"],
+            columns=page_def_templates[page]["cols"],
+            flavor="stream",
+            split_text=True,
+        )
+
+        df_trend_entity = tables_inventory_original[0].df[1:]
+
+    else:
+        tables_inventory_original = camelot.read_pdf(
+            str(input_folder / pdf_file), pages=page, flavor="lattice", split_text=True
+        )
+        df_trend_entity = tables_inventory_original[0].df[3:]
+
+    print("Reading complete.")
+
+    if page in delete_rows_by_category["trend"].keys():
+        for category in delete_rows_by_category["trend"][page]:
+            row_to_delete = df_trend_entity.index[df_trend_entity[0] == category][0]
+            df_trend_entity = df_trend_entity.drop(index=row_to_delete)
+
+    df_trend_entity.columns = inv_conf["header_trend"]
+
+    df_trend_entity = df_trend_entity.copy()
+
+    # unit is always Gg
+    df_trend_entity.loc[:, "unit"] = "Gg"
+
+    # only one entity per table
+    df_trend_entity.loc[:, "entity"] = entity
+
+    df_trend_entity.loc[:, "category"] = df_trend_entity["orig_cat_name"]
+
+    df_trend_entity["category"] = df_trend_entity["category"].replace(
+        inv_conf["cat_codes_manual"]["trend"]
+    )
+
+    df_trend_entity.loc[:, "category"] = df_trend_entity["category"].str.replace(
+        ".", ""
+    )
+    df_trend_entity.loc[:, "category"] = df_trend_entity["category"].str.replace(
+        "\n", ""
+    )
+
+    def repl(m):
+        return m.group("code")
+
+    df_trend_entity.loc[:, "category"] = df_trend_entity["category"].str.replace(
+        inv_conf["cat_code_regexp"], repl, regex=True
+    )
+
+    df_trend_entity = df_trend_entity.reset_index(drop=True)
+
+    print("Created category codes.")
+
+    for year in inv_conf["header_trend"][1:]:
+        df_trend_entity.loc[:, year] = df_trend_entity[year].str.replace(",", ".")
+        df_trend_entity.loc[:, year] = df_trend_entity[year].str.replace("NE1", "NE")
+
+    # make sure all col headers are str
+    df_trend_entity.columns = df_trend_entity.columns.map(str)
+
+    df_trend_entity = df_trend_entity.drop(columns=["orig_cat_name"])
+
+    # TODO better to use pm2.pm2io.convert_wide_dataframe_if
+    df_trend_entity_long = pd.wide_to_long(
+        df_trend_entity, stubnames="data", i="category", j="time"
+    )
+
+    print("Converted to long format.")
+
+    df_trend_entity_long = df_trend_entity_long.reset_index()
+
+    if df_trend is None:
+        df_trend = df_trend_entity_long
+    else:
+        df_trend = pd.concat(
+            [df_trend, df_trend_entity_long],
+            axis=0,
+            join="outer",
+        ).reset_index(drop=True)
+
+print("Converting to interchange format.")
+
+df_trend_IF = pm2.pm2io.convert_long_dataframe_if(
+    df_trend,
+    coords_cols=coords_cols,
+    coords_defaults=coords_defaults,
+    coords_terminologies=coords_terminologies,
+    coords_value_mapping=coords_value_mapping["trend"],
+    filter_remove=filter_remove,
+    meta_data=meta_data,
+    convert_str=True,
+    time_format="%Y",
+)
+
+df_trend_IF = find_and_replace_values(
+    df=df_trend_IF, replace_info=replace_info["trend"], category_column=category_column
+)
+
+### convert to primap2 format ###
+data_pm2_trend = pm2.pm2io.from_interchange_format(df_trend_IF)
+
+# ###
+# Combine tables
+# ###
+
+# merge main and energy
+# There are discrepancies larger than 0.86 for category 1.A.2, entity NMVOC,
+# in the years 1990, 2000, 2010, and 2019.
+# It is assumed that the main table has the correct values.
+print("Merging main and energy table.")
+data_pm2 = data_pm2_main.pr.merge(data_pm2_energy, tolerance=1)
+
+# merge afolu
+print("Merging afolu table.")
+data_pm2 = data_pm2.pr.merge(data_pm2_afolu, tolerance=0.11)
+
+# merge waste
+# The tolerance is increased to merge the values for 4.C, 1990, N2O:
+# 0.003 in the sector table vs. 0.0034 in the main table.
+print("Merging waste table.")
+data_pm2 = data_pm2.pr.merge(data_pm2_waste, tolerance=0.15)
+
+# merge trend
+print("Merging trend table.")
+data_pm2 = data_pm2.pr.merge(data_pm2_trend, tolerance=0.11)
+
+# convert back to IF to have units in the fixed format (per year / per a / per annum)
+data_if = data_pm2.pr.to_interchange_format()
+
+# ###
+# Save raw data to IF and native format.
+# ###
+
+pm2.pm2io.write_interchange_format(
+    output_folder / (output_filename + coords_terminologies["category"] + "_raw"),
+    data_if,
+)
+
+encoding = {var: compression for var in data_pm2.data_vars}
+data_pm2.pr.to_netcdf(
+    output_folder / (output_filename + coords_terminologies["category"] + "_raw.nc"),
+    encoding=encoding,
+)
+
+# ###
+# Processing
+# ###
+
+data_proc_pm2 = process_data_for_country(
+    data_country=data_pm2,
+    entities_to_ignore=[],
+    gas_baskets=gas_baskets,
+    filter_dims=None,  # leaving this explicit for now
+    cat_terminology_out=None,
+    category_conversion=None,
+    sectors_out=None,
+    processing_info_country=country_processing_step1,
+)
+
+# ###
+# save processed data to IF and native format
+# ###
+
+terminology_proc = coords_terminologies["category"]
+
+data_proc_if = data_proc_pm2.pr.to_interchange_format()
+
+if not output_folder.exists():
+    output_folder.mkdir()
+pm2.pm2io.write_interchange_format(
+    output_folder / (output_filename + terminology_proc), data_proc_if
+)
+
+encoding = {var: compression for var in data_proc_pm2.data_vars}
+data_proc_pm2.pr.to_netcdf(
+    output_folder / (output_filename + terminology_proc + ".nc"), encoding=encoding
+)
+
+print("Saved processed data.")

+ 3 - 1
UNFCCC_GHG_data/UNFCCC_reader/folder_mapping.json

@@ -8,10 +8,12 @@
     "NGA": "Nigeria",
     "MAR": "Morocco",
     "COL": "Colombia",
+    "GIN": "Guinea",
     "CHL": "Chile",
     "PER": "Peru",
     "MYS": "Malaysia",
     "MNE": "Montenegro",
     "ISR": "Israel",
-    "IDN": "Indonesia"
+    "IDN": "Indonesia",
+    "BDI": "Burundi"
 }
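
A hedged sketch of how a script might resolve a country folder from this mapping (the path is taken from the diff header above; the lookup itself is illustrative, not code from the repository):

import json
from pathlib import Path

mapping_file = Path("UNFCCC_GHG_data/UNFCCC_reader/folder_mapping.json")
folder_mapping = json.loads(mapping_file.read_text())

print(folder_mapping["BDI"])  # -> "Burundi"
print(folder_mapping["GIN"])  # -> "Guinea"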

+ 55 - 0
UNFCCC_GHG_data/helper/functions_temp.py

@@ -0,0 +1,55 @@
+"""Temporary file for new functions to avoid merging issues due to different automatic formatting. Delete after merge."""
+
+import numpy as np
+import pandas as pd
+
+
+def find_and_replace_values(
+    df: pd.DataFrame,
+    replace_info: list[tuple[str | float]],
+    category_column: str,
+    entity_column: str = "entity",
+) -> pd.DataFrame:
+    """
+    Find values and replace single values in a dataframe.
+
+    Input
+    -----
+    df
+        Input data frame
+    replace_info
+        Tuples of (category, entity, year, new value). Omit the new value to
+        replace the existing one with NaN.
+        For example [("3.C", "CO", "2019", 3.423)] or [("3.C", "CO", "2019")].
+    category_column
+        The name of the column that contains the categories.
+    entity_column
+        The name of the column that contains the entities.
+
+    Output
+    ------
+        Data frame with updated values.
+
+    """
+    for replace_info_value in replace_info:
+        category = replace_info_value[0]
+        entity = replace_info_value[1]
+        year = replace_info_value[2]
+
+        if len(replace_info_value) == 4:
+            new_value = replace_info_value[3]
+        elif len(replace_info_value) == 3:
+            new_value = np.nan
+        else:
+            raise AssertionError(
+                f"Expected tuple of length 3 or 4. Got {replace_info_value}"
+            )
+
+        index = df.loc[
+            (df[category_column] == category) & (df[entity_column] == entity),
+        ].index[0]
+
+        # pandas recommends using .at[] for changing single values
+        df.at[index, year] = new_value
+        print(f"Set value for {category}, {entity}, {year} to {new_value}.")
+
+    return df
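
A small usage example for find_and_replace_values on a toy interchange-format frame (column names and numbers are invented; in the reading scripts above the real category column is f"category ({coords_terminologies['category']})"):

import pandas as pd
from UNFCCC_GHG_data.helper.functions_temp import find_and_replace_values

df = pd.DataFrame(
    {
        "category (IPCC2006_PRIMAP)": ["3.C", "3.C"],
        "entity": ["CO", "NOx"],
        "2019": [99.9, 99.9],  # placeholder values
    }
)

df = find_and_replace_values(
    df,
    replace_info=[("3.C", "CO", "2019", 27.406), ("3.C", "NOx", "2019")],
    category_column="category (IPCC2006_PRIMAP)",
)
# the 4-tuple sets the CO value to 27.406; the 3-tuple sets the NOx value to NaN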

+ 1 - 0
extracted_data/UNFCCC/Burundi/BDI_BUR1_2023_IPCC2006_PRIMAP.csv

@@ -0,0 +1 @@
+../../../.git/annex/objects/PK/xF/MD5E-s299244--f790b1166c54fd5b601264b6f563487e.csv/MD5E-s299244--f790b1166c54fd5b601264b6f563487e.csv

+ 1 - 0
extracted_data/UNFCCC/Burundi/BDI_BUR1_2023_IPCC2006_PRIMAP.nc

@@ -0,0 +1 @@
+../../../.git/annex/objects/J6/kq/MD5E-s280055--6e7d18a4d48652ecbd3d77c2800c8226.nc/MD5E-s280055--6e7d18a4d48652ecbd3d77c2800c8226.nc

+ 22 - 0
extracted_data/UNFCCC/Burundi/BDI_BUR1_2023_IPCC2006_PRIMAP.yaml

@@ -0,0 +1,22 @@
+attrs:
+  references: https://unfccc.int/documents/611668
+  rights: ''
+  contact: daniel-busch@climate-resource.de
+  title: Burundi. Biennial update report (BUR). BUR1 Processed on 2024-04-16
+  comment: Read from pdf by Daniel Busch Processed on 2024-04-16
+  institution: UNFCCC
+  cat: category (IPCC2006_PRIMAP)
+  area: area (ISO3)
+  scen: scenario (PRIMAP)
+time_format: '%Y'
+dimensions:
+  '*':
+  - time
+  - category (IPCC2006_PRIMAP)
+  - area (ISO3)
+  - source
+  - scenario (PRIMAP)
+  - provenance
+  - entity
+  - unit
+data_file: BDI_BUR1_2023_IPCC2006_PRIMAP.csv

+ 1 - 0
extracted_data/UNFCCC/Burundi/BDI_BUR1_2023_IPCC2006_PRIMAP_raw.csv

@@ -0,0 +1 @@
+../../../.git/annex/objects/WF/7x/MD5E-s114782--4bbd01fb2bbed1cfa762c674e0447fbe.csv/MD5E-s114782--4bbd01fb2bbed1cfa762c674e0447fbe.csv

+ 1 - 0
extracted_data/UNFCCC/Burundi/BDI_BUR1_2023_IPCC2006_PRIMAP_raw.nc

@@ -0,0 +1 @@
+../../../.git/annex/objects/xv/7q/MD5E-s111420--fcbfc2cd0e8adf22cbbebfda5ad57ef3.nc/MD5E-s111420--fcbfc2cd0e8adf22cbbebfda5ad57ef3.nc

+ 22 - 0
extracted_data/UNFCCC/Burundi/BDI_BUR1_2023_IPCC2006_PRIMAP_raw.yaml

@@ -0,0 +1,22 @@
+attrs:
+  references: https://unfccc.int/documents/611668
+  rights: ''
+  contact: daniel-busch@climate-resource.de
+  title: Burundi. Biennial update report (BUR). BUR1
+  comment: Read from pdf by Daniel Busch
+  institution: UNFCCC
+  cat: category (IPCC2006_PRIMAP)
+  area: area (ISO3)
+  scen: scenario (PRIMAP)
+time_format: '%Y'
+dimensions:
+  '*':
+  - time
+  - category (IPCC2006_PRIMAP)
+  - area (ISO3)
+  - source
+  - scenario (PRIMAP)
+  - provenance
+  - entity
+  - unit
+data_file: BDI_BUR1_2023_IPCC2006_PRIMAP_raw.csv

+ 1 - 0
extracted_data/UNFCCC/Guinea/GIN_BUR1_2023_IPCC2006_PRIMAP.csv

@@ -0,0 +1 @@
+/annex/objects/MD5E-s204594--a04b7a0db8398441177fdb164e5e2114.csv

+ 1 - 0
extracted_data/UNFCCC/Guinea/GIN_BUR1_2023_IPCC2006_PRIMAP.nc

@@ -0,0 +1 @@
+/annex/objects/MD5E-s231196--7fbb7b4b58db901bc953231000cb5cb1.nc

+ 22 - 0
extracted_data/UNFCCC/Guinea/GIN_BUR1_2023_IPCC2006_PRIMAP.yaml

@@ -0,0 +1,22 @@
+attrs:
+  references: https://unfccc.int/documents/629549
+  rights: ''
+  contact: daniel-busch@climate-resource.de
+  title: Guinea. Biennial update report (BUR). BUR1 Processed on 2024-04-16
+  comment: Read from pdf by Daniel Busch Processed on 2024-04-16
+  institution: UNFCCC
+  cat: category (IPCC2006_PRIMAP)
+  area: area (ISO3)
+  scen: scenario (PRIMAP)
+time_format: '%Y'
+dimensions:
+  '*':
+  - time
+  - category (IPCC2006_PRIMAP)
+  - source
+  - area (ISO3)
+  - provenance
+  - scenario (PRIMAP)
+  - entity
+  - unit
+data_file: GIN_BUR1_2023_IPCC2006_PRIMAP.csv

+ 1 - 0
extracted_data/UNFCCC/Guinea/GIN_BUR1_2023_IPCC2006_PRIMAP_raw.csv

@@ -0,0 +1 @@
+/annex/objects/MD5E-s86243--6b88e6c39832467ab21383324926c679.csv

+ 1 - 0
extracted_data/UNFCCC/Guinea/GIN_BUR1_2023_IPCC2006_PRIMAP_raw.nc

@@ -0,0 +1 @@
+/annex/objects/MD5E-s108241--60115b2f44c314b243cfa3a64c324dcd.nc

+ 22 - 0
extracted_data/UNFCCC/Guinea/GIN_BUR1_2023_IPCC2006_PRIMAP_raw.yaml

@@ -0,0 +1,22 @@
+attrs:
+  references: https://unfccc.int/documents/629549
+  rights: ''
+  contact: daniel-busch@climate-resource.de
+  title: Guinea. Biennial update report (BUR). BUR1
+  comment: Read from pdf by Daniel Busch
+  institution: UNFCCC
+  cat: category (IPCC2006_PRIMAP)
+  area: area (ISO3)
+  scen: scenario (PRIMAP)
+time_format: '%Y'
+dimensions:
+  '*':
+  - time
+  - category (IPCC2006_PRIMAP)
+  - source
+  - area (ISO3)
+  - provenance
+  - scenario (PRIMAP)
+  - entity
+  - unit
+data_file: GIN_BUR1_2023_IPCC2006_PRIMAP_raw.csv

+ 1 - 1
setup.cfg

@@ -31,7 +31,7 @@ packages =
     UNFCCC_GHG_data.UNFCCC_DI_reader
     UNFCCC_GHG_data.helper
 #UNFCCC_GHG_data.datasets
-python_requires = >=3.8
+python_requires = >=3.8, <3.11
 setup_requires =
     setuptools_scm
 install_requires =