Browse Source

Merge remote-tracking branch 'refs/remotes/origin/main' into fix_folder_mapping

Johannes 11 months ago
parent
commit
1b1a611b0a

+ 2 - 0
.gitignore

@@ -1,9 +1,11 @@
 .idea
+.DS_Store
 venv
 geckodriver.log
 __pycache__
 /JG_test_code/
 .doit.db
+.doit.db.db
 log/*
 UNFCCC_GHG_data/datasets
 UNFCCC_GHG_data/UNFCCC_DI_reader/test_UNFCCC_DI_reader.ipynb

+ 211 - 0
UNFCCC_GHG_data/UNFCCC_reader/Burundi/config_BDI_BUR1.py

@@ -0,0 +1,211 @@
+# define config dict
+inv_conf = {
+    "entity_row": 0,
+    "unit_row": 1,
+    "index_cols": "Greenhouse gas source and sink categories",
+    "header_long": ["orig_cat_name", "entity", "unit", "time", "data"],
+    "cat_code_regexp": r"^(?P<code>[a-zA-Z0-9\.]{1,11})[\s\.].*",
+    "header": [
+        "Greenhouse gas source and sink categories",
+        "CO2",
+        "CH4",
+        "N2O",
+        "HFCs",
+        "PFCs",
+        "SF6",
+        "other halogenated gases",
+        "Other halogenated gases without CO2 equivalent conversion factors",
+        "NOx",
+        "CO",
+        "NMVOCs",
+        "SO2",
+    ],
+    "unit": [
+        "-",
+        "Gg",
+        "Gg",
+        "Gg",
+        "GgCO2eq",
+        "GgCO2eq",
+        "GgCO2eq",
+        "GgCO2eq",
+        "Gg",
+        "Gg",
+        "Gg",
+        "Gg",
+        "Gg",
+    ],
+    "cat_codes_manual": {
+        "Memo Items (5)": "MEMO",
+        "International Bunkers": "M.BK",
+        "1.A.3.a.i - International Aviation (International Bunkers) (1)": "M.BK.A",
+        "1.A.3.d.i - International water-borne navigation (International bunkers) (1)": "M.BK.M",
+        # TODO: Handle with regex instead of explicitly adding all options.
+        "1.A.3.d.i - International water-borne navigation (International                      bunkers) (1)": "M.BK.M",
+        "1.A.3.d.i - International water-borne navigation (International bunkers)                      (1)": "M.BK.M",
+        "1.A.5.c - Multilateral Operations (1)(2)": "M.MULTIOP",
+        "Total National Emissions and Removals": "0",
+    },
+}
+
+inv_conf_per_year = {
+    "2005": {
+        "pages_to_read": ["197", "198", "199", "200"],
+    },
+    "2006": {
+        "pages_to_read": ["201", "202", "203", "204"],
+    },
+    "2007": {
+        "pages_to_read": ["205", "206", "207", "208"],
+    },
+    "2008": {
+        "pages_to_read": ["209", "210", "211", "212"],
+    },
+    "2009": {
+        "pages_to_read": ["213", "214", "215", "216"],
+    },
+    "2010": {
+        "pages_to_read": ["221", "222", "223", "224"],
+    },
+    "2011": {
+        "pages_to_read": ["225", "226", "227", "228"],
+    },
+    "2012": {
+        "pages_to_read": ["229", "230", "231", "232"],
+    },
+    "2013": {
+        "pages_to_read": ["233", "234", "235", "236"],
+    },
+    "2014": {
+        "pages_to_read": ["237", "238", "239", "240"],
+    },
+    "2015": {
+        "pages_to_read": ["241", "242", "243", "244"],
+        # Some values were extracted into the wrong columns
+        "fix_values": [
+            (2, 10, "21,529"),
+            (1, 12, "NMVOCs"),
+            (2, 12, "0"),
+        ],
+        # for this table an additional column is created
+        # that needs to be deleted
+        "delete_columns": [11],
+    },
+    "2016": {
+        "pages_to_read": ["245", "246", "247", "248"],
+    },
+    "2017": {
+        "pages_to_read": ["249", "250", "251", "252"],
+    },
+    "2018": {
+        "pages_to_read": ["253", "254", "255", "256"],
+    },
+    "2019": {
+        "pages_to_read": ["257", "258", "259", "260"],
+    },
+}
+
+# primap2 format conversion
+coords_cols = {
+    "category": "category",
+    "entity": "entity",
+    "unit": "unit",
+}
+
+coords_defaults = {
+    "source": "BDI-GHG-Inventory",
+    "provenance": "measured",
+    "area": "BDI",
+    "scenario": "BUR1",
+}
+
+coords_terminologies = {
+    "area": "ISO3",
+    "category": "IPCC2006_PRIMAP",
+    "scenario": "PRIMAP",
+}
+
+# Page 64: The global warming potentials (GWPs) recommended by the IPCC Fifth Assessment Report (AR5)
+# and based on the annex to Decision 18/CMA.1 have been used to convert GHGs other than CO2
+# into their equivalent. These GWPs provide a consistent basis for comparing the relative effect
+# of emissions of all GHGs standardized over a 100-year period by converting emissions of other
+# GHGs into those of CO2. The values adopted for the three direct GHGs are 1 for CO2, 28 for CH4
+# and 265 for N2O.
+gwp_to_use = "AR5GWP100"
+coords_value_mapping = {
+    "unit": "PRIMAP1",
+    "category": "PRIMAP1",
+    "entity": {
+        "HFCs": f"HFCS ({gwp_to_use})",
+        "PFCs": f"PFCS ({gwp_to_use})",
+        "SF6": f"SF6 ({gwp_to_use})",
+        "NMVOCs": "NMVOC",
+    },
+}
+
+filter_remove = {
+    "f_memo": {"category": "MEMO"},
+    "f_empty": {"category": ""},
+    # "f1": {
+    #     "entity": ["Other halogenated gases with CO2 equivalent conversion factors"],
+    # },
+    "f2": {
+        "entity": ["Other halogenated gases without CO2 equivalent conversion factors"],
+    },
+}
+
+meta_data = {
+    "references": "https://unfccc.int/documents/611668",
+    "rights": "",  # unknown
+    "contact": "daniel-busch@climate-resource.de",
+    "title": "Burundi. Biennial update report (BUR). BUR1",
+    "comment": "Read fom pdf by Daniel Busch",
+    "institution": "UNFCCC",
+}
+
+country_processing_step1 = {
+    "aggregate_cats": {
+        "M.3.C.AG": {
+            "sources": [
+                "3.C.1",
+                "3.C.2",
+                "3.C.3",
+                "3.C.4",
+                "3.C.5",
+                "3.C.6",
+                "3.C.7",
+                "3.C.8",
+            ],
+            "name": "Aggregate sources and non-CO2 emissions sources on land "
+            "(Agriculture)",
+        },
+        "M.3.D.AG": {"sources": ["3.D.2"], "name": "Other (Agriculture)"},
+        "M.AG.ELV": {
+            "sources": ["M.3.C.AG", "M.3.D.AG"],
+            "name": "Agriculture excluding livestock",
+        },
+        "M.AG": {"sources": ["3.A", "M.AG.ELV"], "name": "Agriculture"},
+        "M.3.D.LU": {"sources": ["3.D.1"], "name": "Other (LULUCF)"},
+        "M.LULUCF": {"sources": ["3.B", "M.3.D.LU"], "name": "LULUCF"},
+        "M.0.EL": {
+            "sources": ["1", "2", "M.AG", "4", "5"],
+            "name": "National total emissions excluding LULUCF",
+        },
+    },
+    "basket_copy": {
+        "GWPs_to_add": ["SARGWP100", "AR4GWP100", "AR6GWP100"],
+        "entities": ["HFCS", "PFCS"],
+        "source_GWP": gwp_to_use,
+    },
+}
+
+gas_baskets = {
+    "FGASES (SARGWP100)": ["HFCS (SARGWP100)", "PFCS (SARGWP100)", "SF6", "NF3"],
+    "FGASES (AR4GWP100)": ["HFCS (AR4GWP100)", "PFCS (AR4GWP100)", "SF6", "NF3"],
+    "FGASES (AR5GWP100)": ["HFCS (AR5GWP100)", "PFCS (AR5GWP100)", "SF6", "NF3"],
+    "FGASES (AR6GWP100)": ["HFCS (AR6GWP100)", "PFCS (AR6GWP100)", "SF6", "NF3"],
+    "KYOTOGHG (SARGWP100)": ["CO2", "CH4", "N2O", "FGASES (SARGWP100)"],
+    "KYOTOGHG (AR4GWP100)": ["CO2", "CH4", "N2O", "FGASES (AR4GWP100)"],
+    "KYOTOGHG (AR5GWP100)": ["CO2", "CH4", "N2O", "FGASES (AR5GWP100)"],
+    "KYOTOGHG (AR6GWP100)": ["CO2", "CH4", "N2O", "FGASES (AR6GWP100)"],
+}

+ 226 - 0
UNFCCC_GHG_data/UNFCCC_reader/Burundi/read_BDI_BUR1_from_pdf.py

@@ -0,0 +1,226 @@
+import camelot
+import primap2 as pm2
+import pandas as pd
+
+from UNFCCC_GHG_data.helper import downloaded_data_path, extracted_data_path
+from UNFCCC_GHG_data.helper.functions import process_data_for_country
+
+from config_BDI_BUR1 import (
+    inv_conf,
+    meta_data,
+    filter_remove,
+    coords_value_mapping,
+    coords_terminologies,
+    coords_defaults,
+    coords_cols,
+    gas_baskets,
+    country_processing_step1,
+    inv_conf_per_year,
+)
+
+# ###
+# configuration
+# ###
+
+input_folder = downloaded_data_path / "UNFCCC" / "Burundi" / "BUR1"
+output_folder = extracted_data_path / "UNFCCC" / "Burundi"
+
+if not output_folder.exists():
+    output_folder.mkdir()
+
+pdf_file = "Burundi_BUR_1_Report__Francais.pdf"
+output_filename = "BDI_BUR1_2023_"
+category_column = f"category ({coords_terminologies['category']})"
+compression = dict(zlib=True, complevel=9)
+
+# ###
+# 1. Read in tables
+# ###
+
+df_all = None
+for year in inv_conf_per_year.keys():
+    print("-" * 60)
+    print(f"Reading year {year}.")
+    print("-" * 60)
+    df_year = None
+    for page in inv_conf_per_year[year]["pages_to_read"]:
+        print(f"Reading table from page {page}.")
+        tables_inventory_original = camelot.read_pdf(
+            str(input_folder / pdf_file),
+            pages=page,
+            flavor="lattice",
+            split_text=True,
+        )
+        print("Reading complete.")
+
+        df_page = tables_inventory_original[0].df
+
+        if df_year is None:
+            df_year = df_page
+        else:
+            df_year = pd.concat(
+                [df_year, df_page],
+                axis=0,
+                join="outer",
+            ).reset_index(drop=True)
+
+    print(f"Concatenating all tables for {year}.")
+    # remove line breaks
+    for column in df_year.columns:
+        df_year[column] = df_year[column].str.replace("\n", "")
+
+    # fix broken values in cells
+    if "fix_values" in inv_conf_per_year[year].keys():
+        for index, column, value in inv_conf_per_year[year]["fix_values"]:
+            df_year.at[index, column] = value
+
+    # delete extra columns
+    if "delete_columns" in inv_conf_per_year[year].keys():
+        for column in inv_conf_per_year[year]["delete_columns"]:
+            df_year = df_year.drop(columns=column)
+        df_year.columns = range(df_year.columns.size)
+
+    df_header = pd.DataFrame([inv_conf["header"], inv_conf["unit"]])
+
+    df_year = pd.concat([df_header, df_year[2:]], axis=0, join="outer").reset_index(
+        drop=True
+    )
+
+    df_year = pm2.pm2io.nir_add_unit_information(
+        df_year,
+        unit_row=inv_conf["unit_row"],
+        entity_row=inv_conf["entity_row"],
+        regexp_entity=".*",
+        regexp_unit=".*",
+        default_unit="Gg",
+    )
+
+    print("Added unit information.")
+
+    # set index
+    df_year = df_year.set_index(inv_conf["index_cols"])
+
+    # convert to long format
+    df_year_long = pm2.pm2io.nir_convert_df_to_long(
+        df_year, year, inv_conf["header_long"]
+    )
+
+    # extract from tuple
+    df_year_long["orig_cat_name"] = df_year_long["orig_cat_name"].str[0]
+
+    # prep for conversion to PM2 IF and native format
+    # make a copy of the categories row
+    df_year_long["category"] = df_year_long["orig_cat_name"]
+
+    # replace cat names by codes in col "category"
+    # first the manual replacements
+    df_year_long["category"] = df_year_long["category"].str.replace("\n", "")
+
+    df_year_long["category"] = df_year_long["category"].replace(
+        inv_conf["cat_codes_manual"]
+    )
+
+    df_year_long["category"] = df_year_long["category"].str.replace(".", "")
+
+    # then the regex replacements
+    def repl(m):
+        return m.group("code")
+
+    df_year_long["category"] = df_year_long["category"].str.replace(
+        inv_conf["cat_code_regexp"], repl, regex=True
+    )
+
+    df_year_long = df_year_long.reset_index(drop=True)
+
+    df_year_long["data"] = df_year_long["data"].str.replace(",", ".")
+
+    # TODO: I don't think there are 'NE1' values in these tables.
+    # df_year_long["data"] = df_year_long["data"].str.replace("NE1", "NE")
+
+    # make sure all col headers are str
+    df_year_long.columns = df_year_long.columns.map(str)
+
+    df_year_long = df_year_long.drop(columns=["orig_cat_name"])
+
+    if df_all is None:
+        df_all = df_year_long
+    else:
+        df_all = pd.concat(
+            [df_all, df_year_long],
+            axis=0,
+            join="outer",
+        ).reset_index(drop=True)
+
+### convert to interchange format ###
+print("Converting to interchange format.")
+df_all_IF = pm2.pm2io.convert_long_dataframe_if(
+    df_all,
+    coords_cols=coords_cols,
+    coords_defaults=coords_defaults,
+    coords_terminologies=coords_terminologies,
+    coords_value_mapping=coords_value_mapping,
+    filter_remove=filter_remove,
+    meta_data=meta_data,
+    convert_str=True,
+    time_format="%Y",
+)
+
+
+### convert to primap2 format ###
+print("Converting to primap2 format.")
+data_pm2 = pm2.pm2io.from_interchange_format(df_all_IF)
+
+
+# ###
+# Save raw data to IF and native format.
+# ###
+
+data_if = data_pm2.pr.to_interchange_format()
+
+pm2.pm2io.write_interchange_format(
+    output_folder / (output_filename + coords_terminologies["category"] + "_raw"),
+    data_if,
+)
+
+encoding = {var: compression for var in data_pm2.data_vars}
+data_pm2.pr.to_netcdf(
+    output_folder / (output_filename + coords_terminologies["category"] + "_raw.nc"),
+    encoding=encoding,
+)
+
+
+# ###
+# Processing
+# ###
+
+data_proc_pm2 = process_data_for_country(
+    data_country=data_pm2,
+    entities_to_ignore=[],
+    gas_baskets=gas_baskets,
+    filter_dims=None,
+    cat_terminology_out=None,
+    category_conversion=None,
+    sectors_out=None,
+    processing_info_country=country_processing_step1,
+)
+
+# ###
+# save processed data to IF and native format
+# ###
+
+terminology_proc = coords_terminologies["category"]
+
+data_proc_if = data_proc_pm2.pr.to_interchange_format()
+
+if not output_folder.exists():
+    output_folder.mkdir()
+pm2.pm2io.write_interchange_format(
+    output_folder / (output_filename + terminology_proc), data_proc_if
+)
+
+encoding = {var: compression for var in data_proc_pm2.data_vars}
+data_proc_pm2.pr.to_netcdf(
+    output_folder / (output_filename + terminology_proc + ".nc"), encoding=encoding
+)
+
+print("Saved processed data.")

+ 457 - 0
UNFCCC_GHG_data/UNFCCC_reader/Guinea/config_GIN_BUR1.py

@@ -0,0 +1,457 @@
+# primap2 format conversion
+coords_cols = {
+    "category": "category",
+    "entity": "entity",
+    "unit": "unit",
+}
+
+coords_defaults = {
+    "source": "GIN-GHG-Inventory",
+    "provenance": "measured",
+    "area": "GIN",
+    "scenario": "BUR1",
+}
+
+coords_terminologies = {
+    "area": "ISO3",
+    "category": "IPCC2006_PRIMAP",
+    "scenario": "PRIMAP",
+}
+
+# gwp conversion is mentioned on page 20 in the report
+gwp_to_use = "AR4GWP100"
+coords_value_mapping = {
+    "main": {
+        "unit": "PRIMAP1",
+        "category": "PRIMAP1",
+        "entity": {
+            "HFCs": f"HFCS ({gwp_to_use})",
+            "PFCs": f"PFCS ({gwp_to_use})",
+            "SF6": f"SF6 ({gwp_to_use})",
+            "NMVOCs": "NMVOC",
+        },
+    },
+    "energy": {
+        "unit": "PRIMAP1",
+        "category": "PRIMAP1",
+        "entity": {
+            "NMVOCs": "NMVOC",
+        },
+    },
+    "afolu": {
+        "unit": "PRIMAP1",
+        "category": "PRIMAP1",
+        "entity": {
+            "NMVOCs": "NMVOC",
+        },
+    },
+    "waste": {
+        "unit": "PRIMAP1",
+        "category": "PRIMAP1",
+        "entity": {
+            "NMVOCs": "NMVOC",
+        },
+    },
+    "trend": {
+        "unit": "PRIMAP1",
+        "category": "PRIMAP1",
+        "entity": {
+            "NMVOCs": "NMVOC",
+        },
+    },
+}
+
+filter_remove = {
+    "f_memo": {"category": "MEMO"},
+}
+
+meta_data = {
+    "references": "https://unfccc.int/documents/629549",
+    "rights": "",  # unknown
+    "contact": "daniel-busch@climate-resource.de",
+    "title": "Guinea. Biennial update report (BUR). BUR1",
+    "comment": "Read fom pdf by Daniel Busch",
+    "institution": "UNFCCC",
+}
+
+page_def_templates = {
+    "110": {
+        "area": ["36,718,589,87"],
+        "cols": ["290,340,368,392,425,445,465,497,535,564"],
+    },
+    "111": {
+        "area": ["36,736,587,107"],
+        "cols": ["293,335,369,399,424,445,468,497,535,565"],
+    },
+    "112": {
+        "area": ["35,733,588,106"],
+        "cols": ["293,335,369,399,424,445,468,497,535,565"],
+    },
+    "113": {
+        "area": ["35,733,588,106"],
+        "cols": ["293,335,365,399,424,445,468,497,535,565"],
+    },
+    "131": {
+        "area": ["36,718,590,83"],
+        "cols": ["293,332,370,406,442,480,516,554"],
+    },
+}
+
+# for main table
+header_inventory = [
+    "Greenhouse gas source and sink categories",
+    "CO2",
+    "CH4",
+    "N2O",
+    "HFCs",
+    "PFCs",
+    "SF6",
+    "NOx",
+    "CO",
+    "NMVOCs",
+    "SO2",
+]
+
+unit_inventory = ["-"] + ["Gg"] * (
+    len(header_inventory) - 1
+)  # "-" is the unit for the category column
+unit_inventory[4] = "GgCO2eq"
+unit_inventory[5] = "GgCO2eq"
+unit_inventory[6] = "GgCO2eq"
+
+# for energy tables
+header_energy = [
+    "Greenhouse gas source and sink categories",
+    "CO2",
+    "CH4",
+    "N2O",
+    "NOx",
+    "CO",
+    "NMVOCs",
+    "SO2",
+]
+unit_energy = ["-"] + ["Gg"] * (len(header_energy) - 1)  # "-" for the category column
+
+# for afolu tables
+header_afolu = [
+    "Greenhouse gas source and sink categories",
+    "CO2",
+    "CH4",
+    "N2O",
+    "NOx",
+    "CO",
+    "NMVOCs",
+]
+unit_afolu = ["-"] + ["Gg"] * (len(header_afolu) - 1)
+
+# for waste table
+header_waste = [
+    "Greenhouse gas source and sink categories",
+    "CO2",
+    "CH4",
+    "N2O",
+    "NOx",
+    "CO",
+    "NMVOCs",
+    "SO2",
+]
+unit_waste = ["-"] + ["Gg"] * (len(header_waste) - 1)
+
+# for trend table (unit is always Gg for this table)
+# 'data' prefix is needed for pd.wide_to_long() later
+header_trend = [
+    "orig_cat_name",
+    "data1990",
+    "data1995",
+    "data2000",
+    "data2005",
+    "data2010",
+    "data2015",
+    "data2018",
+    "data2019",
+]
+
+set_value = {
+    "main": {
+        "110": [
+            (4, 0, "1.A.1 - Industries énergétiques"),
+            (8, 0, "1.A.4 - Autres secteurs"),
+        ],
+        "111": [
+            (4, 0, "1.A.1 - Industries énergétiques"),
+            (8, 0, "1.A.4 - Autres secteurs"),
+        ],
+        "112": [
+            (4, 0, "1.A.1 - Industries énergétiques"),
+            (8, 0, "1.A.4 - Autres secteurs"),
+        ],
+    }
+}
+
+delete_row = {"main": {"110": [3, 7], "111": [3, 7], "112": [3, 7]}}
+
+delete_rows_by_category = {
+    "energy": {
+        "116": [
+            "1.A.3.a.i - Aviation internationale (Soutes internationales)",
+            "Éléments pour information",
+            "1.A.3.d.i - Navigation internationale (soutes internationales)",
+            "1.A.5.c - Opérations multilatérales (Éléments pour information)",
+        ],
+        "117": [
+            "1.A.3.a.i - Aviation internationale (Soutes internationales)",
+            "Éléments pour information",
+            "1.A.3.d.i - Navigation internationale (soutes internationales)",
+            "1.A.5.c - Opérations multilatérales (Éléments pour information)",
+        ],
+        "118": [
+            "1.A.3.a.i - Aviation internationale (Soutes internationales)",
+            "Éléments pour information",
+            "1.A.3.d.i - Navigation internationale (soutes internationales)",
+            "1.A.5.c - Opérations multilatérales (Éléments pour information)",
+        ],
+        "119": [
+            "1.A.3.a.i - Aviation internationale (Soutes internationales)",
+            "Information Items",
+            "1.A.3.d.i - Navigation internationale (soutes internationales)",
+            "1.A.5.c - Opérations multilatérales (Éléments pour information)",
+        ],
+    },
+    "trend": {
+        # The categories 3.D / 3.D.1 / 3.D.2 contain values different to the main table
+        # They should also not contain negative values according to IPCC methodology:
+        # https://www.ipcc-nggip.iges.or.jp/public/2006gl/
+        # Therefore, the rows are deleted from the table.
+        "131": [
+            "3.D - Autres",
+            "3.D.1 - Produits ligneux récoltés",
+            "3.D.2 - Autres (veuillez spécifier)",
+        ],
+        # Delete empty line for pages 132-137.
+        "132": [""],
+        "133": [""],
+        "134": [""],
+        "135": [""],
+        "136": [""],
+        "137": [""],
+    },
+}
+
+# define config dict
+inv_conf = {
+    "header": header_inventory,
+    "unit": unit_inventory,
+    "header_energy": header_energy,
+    "unit_energy": unit_energy,
+    "header_afolu": header_afolu,
+    "unit_afolu": unit_afolu,
+    "header_waste": header_waste,
+    "unit_waste": unit_waste,
+    "header_trend": header_trend,
+    "entity_row": 0,
+    "unit_row": 1,
+    "index_cols": "Greenhouse gas source and sink categories",
+    "pages_to_read": {
+        "main": ["110", "111", "112", "113"],
+        "energy": ["116", "117", "118", "119"],
+        "afolu": ["124", "125", "126", "127"],
+        "waste": ["128", "130"],
+        # The table for CO (page 135) seems completely mixed up and should not be considered.
+        # The total CO values for 1990 equal the values in the main table.
+        # The total CO values for 1995 equal the values for 2000 in the main table.
+        # The total CO values for 2000 equal the values for 2010 in the main table.
+        # The total CO values for 2005 are identical to the 2019 values in the same table.
+        # The total CO values for 2010 are identical to the 1990 values in the same table.
+        # The total CO values for 2019 are identical to the 1995 values in the same table.
+        # And so on.
+        "trend": ["131", "132", "133", "134", "136", "137"],
+    },
+    "entity_for_page": {"trend": ["CO2", "CH4", "N2O", "NOx", "NMVOCs", "SO2"]},
+    "year": {
+        "110": 1990,
+        "111": 2000,
+        "112": 2010,
+        "113": 2019,
+        "116": 1990,
+        "117": 2000,
+        "118": 2010,
+        "119": 2019,
+        "124": 1990,
+        "125": 2000,
+        "126": 2010,
+        "127": 2019,
+    },
+    "header_long": ["orig_cat_name", "entity", "unit", "time", "data"],
+    "cat_code_regexp": r"^(?P<code>[a-zA-Z0-9\.]{1,11})[\s\.].*",
+    "cat_codes_manual": {
+        "main": {
+            "Éléments pour mémoire": "MEMO",
+            "Soutes internationales": "M.BK",
+            "1.A.3.a.i - Aviation internationale (soutes internationales)": "M.BK.A",
+            "1.A.3.d.i - Navigation internationale (soutes internationales)": "M.BK.M",
+            "1.A.5.c - Opérations multilatérales": "M.MULTIOP",
+            "Total des émissions et absorptions nationales": "0",
+            "2A5: Autre": "2A5",
+        },
+        "energy": {
+            "International Bunkers": "M.BK",
+            "1.A.3.a.i - Aviation internationale (soutes internationales)": "M.BK.A",
+            "1.A.3.d.i - Navigation internationale (soutes internationales)": "M.BK.M",
+            "1.A.5.c - Opérations multilatérales": "M.MULTIOP",
+            "CO2 from Biomass Combustion for Energy Production": "M.BIO",
+        },
+        "trend": {
+            "Total des émissions et absorptions nationales": "0",
+            "2A5: Autre": "2A5",
+            "Éléments pour mémoire": "MEMO",
+            "Soutes internationales": "M.BK",
+            "1.A.3.a.i - Aviation internationale (soutes internationales)": "M.BK.A",
+            "1.A.3.d.i - Navigation internationale (soutes internationales)": "M.BK.M",
+            "1.A.5.c - Opérations multilatérales": "M.MULTIOP",
+        },
+    },
+}
+
+country_processing_step1 = {
+    "aggregate_cats": {
+        "M.3.C.AG": {
+            "sources": [
+                "3.C.1",
+                "3.C.2",
+                "3.C.3",
+                "3.C.4",
+                "3.C.5",
+                "3.C.6",
+                "3.C.7",
+                "3.C.8",
+            ],
+            "name": "Aggregate sources and non-CO2 emissions sources on land "
+            "(Agriculture)",
+        },
+        "M.3.D.AG": {"sources": ["3.D.2"], "name": "Other (Agriculture)"},
+        "M.AG.ELV": {
+            "sources": ["M.3.C.AG", "M.3.D.AG"],
+            "name": "Agriculture excluding livestock",
+        },
+        "M.AG": {"sources": ["3.A", "M.AG.ELV"], "name": "Agriculture"},
+        "M.3.D.LU": {"sources": ["3.D.1"], "name": "Other (LULUCF)"},
+        "M.LULUCF": {"sources": ["3.B", "M.3.D.LU"], "name": "LULUCF"},
+        "M.0.EL": {
+            "sources": ["1", "2", "M.AG", "4"],
+            "name": "National total emissions excluding LULUCF",
+        },
+    },
+    "basket_copy": {
+        "GWPs_to_add": ["SARGWP100", "AR5GWP100", "AR6GWP100"],
+        "entities": ["HFCS", "PFCS"],
+        "source_GWP": gwp_to_use,
+    },
+}
+
+gas_baskets = {
+    "FGASES (SARGWP100)": ["HFCS (SARGWP100)", "PFCS (SARGWP100)", "SF6", "NF3"],
+    "FGASES (AR4GWP100)": ["HFCS (AR4GWP100)", "PFCS (AR4GWP100)", "SF6", "NF3"],
+    "FGASES (AR5GWP100)": ["HFCS (AR5GWP100)", "PFCS (AR5GWP100)", "SF6", "NF3"],
+    "FGASES (AR6GWP100)": ["HFCS (AR6GWP100)", "PFCS (AR6GWP100)", "SF6", "NF3"],
+    "KYOTOGHG (SARGWP100)": ["CO2", "CH4", "N2O", "FGASES (SARGWP100)"],
+    "KYOTOGHG (AR4GWP100)": ["CO2", "CH4", "N2O", "FGASES (AR4GWP100)"],
+    "KYOTOGHG (AR5GWP100)": ["CO2", "CH4", "N2O", "FGASES (AR5GWP100)"],
+    "KYOTOGHG (AR6GWP100)": ["CO2", "CH4", "N2O", "FGASES (AR6GWP100)"],
+}
+
+replace_info = {
+    "main": [
+        ("3", "CO", "2019", 27.406),
+        ("3.C", "CO", "2019", 27.406),
+        ("3.C.1", "CO", "2019", 27.406),
+        ("3", "N2O", "1990", 2.190),
+        ("3", "NOx", "2019", 1.644),
+        ("3.C", "NOx", "2019", 1.644),
+        ("3.C.1", "NOx", "2019", 1.644),
+        ("M.BK", "NOx", "1990", 0.001),
+        ("M.BK", "NOx", "2000", 0.003),
+        ("M.BK", "NOx", "2010", 0.052),
+        ("M.BK", "CO", "1990", 0.0002),
+        ("M.BK", "CO", "2000", 0.0006),
+        ("M.BK", "CO", "2010", 0.01),
+        ("M.BK", "NMVOC", "1990", 0.0001),
+        ("M.BK", "NMVOC", "2000", 0.0002),
+        ("M.BK", "NMVOC", "2010", 0.003),
+    ],
+    "trend": [
+        ("M.BK", "CH4", "1990"),
+        ("M.BK.A", "CH4", "1990"),
+        ("M.BK", "CH4", "2000"),
+        ("M.BK.A", "CH4", "2000"),
+        ("M.BK", "CH4", "2010"),
+        ("M.BK.A", "CH4", "2010"),
+        ("1.A.2", "N2O", "1990"),
+        ("M.BK", "N2O", "1990"),
+        ("M.BK.A", "N2O", "1990"),
+        ("M.BK", "N2O", "2000"),
+        ("M.BK.A", "N2O", "2000"),
+        ("M.BK", "N2O", "2010"),
+        ("M.BK.A", "N2O", "2010"),
+        ("M.BK", "N2O", "2019"),
+        ("M.BK.A", "N2O", "2019"),
+        ("M.BK", "NOx", "1990"),
+        ("M.BK", "NOx", "2000"),
+        ("M.BK", "NOx", "2010"),
+        ("3.C", "NOx", "2019"),
+        ("3.C.1", "NOx", "2019"),
+        ("3", "NOx", "2019"),
+        ("1.A.2", "NMVOC", "1990"),
+        ("M.BK", "NMVOC", "1990"),
+        ("0", "NMVOC", "2000"),
+        ("1", "NMVOC", "2000"),
+        ("1.A", "NMVOC", "2000"),
+        ("1.A.1", "NMVOC", "2000"),
+        ("1.A.2", "NMVOC", "2000"),
+        ("1.A.3", "NMVOC", "2000"),
+        ("1.A.4", "NMVOC", "2000"),
+        ("2", "NMVOC", "2000"),
+        ("2.H", "NMVOC", "2000"),
+        ("2.H.2", "NMVOC", "2000"),
+        ("M.BK", "NMVOC", "2000"),
+        ("0", "NMVOC", "2010"),
+        ("1", "NMVOC", "2010"),
+        ("1.A", "NMVOC", "2010"),
+        ("1.A.1", "NMVOC", "2010"),
+        ("1.A.2", "NMVOC", "2010"),
+        ("1.A.3", "NMVOC", "2010"),
+        ("1.A.4", "NMVOC", "2010"),
+        ("2", "NMVOC", "2010"),
+        ("M.BK", "NMVOC", "2010"),
+        ("1.A.2", "NMVOC", "2019"),
+    ],
+}
+
+replace_categories = {
+    "afolu": {
+        "124-126": [
+            (17, "3.A.2.a.i - Vaches laitières"),
+            (18, "3.A.2.a.ii - Autres bovins"),
+            (19, "3.A.2.b - Buffle"),
+            (20, "3.A.2.c - Ovins"),
+            (21, "3.A.2.d - Caprins"),
+            (22, "3.A.2.e - Chameaux"),
+            (23, "3.A.2.f - Chevaux"),
+            (24, "3.A.2.g - Mules et ânes"),
+            (25, "3.A.2.h - Porcins"),
+            (26, "3.A.2.i - Volailles"),
+        ],
+        "127": [
+            (19, "3.A.2.a.i - Vaches laitières"),
+            (20, "3.A.2.a.ii - Autres bovins"),
+            (21, "3.A.2.b - Buffle"),
+            (22, "3.A.2.c - Ovins"),
+            (23, "3.A.2.d - Caprins"),
+            (24, "3.A.2.e - Chameaux"),
+            (25, "3.A.2.f - Chevaux"),
+            (26, "3.A.2.g - Mules et ânes"),
+            (27, "3.A.2.h - Porcins"),
+            (28, "3.A.2.i - Volailles"),
+            (29, "3.A.2.j - Autres (préciser)"),
+        ],
+    }
+}
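
`header_trend` above prefixes the year columns with "data" because pd.wide_to_long(), used for the trend tables in the reading script below, selects columns by that stub name. A self-contained illustration with invented numbers:

import pandas as pd

df = pd.DataFrame(
    {
        "category": ["1.A", "1.B"],
        "data1990": [1.2, 0.4],  # invented values
        "data2019": [2.3, 0.7],
    }
)

df_long = pd.wide_to_long(df, stubnames="data", i="category", j="time").reset_index()
# -> columns: category, time, data; one row per (category, year) pair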

+ 679 - 0
UNFCCC_GHG_data/UNFCCC_reader/Guinea/read_GIN_BUR1_from_pdf.py

@@ -0,0 +1,679 @@
+import camelot
+import primap2 as pm2
+import pandas as pd
+
+from UNFCCC_GHG_data.helper import downloaded_data_path, extracted_data_path
+from UNFCCC_GHG_data.helper.functions import process_data_for_country
+from UNFCCC_GHG_data.helper.functions_temp import find_and_replace_values
+from config_GIN_BUR1 import coords_cols, coords_defaults, coords_terminologies
+from config_GIN_BUR1 import (
+    coords_value_mapping,
+    filter_remove,
+    meta_data,
+    page_def_templates,
+    delete_rows_by_category,
+)
+from config_GIN_BUR1 import (
+    inv_conf,
+    country_processing_step1,
+    gas_baskets,
+    replace_info,
+    replace_categories,
+    set_value,
+    delete_row,
+)
+
+# ###
+# configuration
+# ###
+
+input_folder = downloaded_data_path / "UNFCCC" / "Guinea" / "BUR1"
+output_folder = extracted_data_path / "UNFCCC" / "Guinea"
+if not output_folder.exists():
+    output_folder.mkdir()
+
+pdf_file = "Rapport_IGES-Guinee-BUR1_VF.pdf"
+output_filename = "GIN_BUR1_2023_"
+category_column = f"category ({coords_terminologies['category']})"
+compression = dict(zlib=True, complevel=9)
+
+# ###
+# 1. Read in main tables
+# ###
+
+df_main = None
+for page in inv_conf["pages_to_read"]["main"]:
+    print("-" * 45)
+    print(f"Reading table from page {page}.")
+
+    tables_inventory_original = camelot.read_pdf(
+        str(input_folder / pdf_file),
+        pages=page,
+        table_areas=page_def_templates[page]["area"],
+        columns=page_def_templates[page]["cols"],
+        flavor="stream",
+        split_text=True,
+    )
+
+    print("Reading complete.")
+
+    df_inventory = tables_inventory_original[0].df.copy()
+
+    # set category names (they moved one row up)
+    if page in set_value["main"].keys():
+        for idx, col, value in set_value["main"][page]:
+            df_inventory.at[idx, col] = value
+    # delete empty row
+    if page in delete_row["main"].keys():
+        for idx in delete_row["main"][page]:
+            df_inventory = df_inventory.drop(index=idx)
+
+    # add header and unit
+    df_header = pd.DataFrame([inv_conf["header"], inv_conf["unit"]])
+    df_inventory = pd.concat(
+        [df_header, df_inventory], axis=0, join="outer"
+    ).reset_index(drop=True)
+    df_inventory = pm2.pm2io.nir_add_unit_information(
+        df_inventory,
+        unit_row=inv_conf["unit_row"],
+        entity_row=inv_conf["entity_row"],
+        regexp_entity=".*",
+        regexp_unit=".*",
+        default_unit="Gg",
+    )
+
+    print("Added unit information.")
+
+    # set index
+    df_inventory = df_inventory.set_index(inv_conf["index_cols"])
+
+    # convert to long format
+    df_inventory_long = pm2.pm2io.nir_convert_df_to_long(
+        df_inventory, inv_conf["year"][page], inv_conf["header_long"]
+    )
+
+    # extract category from tuple
+    df_inventory_long["orig_cat_name"] = df_inventory_long["orig_cat_name"].str[0]
+
+    # prep for conversion to PM2 IF and native format
+    df_inventory_long["category"] = df_inventory_long["orig_cat_name"]
+
+    df_inventory_long["category"] = df_inventory_long["category"].replace(
+        inv_conf["cat_codes_manual"]["main"]
+    )
+
+    df_inventory_long["category"] = df_inventory_long["category"].str.replace(".", "")
+
+    # regex replacements
+    def repl(m):
+        return m.group("code")
+
+    df_inventory_long["category"] = df_inventory_long["category"].str.replace(
+        inv_conf["cat_code_regexp"], repl, regex=True
+    )
+
+    df_inventory_long = df_inventory_long.reset_index(drop=True)
+
+    df_inventory_long["data"] = df_inventory_long["data"].str.replace(",", ".")
+    df_inventory_long["data"] = df_inventory_long["data"].str.replace("NE1", "NE")
+
+    # make sure all col headers are str
+    df_inventory_long.columns = df_inventory_long.columns.map(str)
+    df_inventory_long = df_inventory_long.drop(columns=["orig_cat_name"])
+
+    if df_main is None:
+        df_main = df_inventory_long
+    else:
+        df_main = pd.concat(
+            [df_main, df_inventory_long],
+            axis=0,
+            join="outer",
+        ).reset_index(drop=True)
+
+print("Converting to interchange format.")
+df_all_IF = pm2.pm2io.convert_long_dataframe_if(
+    df_main,
+    coords_cols=coords_cols,
+    coords_defaults=coords_defaults,
+    coords_terminologies=coords_terminologies,
+    coords_value_mapping=coords_value_mapping["main"],
+    filter_remove=filter_remove,
+    meta_data=meta_data,
+    convert_str=True,
+    time_format="%Y",
+)
+
+df_all_IF = find_and_replace_values(
+    df=df_all_IF, replace_info=replace_info["main"], category_column=category_column
+)
+
+### convert to primap2 format ###
+data_pm2_main = pm2.pm2io.from_interchange_format(df_all_IF)
+
+# ###
+# 2. Read energy sector tables
+# ###
+
+df_energy = None
+for page in inv_conf["pages_to_read"]["energy"]:
+    print("-" * 45)
+    print(f"Reading table from page {page}.")
+
+    tables_inventory_original = camelot.read_pdf(
+        str(input_folder / pdf_file), pages=page, flavor="lattice", split_text=True
+    )
+
+    print("Reading complete.")
+
+    df_energy_year = pd.concat(
+        [tables_inventory_original[0].df[2:], tables_inventory_original[1].df[3:]],
+        axis=0,
+        join="outer",
+    ).reset_index(drop=True)
+
+    # TODO This step should be done in pm2.pm2io.convert_long_dataframe_if()
+    for row in delete_rows_by_category["energy"][page]:
+        row_to_delete = df_energy_year.index[df_energy_year[0] == row][0]
+        df_energy_year = df_energy_year.drop(index=row_to_delete)
+
+    # add header and unit
+    df_header = pd.DataFrame([inv_conf["header_energy"], inv_conf["unit_energy"]])
+
+    df_energy_year = pd.concat(
+        [df_header, df_energy_year], axis=0, join="outer"
+    ).reset_index(drop=True)
+
+    df_energy_year = pm2.pm2io.nir_add_unit_information(
+        df_energy_year,
+        unit_row=inv_conf["unit_row"],
+        entity_row=inv_conf["entity_row"],
+        regexp_entity=".*",
+        regexp_unit=".*",
+        default_unit="Gg",
+    )
+
+    print("Added unit information.")
+    # set index
+    df_energy_year = df_energy_year.set_index(inv_conf["index_cols"])
+
+    # convert to long format
+    df_energy_year_long = pm2.pm2io.nir_convert_df_to_long(
+        df_energy_year, inv_conf["year"][page], inv_conf["header_long"]
+    )
+
+    # extract from tuple
+    df_energy_year_long["orig_cat_name"] = df_energy_year_long["orig_cat_name"].str[0]
+
+    # prep for conversion to PM2 IF and native format
+    # make a copy of the categories row
+    df_energy_year_long["category"] = df_energy_year_long["orig_cat_name"]
+
+    # replace cat names by codes in col "category"
+    # first the manual replacements
+    df_energy_year_long["category"] = df_energy_year_long["category"].str.replace(
+        "\n", ""
+    )
+    df_energy_year_long["category"] = df_energy_year_long["category"].replace(
+        inv_conf["cat_codes_manual"]["energy"]
+    )
+
+    df_energy_year_long["category"] = df_energy_year_long["category"].str.replace(
+        ".", ""
+    )
+
+    # then the regex replacements
+    def repl(m):
+        return m.group("code")
+
+    df_energy_year_long["category"] = df_energy_year_long["category"].str.replace(
+        inv_conf["cat_code_regexp"], repl, regex=True
+    )
+
+    df_energy_year_long = df_energy_year_long.reset_index(drop=True)
+
+    df_energy_year_long["data"] = df_energy_year_long["data"].str.replace(",", ".")
+    df_energy_year_long["data"] = df_energy_year_long["data"].str.replace("NE1", "NE")
+
+    # make sure all col headers are str
+    df_energy_year_long.columns = df_energy_year_long.columns.map(str)
+    df_energy_year_long = df_energy_year_long.drop(columns=["orig_cat_name"])
+
+    if df_energy is None:
+        df_energy = df_energy_year_long
+    else:
+        df_energy = pd.concat(
+            [df_energy, df_energy_year_long],
+            axis=0,
+            join="outer",
+        ).reset_index(drop=True)
+
+print("Converting to interchange format.")
+df_energy_IF = pm2.pm2io.convert_long_dataframe_if(
+    df_energy,
+    coords_cols=coords_cols,
+    coords_defaults=coords_defaults,
+    coords_terminologies=coords_terminologies,
+    coords_value_mapping=coords_value_mapping["energy"],
+    filter_remove=filter_remove,
+    meta_data=meta_data,
+    convert_str=True,
+    time_format="%Y",
+)
+
+### convert to primap2 format ###
+data_pm2_energy = pm2.pm2io.from_interchange_format(df_energy_IF)
+
+# ###
+# 3. Read in afolu table
+# ###
+
+df_afolu = None
+for page in inv_conf["pages_to_read"]["afolu"]:
+    print("-" * 45)
+    print(f"Reading table from page {page}.")
+
+    tables_inventory_original = camelot.read_pdf(
+        str(input_folder / pdf_file), pages=page, flavor="lattice", split_text=True
+    )
+    print("Reading complete.")
+
+    if page == "127":
+        # table on page 127 has one extra row at the top
+        # and one extra category 3.A.1.j
+        df_afolu_year = tables_inventory_original[0].df[3:]
+        # 3.A.1.a.i to 3.A.1.j exist twice.
+        # Rename duplicate categories in tables.
+        for index, category_name in replace_categories["afolu"]["127"]:
+            df_afolu_year.at[index, 0] = category_name
+    else:
+        # cut first two lines
+        df_afolu_year = tables_inventory_original[0].df[2:]
+        # On pages 124-126 the wrong categories are slightly different
+        for index, category_name in replace_categories["afolu"]["124-126"]:
+            df_afolu_year.at[index, 0] = category_name
+
+    # add header and unit
+    df_header = pd.DataFrame([inv_conf["header_afolu"], inv_conf["unit_afolu"]])
+
+    df_afolu_year = pd.concat(
+        [df_header, df_afolu_year], axis=0, join="outer"
+    ).reset_index(drop=True)
+
+    df_afolu_year = pm2.pm2io.nir_add_unit_information(
+        df_afolu_year,
+        unit_row=inv_conf["unit_row"],
+        entity_row=inv_conf["entity_row"],
+        regexp_entity=".*",
+        regexp_unit=".*",
+        default_unit="Gg",
+    )
+
+    print("Added unit information.")
+
+    # set index
+    df_afolu_year = df_afolu_year.set_index(inv_conf["index_cols"])
+
+    # convert to long format
+    df_afolu_year_long = pm2.pm2io.nir_convert_df_to_long(
+        df_afolu_year, inv_conf["year"][page], inv_conf["header_long"]
+    )
+
+    df_afolu_year_long["orig_cat_name"] = df_afolu_year_long["orig_cat_name"].str[0]
+
+    # prep for conversion to PM2 IF and native format
+    # make a copy of the categories row
+    df_afolu_year_long["category"] = df_afolu_year_long["orig_cat_name"]
+
+    # regex replacements
+    def repl(m):
+        return m.group("code")
+
+    df_afolu_year_long["category"] = df_afolu_year_long["category"].str.replace(
+        inv_conf["cat_code_regexp"], repl, regex=True
+    )
+
+    df_afolu_year_long = df_afolu_year_long.reset_index(drop=True)
+
+    df_afolu_year_long["data"] = df_afolu_year_long["data"].str.replace(",", ".")
+    df_afolu_year_long["data"] = df_afolu_year_long["data"].str.replace("NE1", "NE")
+
+    # make sure all col headers are str
+    df_afolu_year_long.columns = df_afolu_year_long.columns.map(str)
+    df_afolu_year_long = df_afolu_year_long.drop(columns=["orig_cat_name"])
+
+    if df_afolu is None:
+        df_afolu = df_afolu_year_long
+    else:
+        df_afolu = pd.concat(
+            [df_afolu, df_afolu_year_long],
+            axis=0,
+            join="outer",
+        ).reset_index(drop=True)
+
+print("Converting to interchange format.")
+df_afolu_IF = pm2.pm2io.convert_long_dataframe_if(
+    df_afolu,
+    coords_cols=coords_cols,
+    coords_defaults=coords_defaults,
+    coords_terminologies=coords_terminologies,
+    coords_value_mapping=coords_value_mapping["afolu"],
+    filter_remove=filter_remove,
+    meta_data=meta_data,
+    convert_str=True,
+    time_format="%Y",
+)
+
+### convert to primap2 format ###
+data_pm2_afolu = pm2.pm2io.from_interchange_format(df_afolu_IF)
+
+# ###
+# 4. Read in Waste tables - pages 128, 130
+# ###
+
+# There are three tables for three years on page 128
+# and another table for the last year on page 130
+
+# read the first three tables
+page = inv_conf["pages_to_read"]["waste"][0]
+tables_inventory_original_128 = camelot.read_pdf(
+    str(input_folder / pdf_file), pages=page, flavor="lattice", split_text=True
+)
+
+# read last table
+page = inv_conf["pages_to_read"]["waste"][1]
+tables_inventory_original_130 = camelot.read_pdf(
+    str(input_folder / pdf_file), pages=page, flavor="lattice", split_text=True
+)
+
+# combine in a dict
+df_waste_years = {
+    "1990": tables_inventory_original_128[0].df,
+    "2000": tables_inventory_original_128[1].df,
+    "2010": tables_inventory_original_128[2].df,
+    "2019": tables_inventory_original_130[0].df,
+}
+
+df_waste = None
+for year in df_waste_years.keys():
+    print("-" * 45)
+    print(f"Processing table for {year}.")
+
+    df_waste_year = df_waste_years[year][2:]
+
+    # add header and unit
+    df_header = pd.DataFrame([inv_conf["header_waste"], inv_conf["unit_waste"]])
+
+    df_waste_year = pd.concat(
+        [df_header, df_waste_year], axis=0, join="outer"
+    ).reset_index(drop=True)
+
+    df_waste_year = pm2.pm2io.nir_add_unit_information(
+        df_waste_year,
+        unit_row=inv_conf["unit_row"],
+        entity_row=inv_conf["entity_row"],
+        regexp_entity=".*",
+        regexp_unit=".*",
+        default_unit="Gg",
+    )
+
+    print("Added unit information.")
+
+    # set index
+    df_waste_year = df_waste_year.set_index(inv_conf["index_cols"])
+
+    # convert to long format
+    df_waste_year_long = pm2.pm2io.nir_convert_df_to_long(
+        df_waste_year, year, inv_conf["header_long"]
+    )
+
+    df_waste_year_long["orig_cat_name"] = df_waste_year_long["orig_cat_name"].str[0]
+
+    # prep for conversion to PM2 IF and native format
+    # make a copy of the categories row
+    df_waste_year_long["category"] = df_waste_year_long["orig_cat_name"]
+
+    # regex replacements
+    def repl(m):
+        return m.group("code")
+
+    df_waste_year_long["category"] = df_waste_year_long["category"].str.replace(
+        inv_conf["cat_code_regexp"], repl, regex=True
+    )
+
+    df_waste_year_long = df_waste_year_long.reset_index(drop=True)
+
+    df_waste_year_long["category"] = df_waste_year_long["category"].str.replace(".", "")
+    df_waste_year_long["data"] = df_waste_year_long["data"].str.replace(",", ".")
+    df_waste_year_long["data"] = df_waste_year_long["data"].str.replace("NE1", "NE")
+
+    # make sure all col headers are str
+    df_waste_year_long.columns = df_waste_year_long.columns.map(str)
+    df_waste_year_long = df_waste_year_long.drop(columns=["orig_cat_name"])
+
+    if df_waste is None:
+        df_waste = df_waste_year_long
+    else:
+        df_waste = pd.concat(
+            [df_waste, df_waste_year_long],
+            axis=0,
+            join="outer",
+        ).reset_index(drop=True)
+
+print("Converting to interchange format.")
+df_waste_IF = pm2.pm2io.convert_long_dataframe_if(
+    df_waste,
+    coords_cols=coords_cols,
+    coords_defaults=coords_defaults,
+    coords_terminologies=coords_terminologies,
+    coords_value_mapping=coords_value_mapping["waste"],
+    filter_remove=filter_remove,
+    meta_data=meta_data,
+    convert_str=True,
+    time_format="%Y",
+)
+
+### convert to primap2 format ###
+data_pm2_waste = pm2.pm2io.from_interchange_format(df_waste_IF)
+
+# ###
+# 5. Read in trend tables - pages 131 - 137
+# ###
+
+df_trend = None
+pages = inv_conf["pages_to_read"]["trend"]
+entities = inv_conf["entity_for_page"]["trend"]
+
+# for this set of tables every page is a different entity
+for page, entity in zip(pages, entities):
+    print("-" * 45)
+    print(f"Reading table for page {page} and entity {entity}.")
+
+    # The first table must be read with flavor="stream", as
+    # flavor="lattice" raises an error. This may be a camelot
+    # issue (see https://github.com/atlanhq/camelot/issues/306)
+    # or happen because the characters in the first row almost
+    # touch the table grid.
+    if page == "131":
+        tables_inventory_original = camelot.read_pdf(
+            str(input_folder / pdf_file),
+            pages=page,
+            table_areas=page_def_templates[page]["area"],
+            columns=page_def_templates[page]["cols"],
+            flavor="stream",
+            split_text=True,
+        )
+
+        df_trend_entity = tables_inventory_original[0].df[1:]
+
+    else:
+        tables_inventory_original = camelot.read_pdf(
+            str(input_folder / pdf_file), pages=page, flavor="lattice", split_text=True
+        )
+        df_trend_entity = tables_inventory_original[0].df[3:]
+
+    print("Reading complete.")
+
+    if page in delete_rows_by_category["trend"].keys():
+        for category in delete_rows_by_category["trend"][page]:
+            row_to_delete = df_trend_entity.index[df_trend_entity[0] == category][0]
+            df_trend_entity = df_trend_entity.drop(index=row_to_delete)
+
+    df_trend_entity.columns = inv_conf["header_trend"]
+
+    df_trend_entity = df_trend_entity.copy()
+
+    # unit is always Gg
+    df_trend_entity.loc[:, "unit"] = "Gg"
+
+    # only one entity per table
+    df_trend_entity.loc[:, "entity"] = entity
+
+    df_trend_entity.loc[:, "category"] = df_trend_entity["orig_cat_name"]
+
+    df_trend_entity["category"] = df_trend_entity["category"].replace(
+        inv_conf["cat_codes_manual"]["trend"]
+    )
+
+    df_trend_entity.loc[:, "category"] = df_trend_entity["category"].str.replace(
+        ".", ""
+    )
+    df_trend_entity.loc[:, "category"] = df_trend_entity["category"].str.replace(
+        "\n", ""
+    )
+
+    def repl(m):
+        return m.group("code")
+
+    df_trend_entity.loc[:, "category"] = df_trend_entity["category"].str.replace(
+        inv_conf["cat_code_regexp"], repl, regex=True
+    )
+
+    df_trend_entity = df_trend_entity.reset_index(drop=True)
+
+    print("Created category codes.")
+
+    for year in inv_conf["header_trend"][1:]:
+        df_trend_entity.loc[:, year] = df_trend_entity[year].str.replace(",", ".")
+        df_trend_entity.loc[:, year] = df_trend_entity[year].str.replace("NE1", "NE")
+
+    # make sure all col headers are str
+    df_trend_entity.columns = df_trend_entity.columns.map(str)
+
+    df_trend_entity = df_trend_entity.drop(columns=["orig_cat_name"])
+
+    # TODO better to use pm2.pm2io.convert_wide_dataframe_if
+    df_trend_entity_long = pd.wide_to_long(
+        df_trend_entity, stubnames="data", i="category", j="time"
+    )
+
+    print("Converted to long format.")
+
+    df_trend_entity_long = df_trend_entity_long.reset_index()
+
+    if df_trend is None:
+        df_trend = df_trend_entity_long
+    else:
+        df_trend = pd.concat(
+            [df_trend, df_trend_entity_long],
+            axis=0,
+            join="outer",
+        ).reset_index(drop=True)
+
+print("Converting to interchange format.")
+
+df_trend_IF = pm2.pm2io.convert_long_dataframe_if(
+    df_trend,
+    coords_cols=coords_cols,
+    coords_defaults=coords_defaults,
+    coords_terminologies=coords_terminologies,
+    coords_value_mapping=coords_value_mapping["trend"],
+    filter_remove=filter_remove,
+    meta_data=meta_data,
+    convert_str=True,
+    time_format="%Y",
+)
+
+df_trend_IF = find_and_replace_values(
+    df=df_trend_IF, replace_info=replace_info["trend"], category_column=category_column
+)
+
+### convert to primap2 format ###
+data_pm2_trend = pm2.pm2io.from_interchange_format(df_trend_IF)
+
+# ###
+# Combine tables
+# ###
+
+# merge main and energy
+# There are discrepancies larger than 0.86 for category 1.A.2, entity NMVOC,
+# in the years 1990, 2000, 2010, and 2019.
+# It is assumed that the main table has the correct values.
+print("Merging main and energy table.")
+data_pm2 = data_pm2_main.pr.merge(data_pm2_energy, tolerance=1)
+
+# merge afolu
+print("Merging afolu table.")
+data_pm2 = data_pm2.pr.merge(data_pm2_afolu, tolerance=0.11)
+
+# merge waste
+# The tolerance is increased to merge the values for 4.C, 1990, N2O:
+# 0.003 in the sector table vs. 0.0034 in the main table.
+print("Merging waste table.")
+data_pm2 = data_pm2.pr.merge(data_pm2_waste, tolerance=0.15)
+
+# merge trend
+print("Merging trend table.")
+data_pm2 = data_pm2.pr.merge(data_pm2_trend, tolerance=0.11)
+
+# convert back to IF to have units in the fixed format (per year / per a / per annum)
+data_if = data_pm2.pr.to_interchange_format()
+
+# ###
+# Save raw data to IF and native format.
+# ###
+
+pm2.pm2io.write_interchange_format(
+    output_folder / (output_filename + coords_terminologies["category"] + "_raw"),
+    data_if,
+)
+
+encoding = {var: compression for var in data_pm2.data_vars}
+data_pm2.pr.to_netcdf(
+    output_folder / (output_filename + coords_terminologies["category"] + "_raw.nc"),
+    encoding=encoding,
+)
+
+# ###
+# Processing
+# ###
+
+data_proc_pm2 = process_data_for_country(
+    data_country=data_pm2,
+    entities_to_ignore=[],
+    gas_baskets=gas_baskets,
+    filter_dims=None,  # leaving this explicit for now
+    cat_terminology_out=None,
+    category_conversion=None,
+    sectors_out=None,
+    processing_info_country=country_processing_step1,
+)
+
+# ###
+# save processed data to IF and native format
+# ###
+
+terminology_proc = coords_terminologies["category"]
+
+data_proc_if = data_proc_pm2.pr.to_interchange_format()
+
+if not output_folder.exists():
+    output_folder.mkdir()
+pm2.pm2io.write_interchange_format(
+    output_folder / (output_filename + terminology_proc), data_proc_if
+)
+
+encoding = {var: compression for var in data_proc_pm2.data_vars}
+data_proc_pm2.pr.to_netcdf(
+    output_folder / (output_filename + terminology_proc + ".nc"), encoding=encoding
+)
+
+print("Saved processed data.")

+ 3 - 1
UNFCCC_GHG_data/UNFCCC_reader/folder_mapping.json

@@ -8,10 +8,12 @@
     "NGA": "Nigeria",
     "MAR": "Morocco",
     "COL": "Colombia",
+    "GIN": "Guinea",
     "CHL": "Chile",
     "PER": "Peru",
     "MYS": "Malaysia",
     "MNE": "Montenegro",
     "ISR": "Israel",
-    "IDN": "Indonesia"
+    "IDN": "Indonesia",
+    "BDI": "Burundi"
 }
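
A hedged sketch of how a script might resolve a country folder from this mapping (the path is taken from the diff header above; the lookup itself is illustrative, not code from the repository):

import json
from pathlib import Path

mapping_file = Path("UNFCCC_GHG_data/UNFCCC_reader/folder_mapping.json")
folder_mapping = json.loads(mapping_file.read_text())

print(folder_mapping["BDI"])  # -> "Burundi"
print(folder_mapping["GIN"])  # -> "Guinea"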

+ 55 - 0
UNFCCC_GHG_data/helper/functions_temp.py

@@ -0,0 +1,55 @@
+"""Temporary file for new functions to avoid merging issues due to different automatic formatting. Delete after merge."""
+
+import numpy as np
+import pandas as pd
+
+
+def find_and_replace_values(
+    df: pd.DataFrame,
+    replace_info: list[tuple[str | float]],
+    category_column: str,
+    entity_column: str = "entity",
+) -> pd.DataFrame:
+    """
+    Find values and replace single values in a dataframe.
+
+    Input
+    -----
+    df
+        Input data frame
+    replace_info
+        Tuples of (category, entity, year, new value). Omit the new value to
+        replace the existing one with NaN.
+        For example [("3.C", "CO", "2019", 3.423)] or [("3.C", "CO", "2019")].
+    category_column
+        The name of the column that contains the categories.
+    entity_column
+        The name of the column that contains the entities.
+
+    Output
+    ------
+        Data frame with updated values.
+
+    """
+    for replace_info_value in replace_info:
+        category = replace_info_value[0]
+        entity = replace_info_value[1]
+        year = replace_info_value[2]
+
+        if len(replace_info_value) == 4:
+            new_value = replace_info_value[3]
+        elif len(replace_info_value) == 3:
+            new_value = np.nan
+        else:
+            raise AssertionError(
+                f"Expected tuple of length 3 or 4. Got {replace_info_value}"
+            )
+
+        index = df.loc[
+            (df[category_column] == category) & (df[entity_column] == entity),
+        ].index[0]
+
+        # pandas recommends using .at[] for changing single values
+        df.at[index, year] = new_value
+        print(f"Set value for {category}, {entity}, {year} to {new_value}.")
+
+    return df
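
A small usage example for find_and_replace_values on a toy interchange-format frame (column names and numbers are invented; in the reading scripts above the real category column is f"category ({coords_terminologies['category']})"):

import pandas as pd
from UNFCCC_GHG_data.helper.functions_temp import find_and_replace_values

df = pd.DataFrame(
    {
        "category (IPCC2006_PRIMAP)": ["3.C", "3.C"],
        "entity": ["CO", "NOx"],
        "2019": [99.9, 99.9],  # placeholder values
    }
)

df = find_and_replace_values(
    df,
    replace_info=[("3.C", "CO", "2019", 27.406), ("3.C", "NOx", "2019")],
    category_column="category (IPCC2006_PRIMAP)",
)
# the 4-tuple sets the CO value to 27.406; the 3-tuple sets the NOx value to NaN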

+ 1 - 0
extracted_data/UNFCCC/Burundi/BDI_BUR1_2023_IPCC2006_PRIMAP.csv

@@ -0,0 +1 @@
+../../../.git/annex/objects/PK/xF/MD5E-s299244--f790b1166c54fd5b601264b6f563487e.csv/MD5E-s299244--f790b1166c54fd5b601264b6f563487e.csv

+ 1 - 0
extracted_data/UNFCCC/Burundi/BDI_BUR1_2023_IPCC2006_PRIMAP.nc

@@ -0,0 +1 @@
+../../../.git/annex/objects/J6/kq/MD5E-s280055--6e7d18a4d48652ecbd3d77c2800c8226.nc/MD5E-s280055--6e7d18a4d48652ecbd3d77c2800c8226.nc

+ 22 - 0
extracted_data/UNFCCC/Burundi/BDI_BUR1_2023_IPCC2006_PRIMAP.yaml

@@ -0,0 +1,22 @@
+attrs:
+  references: https://unfccc.int/documents/611668
+  rights: ''
+  contact: daniel-busch@climate-resource.de
+  title: Burundi. Biennial update report (BUR). BUR1 Processed on 2024-04-16
+  comment: Read from pdf by Daniel Busch Processed on 2024-04-16
+  institution: UNFCCC
+  cat: category (IPCC2006_PRIMAP)
+  area: area (ISO3)
+  scen: scenario (PRIMAP)
+time_format: '%Y'
+dimensions:
+  '*':
+  - time
+  - category (IPCC2006_PRIMAP)
+  - area (ISO3)
+  - source
+  - scenario (PRIMAP)
+  - provenance
+  - entity
+  - unit
+data_file: BDI_BUR1_2023_IPCC2006_PRIMAP.csv

+ 1 - 0
extracted_data/UNFCCC/Burundi/BDI_BUR1_2023_IPCC2006_PRIMAP_raw.csv

@@ -0,0 +1 @@
+../../../.git/annex/objects/WF/7x/MD5E-s114782--4bbd01fb2bbed1cfa762c674e0447fbe.csv/MD5E-s114782--4bbd01fb2bbed1cfa762c674e0447fbe.csv

+ 1 - 0
extracted_data/UNFCCC/Burundi/BDI_BUR1_2023_IPCC2006_PRIMAP_raw.nc

@@ -0,0 +1 @@
+../../../.git/annex/objects/xv/7q/MD5E-s111420--fcbfc2cd0e8adf22cbbebfda5ad57ef3.nc/MD5E-s111420--fcbfc2cd0e8adf22cbbebfda5ad57ef3.nc

+ 22 - 0
extracted_data/UNFCCC/Burundi/BDI_BUR1_2023_IPCC2006_PRIMAP_raw.yaml

@@ -0,0 +1,22 @@
+attrs:
+  references: https://unfccc.int/documents/611668
+  rights: ''
+  contact: daniel-busch@climate-resource.de
+  title: Burundi. Biennial update report (BUR). BUR1
+  comment: Read from pdf by Daniel Busch
+  institution: UNFCCC
+  cat: category (IPCC2006_PRIMAP)
+  area: area (ISO3)
+  scen: scenario (PRIMAP)
+time_format: '%Y'
+dimensions:
+  '*':
+  - time
+  - category (IPCC2006_PRIMAP)
+  - area (ISO3)
+  - source
+  - scenario (PRIMAP)
+  - provenance
+  - entity
+  - unit
+data_file: BDI_BUR1_2023_IPCC2006_PRIMAP_raw.csv

+ 1 - 0
extracted_data/UNFCCC/Guinea/GIN_BUR1_2023_IPCC2006_PRIMAP.csv

@@ -0,0 +1 @@
+/annex/objects/MD5E-s204594--a04b7a0db8398441177fdb164e5e2114.csv

+ 1 - 0
extracted_data/UNFCCC/Guinea/GIN_BUR1_2023_IPCC2006_PRIMAP.nc

@@ -0,0 +1 @@
+/annex/objects/MD5E-s231196--7fbb7b4b58db901bc953231000cb5cb1.nc

+ 22 - 0
extracted_data/UNFCCC/Guinea/GIN_BUR1_2023_IPCC2006_PRIMAP.yaml

@@ -0,0 +1,22 @@
+attrs:
+  references: https://unfccc.int/documents/629549
+  rights: ''
+  contact: daniel-busch@climate-resource.de
+  title: Guinea. Biennial update report (BUR). BUR1 Processed on 2024-04-16
+  comment: Read from pdf by Daniel Busch Processed on 2024-04-16
+  institution: UNFCCC
+  cat: category (IPCC2006_PRIMAP)
+  area: area (ISO3)
+  scen: scenario (PRIMAP)
+time_format: '%Y'
+dimensions:
+  '*':
+  - time
+  - category (IPCC2006_PRIMAP)
+  - source
+  - area (ISO3)
+  - provenance
+  - scenario (PRIMAP)
+  - entity
+  - unit
+data_file: GIN_BUR1_2023_IPCC2006_PRIMAP.csv

+ 1 - 0
extracted_data/UNFCCC/Guinea/GIN_BUR1_2023_IPCC2006_PRIMAP_raw.csv

@@ -0,0 +1 @@
+/annex/objects/MD5E-s86243--6b88e6c39832467ab21383324926c679.csv

+ 1 - 0
extracted_data/UNFCCC/Guinea/GIN_BUR1_2023_IPCC2006_PRIMAP_raw.nc

@@ -0,0 +1 @@
+/annex/objects/MD5E-s108241--60115b2f44c314b243cfa3a64c324dcd.nc

+ 22 - 0
extracted_data/UNFCCC/Guinea/GIN_BUR1_2023_IPCC2006_PRIMAP_raw.yaml

@@ -0,0 +1,22 @@
+attrs:
+  references: https://unfccc.int/documents/629549
+  rights: ''
+  contact: daniel-busch@climate-resource.de
+  title: Guinea. Biennial update report (BUR). BUR1
+  comment: Read from pdf by Daniel Busch
+  institution: UNFCCC
+  cat: category (IPCC2006_PRIMAP)
+  area: area (ISO3)
+  scen: scenario (PRIMAP)
+time_format: '%Y'
+dimensions:
+  '*':
+  - time
+  - category (IPCC2006_PRIMAP)
+  - source
+  - area (ISO3)
+  - provenance
+  - scenario (PRIMAP)
+  - entity
+  - unit
+data_file: GIN_BUR1_2023_IPCC2006_PRIMAP_raw.csv

+ 1 - 1
setup.cfg

@@ -31,7 +31,7 @@ packages =
     UNFCCC_GHG_data.UNFCCC_DI_reader
     UNFCCC_GHG_data.helper
 #UNFCCC_GHG_data.datasets
-python_requires = >=3.8
+python_requires = >=3.8, <3.11
 setup_requires =
     setuptools_scm
 install_requires =