пре 11 месеци · 0bcd5dcfec
--- a/UNFCCC_GHG_data/UNFCCC_reader/Burundi/config_BDI_BUR1.py
+++ b/UNFCCC_GHG_data/UNFCCC_reader/Burundi/config_BDI_BUR1.py
@@ -0,0 +1,94 @@
 
				+coords_terminologies = {
			
 
				+    "area": "ISO3",
			
 
				+    "category": "IPCC2006_PRIMAP",
			
 
				+    "scenario": "PRIMAP",
			
 
				+}
			
 
				+
			
 
				+# define config dict
			
 
				+inv_conf = {
			
 
				+    "entity_row": 0,
			
 
				+    "unit_row": 1,
			
 
				+    "index_cols": "Greenhouse gas source and sink categories",
			
 
				+    "header_long": ["orig_cat_name", "entity", "unit", "time", "data"],
			
 
				+    "cat_code_regexp": r"^(?P<code>[a-zA-Z0-9\.]{1,11})[\s\.].*",
			
 
				+    "2005": {
			
 
				+        "pages_to_read": ["197", "198", "199", "200"],
			
 
				+        "header": [
			
 
				+            "Greenhouse gas source and sink categories",
			
 
				+            "CO2",
			
 
				+            "CH4",
			
 
				+            "N2O",
			
 
				+            "HFCs",
			
 
				+            "PFCs",
			
 
				+            "SF6",
			
 
				+            "Other halogenated gases with CO2 equivalent conversion factors",
			
 
				+            "Other halogenated gases without CO2 equivalent conversion factors" "NOx",
			
 
				+            "CO",
			
 
				+            "NMVOCs",
			
 
				+            "SO2",
			
 
				+        ],
			
 
				+        "unit": [
			
 
				+            "-",
			
 
				+            "Gg",
			
 
				+            "Gg",
			
 
				+            "Gg",
			
 
				+            "GgCO2eq",
			
 
				+            "GgCO2eq",
			
 
				+            "GgCO2eq",
			
 
				+            "GgCO2eq",
			
 
				+            "Gg",
			
 
				+            "Gg",
			
 
				+            "Gg",
			
 
				+            "Gg",
			
 
				+            "Gg",
			
 
				+        ],
			
 
				+        "cat_codes_manual": {
			
 
				+            "Memo Items (5)": "MEMO",
			
 
				+            "International Bunkers": "M.BK",
			
 
				+            "1.A.3.a.i - International Aviation (International Bunkers) (1)": "M.BK.A",
			
 
				+            "1.A.3.d.i - International water-borne navigation (International bunkers) (1)": "M.BK.M",
			
 
				+            "1.A.5.c - Multilateral Operations (1)(2)": "M.MULTIOP",
			
 
				+            "Total National Emissions and Removals": "0",
			
 
				+        },
			
 
				+    },
			
 
				+    "2006": {
			
 
				+        "pages_to_read": ["201", "202", "203", "204"],
			
 
				+        "header": [
			
 
				+            "Greenhouse gas source and sink categories",
			
 
				+            "CO2",
			
 
				+            "CH4",
			
 
				+            "N2O",
			
 
				+            "HFCs",
			
 
				+            "PFCs",
			
 
				+            "SF6",
			
 
				+            "Other halogenated gases with CO2 equivalent conversion factors",
			
 
				+            "Other halogenated gases without CO2 equivalent conversion factors" "NOx",
			
 
				+            "CO",
			
 
				+            "NMVOCs",
			
 
				+            "SO2",
			
 
				+        ],
			
 
				+        "unit": [
			
 
				+            "-",
			
 
				+            "Gg",
			
 
				+            "Gg",
			
 
				+            "Gg",
			
 
				+            "GgCO2eq",
			
 
				+            "GgCO2eq",
			
 
				+            "GgCO2eq",
			
 
				+            "GgCO2eq",
			
 
				+            "Gg",
			
 
				+            "Gg",
			
 
				+            "Gg",
			
 
				+            "Gg",
			
 
				+            "Gg",
			
 
				+        ],
			
 
				+        "cat_codes_manual": {
			
 
				+            "Memo Items (5)": "MEMO",
			
 
				+            "International Bunkers": "M.BK",
			
 
				+            "1.A.3.a.i - International Aviation (International Bunkers) (1)": "M.BK.A",
			
 
				+            "1.A.3.d.i - International water-borne navigation (International bunkers) (1)": "M.BK.M",
			
 
				+            "1.A.5.c - Multilateral Operations (1)(2)": "M.MULTIOP",
			
 
				+            "Total National Emissions and Removals": "0",
			
 
				+        },
			
 
				+    },
			
 
				+}
			
--- a/UNFCCC_GHG_data/UNFCCC_reader/Burundi/read_BDI_BUR1_from_pdf.py
+++ b/UNFCCC_GHG_data/UNFCCC_reader/Burundi/read_BDI_BUR1_from_pdf.py
@@ -0,0 +1,135 @@
 
				+import os
			
 
				+
			
 
				+os.environ["UNFCCC_GHG_ROOT_PATH"] = (
			
 
				+    "/Users/danielbusch/Documents/UNFCCC_non-AnnexI_data"
			
 
				+)
			
 
				+
			
 
				+import camelot
			
 
				+import primap2 as pm2
			
 
				+import pandas as pd
			
 
				+
			
 
				+from UNFCCC_GHG_data.helper import downloaded_data_path, extracted_data_path
			
 
				+
			
 
				+from config_BDI_BUR1 import coords_terminologies, inv_conf
			
 
				+
			
 
				+# ###
			
 
				+# configuration
			
 
				+# ###
			
 
				+
			
 
				+input_folder = downloaded_data_path / "UNFCCC" / "Burundi" / "BUR1"
			
 
				+output_folder = extracted_data_path / "UNFCCC" / "Burundi"
			
 
				+
			
 
				+if not output_folder.exists():
			
 
				+    output_folder.mkdir()
			
 
				+
			
 
				+pdf_file = "Burundi_BUR_1_Report__Francais.pdf"
			
 
				+output_filename = "BDI_BUR1_2023_"
			
 
				+category_column = f"category ({coords_terminologies['category']})"
			
 
				+compression = dict(zlib=True, complevel=9)
			
 
				+
			
 
				+# ###
			
 
				+# 1. Read in tables
			
 
				+# ###
			
 
				+
			
 
				+# table for the year 2005
			
 
				+year = "2005"
			
 
				+years_to_read = ["2005", "2006"]
			
 
				+df_all = None
			
 
				+for year in years_to_read:
			
 
				+    df_year = None
			
 
				+    for page in inv_conf[year]["pages_to_read"]:
			
 
				+        print("-" * 45)
			
 
				+        print(f"Reading table from page {page}.")
			
 
				+
			
 
				+        tables_inventory_original = camelot.read_pdf(
			
 
				+            str(input_folder / pdf_file),
			
 
				+            pages=page,
			
 
				+            # table_areas=page_def_templates[page]["area"],
			
 
				+            # columns=page_def_templates[page]["cols"],
			
 
				+            flavor="lattice",
			
 
				+            split_text=True,
			
 
				+        )
			
 
				+
			
 
				+        print("Reading complete.")
			
 
				+
			
 
				+        df_page = tables_inventory_original[0].df
			
 
				+
			
 
				+        if df_year is None:
			
 
				+            df_year = df_page
			
 
				+        else:
			
 
				+            df_year = pd.concat(
			
 
				+                [df_year, df_page],
			
 
				+                axis=0,
			
 
				+                join="outer",
			
 
				+            ).reset_index(drop=True)
			
 
				+
			
 
				+    # remove line breaks
			
 
				+    for column in df_year.columns:
			
 
				+        df_year[column] = df_year[column].str.replace("\n", "")
			
 
				+
			
 
				+    df_header = pd.DataFrame([inv_conf[year]["header"], inv_conf[year]["unit"]])
			
 
				+
			
 
				+    df_year = pd.concat([df_header, df_year[2:]], axis=0, join="outer").reset_index(
			
 
				+        drop=True
			
 
				+    )
			
 
				+
			
 
				+    df_year = pm2.pm2io.nir_add_unit_information(
			
 
				+        df_year,
			
 
				+        unit_row=inv_conf["unit_row"],
			
 
				+        entity_row=inv_conf["entity_row"],
			
 
				+        regexp_entity=".*",
			
 
				+        regexp_unit=".*",
			
 
				+        default_unit="Gg",
			
 
				+    )
			
 
				+
			
 
				+    print("Added unit information.")
			
 
				+
			
 
				+    # set index
			
 
				+    df_year = df_year.set_index(inv_conf["index_cols"])
			
 
				+
			
 
				+    # convert to long format
			
 
				+    df_year_long = pm2.pm2io.nir_convert_df_to_long(
			
 
				+        df_year, year, inv_conf["header_long"]
			
 
				+    )
			
 
				+
			
 
				+    # extract from tuple
			
 
				+    df_year_long["orig_cat_name"] = df_year_long["orig_cat_name"].str[0]
			
 
				+
			
 
				+    # prep for conversion to PM2 IF and native format
			
 
				+    # make a copy of the categories row
			
 
				+    df_year_long["category"] = df_year_long["orig_cat_name"]
			
 
				+
			
 
				+    # replace cat names by codes in col "category"
			
 
				+    # first the manual replacements
			
 
				+    df_year_long["category"] = df_year_long["category"].str.replace("\n", "")
			
 
				+    df_year_long["category"] = df_year_long["category"].replace(
			
 
				+        inv_conf["2005"]["cat_codes_manual"]
			
 
				+    )
			
 
				+
			
 
				+    df_year_long["category"] = df_year_long["category"].str.replace(".", "")
			
 
				+
			
 
				+    # then the regex replacements
			
 
				+    def repl(m):
			
 
				+        return m.group("code")
			
 
				+
			
 
				+    df_year_long["category"] = df_year_long["category"].str.replace(
			
 
				+        inv_conf["cat_code_regexp"], repl, regex=True
			
 
				+    )
			
 
				+
			
 
				+    df_year_long = df_year_long.reset_index(drop=True)
			
 
				+
			
 
				+    df_year_long["data"] = df_year_long["data"].str.replace(",", ".")
			
 
				+    df_year_long["data"] = df_year_long["data"].str.replace("NE1", "NE")
			
 
				+
			
 
				+    # make sure all col headers are str
			
 
				+    df_year_long.columns = df_year_long.columns.map(str)
			
 
				+    df_year_long = df_year_long.drop(columns=["orig_cat_name"])
			
 
				+
			
 
				+    if df_all is None:
			
 
				+        df_all = df_year_long
			
 
				+    else:
			
 
				+        df_all = pd.concat(
			
 
				+            [df_all, df_year_long],
			
 
				+            axis=0,
			
 
				+            join="outer",
			
 
				+        ).reset_index(drop=True)