
tables on pages 33, 39

Daniel Busch, 9 months ago
parent commit fa847ef62b

+ 87 - 0
src/unfccc_ghg_data/unfccc_reader/Cabo_Verde/config_cpv_bur1.py

@@ -0,0 +1,87 @@
+"""
+Configuration for Cabo Verde BUR1 (read from pdf)
+"""
+
+# reading tables on pages:
+# 33/1, GHG emissions and removals by type of gas, by sector and by year
+# 39, Total GHG Emissions, in CO2eq, for international bunkers, in 1995, 2000, 2005, 2010, 2015 and 2019
+# 86-89, GHG emissions in 2019
+# Not reading tables on pages:
+# 37/38, has additional columns on PFCs, Unspecified mixture of HFCs and PFCs,
+# and SF6, but they are all empty
+# 32, same information as in table 33/1
+# 33/2, aggregation of table 33/1
+# 43, no new information here
+
+coords_terminologies = {
+    "area": "ISO3",
+    "category": "IPCC2006_PRIMAP",
+    "scenario": "PRIMAP",
+}
+
+# primap2 format conversion
+coords_cols = {
+    "category": "category",
+    "entity": "entity",
+    "unit": "unit",
+}
+
+coords_defaults = {
+    "source": "CPV-GHG-Inventory",
+    "provenance": "measured",
+    "area": "CPV",
+    "scenario": "BUR1",
+}
+
+gwp_to_use = "SARGWP100"
+
+coords_value_mapping = {
+    "unit": "PRIMAP1",
+    "category": "PRIMAP1",
+    "entity": {
+        "CO²": "CO2",
+        "CH⁴": "CH4",
+        "N²O": "N2O",
+        "F-gases": f"FGASES ({gwp_to_use})",
+    },
+}
+
+meta_data = {
+    "references": "https://unfccc.int/sites/default/files/resource/BUR_EN_Digital.pdf",  # TODO check other sources
+    "rights": "",  # unknown
+    "contact": "daniel-busch@climate-resource.de",
+    "title": "Cabo Verde. Biennial update report (BUR). BUR1",
+    "comment": "Read fom pdf by Daniel Busch",
+    "institution": "UNFCCC",
+}
+
+trend_years = ["1995", "2000", "2005", "2010", "2015", "2019"]
+
+inv_conf_per_sector = {
+    "main": {
+        "page": "33",
+        "skip_rows_start": 2,
+        "cat_codes_manual": {
+            "Energy": "1",
+            "IPPU": "2",
+            "Agriculture": "M.AG",
+            "LULUCF": "M.LULUCF",
+            "Waste": "4",
+        },
+        "header": ["category", "entity", *trend_years],
+        "unit": "Gg",
+    },
+    "int_bunkers": {
+        "page": "39",
+        "skip_rows_start": 2,
+        "cat_codes_manual": {
+            "Total International Bunkers": "M.BK",
+            "International aviation": "M.BK.A",
+            "International shipping": "M.BK.M",
+        },
+        "header": ["category", *trend_years],
+        "unit": "Gg",
+        "drop_cols": 7,
+        "entity": "KYOTOGHG (SARGWP100)",
+    },
+}
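
Each entry above is consumed by the generic loop in read_CPV_BUR1_from_pdf.py (below). A minimal sketch of how one entry transforms a table row, using a toy one-row dataframe in place of the camelot output (placeholder numbers, not values from the report):

import pandas as pd

from unfccc_ghg_data.unfccc_reader.Cabo_Verde.config_cpv_bur1 import (
    inv_conf_per_sector,
)

cfg = inv_conf_per_sector["int_bunkers"]

# toy stand-in for the extracted camelot table: one data row, placeholder numbers
df = pd.DataFrame(
    [["Total International Bunkers", "100", "110", "120", "130", "140", "150"]]
)
df.columns = cfg["header"]  # ["category", "1995", ..., "2019"]

df["category"] = df["category"].replace(cfg["cat_codes_manual"])  # -> "M.BK"
df["unit"] = cfg["unit"]  # "Gg"
df["entity"] = cfg["entity"]  # "KYOTOGHG (SARGWP100)"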

+ 110 - 0
src/unfccc_ghg_data/unfccc_reader/Cabo_Verde/read_CPV_BUR1_from_pdf.py

@@ -0,0 +1,110 @@
+"""
+Read Cabo Verde's BUR1 from pdf
+"""
+
+import camelot
+import numpy as np
+import pandas as pd
+import primap2 as pm2
+
+from unfccc_ghg_data.helper import downloaded_data_path, extracted_data_path
+from unfccc_ghg_data.unfccc_reader.Cabo_Verde.config_cpv_bur1 import (
+    coords_cols,
+    coords_defaults,
+    coords_terminologies,
+    coords_value_mapping,
+    inv_conf_per_sector,
+    meta_data,
+    trend_years,
+)
+
+if __name__ == "__main__":
+    # ###
+    # configuration
+    # ###
+
+    input_folder = downloaded_data_path / "UNFCCC" / "Cabo_Verde" / "BUR1"
+    output_folder = extracted_data_path / "UNFCCC" / "Cabo_Verde"
+
+    if not output_folder.exists():
+        output_folder.mkdir()
+
+    pdf_file = "BUR_EN_Digital.pdf"
+    output_filename = "CPV_BUR1_2023_"
+    category_column = f"category ({coords_terminologies['category']})"
+    compression = dict(zlib=True, complevel=9)
+
+    # ###
+    # 1. Read in tables
+    # ###
+    df_trend = None
+    for sector in reversed(inv_conf_per_sector.keys()):
+        tables_inventory_original = camelot.read_pdf(
+            str(input_folder / pdf_file),
+            pages=inv_conf_per_sector[sector]["page"],
+            flavor="lattice",
+            split_text=True,
+        )
+
+        df_page = tables_inventory_original[0].df
+
+        # cut rows at the top if needed
+        skip_rows_start = inv_conf_per_sector[sector]["skip_rows_start"]
+        if skip_rows_start != 0:
+            df_page = df_page[skip_rows_start:]
+
+        # drop columns if needed
+        if "drop_cols" in inv_conf_per_sector[sector].keys():
+            # print(df_current.columns.to_numpy())
+            df_page = df_page.drop(columns=inv_conf_per_sector[sector]["drop_cols"])
+
+        df_page.columns = inv_conf_per_sector[sector]["header"]
+
+        # fill empty strings with NaN, then forward-fill the category names
+        df_page["category"] = df_page["category"].replace("", np.nan).ffill()
+
+        # remove newline characters ("\n") from category names
+        df_page["category"] = df_page["category"].str.replace("\n", "")
+        # manual replacement of categories
+        df_page["category"] = df_page["category"].replace(
+            inv_conf_per_sector[sector]["cat_codes_manual"]
+        )
+
+        # remove all thousand separator commas
+        for year in trend_years:
+            df_page[year] = df_page[year].str.replace(",", "")
+
+        # add unit
+        df_page["unit"] = inv_conf_per_sector[sector]["unit"]
+
+        # add entity if needed
+        if "entity" in inv_conf_per_sector[sector].keys():
+            df_page["entity"] = inv_conf_per_sector[sector]["entity"]
+
+        # stack the tables vertically
+        if df_trend is None:
+            df_trend = df_page
+        else:
+            df_trend = pd.concat(
+                [
+                    df_trend,
+                    df_page,
+                ],
+                axis=0,
+                join="outer",
+            ).reset_index(drop=True)
+
+    data_if = pm2.pm2io.convert_wide_dataframe_if(
+        df_trend,
+        coords_cols=coords_cols,
+        # add_coords_cols=add_coords_cols,
+        coords_defaults=coords_defaults,
+        coords_terminologies=coords_terminologies,
+        coords_value_mapping=coords_value_mapping,
+        # coords_value_filling=coords_value_filling,
+        # filter_remove=filter_remove,
+        # filter_keep=filter_keep,
+        meta_data=meta_data,
+    )
+
+    pass
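
As committed, the script stops after building the interchange-format dataframe (hence the trailing pass), and output_filename and compression go unused. A minimal sketch of the save step such readers typically append, assuming primap2's from_interchange_format / write_interchange_format / pr.to_netcdf API (not shown in this diff):

# convert to the native primap2 format and write both representations
data_pm2 = pm2.pm2io.from_interchange_format(data_if)

pm2.pm2io.write_interchange_format(
    output_folder / (output_filename + coords_terminologies["category"]),
    data_if,
)

encoding = {var: compression for var in data_pm2.data_vars}
data_pm2.pr.to_netcdf(
    output_folder / (output_filename + coords_terminologies["category"] + ".nc"),
    encoding=encoding,
)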