@@ -0,0 +1,110 @@
+"""
+Read Cabo Verde's BUR1 from pdf
+"""
+
+import camelot
+import numpy as np
+import pandas as pd
+import primap2 as pm2
+
+from unfccc_ghg_data.helper import downloaded_data_path, extracted_data_path
+from unfccc_ghg_data.unfccc_reader.Cabo_Verde.config_cpv_bur1 import (
+    coords_cols,
+    coords_defaults,
+    coords_terminologies,
+    coords_value_mapping,
+    inv_conf_per_sector,
+    meta_data,
+    trend_years,
+)
+
+if __name__ == "__main__":
+    # ###
+    # configuration
+    # ###
+
+    input_folder = downloaded_data_path / "UNFCCC" / "Cabo_Verde" / "BUR1"
+    output_folder = extracted_data_path / "UNFCCC" / "Cabo_Verde"
+
+    if not output_folder.exists():
+        output_folder.mkdir()
+
+    pdf_file = "BUR_EN_Digital.pdf"
+    output_filename = "CPV_BUR1_2023_"
+    category_column = f"category ({coords_terminologies['category']})"
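+    # zlib compression settings for the netCDF encoding (used when saving)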
+    compression = dict(zlib=True, complevel=9)
+
+    # ###
+    # 1. Read in tables
+    # ###
+    df_trend = None
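+    # read the trend table of every sector and collect them in one dataframe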
+    for sector in reversed(inv_conf_per_sector.keys()):
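+        # "lattice" parses tables with ruled lines between cells;
+        # split_text splits strings that span multiple cells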
+        tables_inventory_original = camelot.read_pdf(
+            str(input_folder / pdf_file),
+            pages=inv_conf_per_sector[sector]["page"],
+            flavor="lattice",
+            split_text=True,
+        )
+
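+        # camelot returns a list of tables; the first one on the page
+        # holds the sector's trend data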
+        df_page = tables_inventory_original[0].df
+
+        # cut rows at the top if needed
+        skip_rows_start = inv_conf_per_sector[sector]["skip_rows_start"]
+        if skip_rows_start != 0:
+            df_page = df_page[skip_rows_start:]
+
+        # drop columns if needed
+        if "drop_cols" in inv_conf_per_sector[sector]:
+            df_page = df_page.drop(columns=inv_conf_per_sector[sector]["drop_cols"])
+
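+        # replace camelot's default integer column names with the
+        # configured header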
+        df_page.columns = inv_conf_per_sector[sector]["header"]
+
+        # fill empty strings with NaN and then forward fill the category names
+        df_page["category"] = df_page["category"].replace("", np.nan).ffill()
+
+        # remove newlines from category names
+        df_page["category"] = df_page["category"].str.replace("\n", "")
+        # manual replacement of categories
+        df_page["category"] = df_page["category"].replace(
+            inv_conf_per_sector[sector]["cat_codes_manual"]
+        )
+
+        # remove all thousand separator commas
+        for year in trend_years:
+            df_page[year] = df_page[year].str.replace(",", "")
+
+        # add unit
+        df_page["unit"] = inv_conf_per_sector[sector]["unit"]
+
+        # add entity if needed
+        if "entity" in inv_conf_per_sector[sector]:
+            df_page["entity"] = inv_conf_per_sector[sector]["entity"]
+
+        # stack the tables vertically
+        if df_trend is None:
+            df_trend = df_page
+        else:
+            df_trend = pd.concat(
+                [df_trend, df_page],
+                axis=0,
+                join="outer",
+            ).reset_index(drop=True)
+
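+    # convert the wide dataframe (one column per year) into the primap2
+    # interchange format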
+    data_if = pm2.pm2io.convert_wide_dataframe_if(
+        df_trend,
+        coords_cols=coords_cols,
+        coords_defaults=coords_defaults,
+        coords_terminologies=coords_terminologies,
+        coords_value_mapping=coords_value_mapping,
+        meta_data=meta_data,
+    )
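+
+    # A sketch of the usual saving step (an assumption based on the
+    # configuration above, not part of the original hunk): write the
+    # interchange format to disk and store the primap2 dataset as netCDF,
+    # which is where output_filename and compression are used.
+    pm2.pm2io.write_interchange_format(
+        output_folder / (output_filename + coords_terminologies["category"]),
+        data_if,
+    )
+    data_pm2 = pm2.pm2io.from_interchange_format(data_if)
+    encoding = {var: compression for var in data_pm2.data_vars}
+    data_pm2.pr.to_netcdf(
+        output_folder / (output_filename + coords_terminologies["category"] + ".nc"),
+        encoding=encoding,
+    )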