
tables on pages 33, 39

Daniel Busch, 9 months ago
parent commit fa847ef62b

+ 87 - 0
src/unfccc_ghg_data/unfccc_reader/Cabo_Verde/config_cpv_bur1.py

@@ -0,0 +1,87 @@
+"""
+Configuration for Cabo Verde BUR1 (read from pdf)
+"""
+
+# reading tables on pages:
+# 33/1, GHG emissions and removals by type of gas, by sector and by year
+# 39, Total GHG Emissions, in CO2eq, for international bunkers, in 1995, 2000, 2005, 2010, 2015 and 2019
+# 86-89, GHG emissions in 2019
+# Not reading tables on pages:
+# 37/38, has additional columns on PFCs, Unspecified mixture of HFCs and PFCs,
+# and SF6, but they are all empty
+# 32, same information as in table 33/1
+# 33/2, aggregation of table 33/1
+# 43, no new information here
+
+coords_terminologies = {
+    "area": "ISO3",
+    "category": "IPCC2006_PRIMAP",
+    "scenario": "PRIMAP",
+}
+
+# primap2 format conversion
+coords_cols = {
+    "category": "category",
+    "entity": "entity",
+    "unit": "unit",
+}
+
+coords_defaults = {
+    "source": "CPV-GHG-Inventory",
+    "provenance": "measured",
+    "area": "CPV",
+    "scenario": "BUR1",
+}
+
+gwp_to_use = "SARGWP100"
+
+coords_value_mapping = {
+    "unit": "PRIMAP1",
+    "category": "PRIMAP1",
+    "entity": {
+        "CO²": "CO2",
+        "CH⁴": "CH4",
+        "N²O": "N2O",
+        "F-gases": f"FGASES ({gwp_to_use})",
+    },
+}
+
+meta_data = {
+    "references": "https://unfccc.int/sites/default/files/resource/BUR_EN_Digital.pdf",  # TODO check other sources
+    "rights": "",  # unknown
+    "contact": "daniel-busch@climate-resource.de",
+    "title": "Cabo Verde. Biennial update report (BUR). BUR1",
+    "comment": "Read fom pdf by Daniel Busch",
+    "institution": "UNFCCC",
+}
+
+trend_years = ["1995", "2000", "2005", "2010", "2015", "2019"]
+
+inv_conf_per_sector = {
+    "main": {
+        "page": "33",
+        "skip_rows_start": 2,
+        "cat_codes_manual": {
+            "Energy": "1",
+            "IPPU": "2",
+            "Agriculture": "M.AG",
+            "LULUCF": "M.LULUCF",
+            "Waste": "4",
+        },
+        "header": ["category", "entity", *trend_years],
+        "unit": "Gg",
+    },
+    "int_bunkers": {
+        "page": "39",
+        "skip_rows_start": 2,
+        "cat_codes_manual": {
+            "Total International Bunkers": "M.BK",
+            "International aviation": "M.BK.A",
+            "International shipping": "M.BK.M",
+        },
+        "header": ["category", *trend_years],
+        "unit": "Gg",
+        "drop_cols": 7,
+        "entity": "KYOTOGHG (SARGWP100)",
+    },
+}
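
Each entry above is consumed by the generic loop in read_CPV_BUR1_from_pdf.py (below). A minimal sketch of how one entry transforms a table row, using a toy one-row dataframe in place of the camelot output (placeholder numbers, not values from the report):

import pandas as pd

from unfccc_ghg_data.unfccc_reader.Cabo_Verde.config_cpv_bur1 import (
    inv_conf_per_sector,
)

cfg = inv_conf_per_sector["int_bunkers"]

# toy stand-in for the extracted camelot table: one data row, placeholder numbers
df = pd.DataFrame(
    [["Total International Bunkers", "100", "110", "120", "130", "140", "150"]]
)
df.columns = cfg["header"]  # ["category", "1995", ..., "2019"]

df["category"] = df["category"].replace(cfg["cat_codes_manual"])  # -> "M.BK"
df["unit"] = cfg["unit"]  # "Gg"
df["entity"] = cfg["entity"]  # "KYOTOGHG (SARGWP100)"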

+ 110 - 0
src/unfccc_ghg_data/unfccc_reader/Cabo_Verde/read_CPV_BUR1_from_pdf.py

@@ -0,0 +1,110 @@
+"""
+Read Cabo Verde's BUR1 from pdf
+"""
+
+import camelot
+import numpy as np
+import pandas as pd
+import primap2 as pm2
+
+from unfccc_ghg_data.helper import downloaded_data_path, extracted_data_path
+from unfccc_ghg_data.unfccc_reader.Cabo_Verde.config_cpv_bur1 import (
+    coords_cols,
+    coords_defaults,
+    coords_terminologies,
+    coords_value_mapping,
+    inv_conf_per_sector,
+    meta_data,
+    trend_years,
+)
+
+if __name__ == "__main__":
+    # ###
+    # configuration
+    # ###
+
+    input_folder = downloaded_data_path / "UNFCCC" / "Cabo_Verde" / "BUR1"
+    output_folder = extracted_data_path / "UNFCCC" / "Cabo_Verde"
+
+    if not output_folder.exists():
+        output_folder.mkdir()
+
+    pdf_file = "BUR_EN_Digital.pdf"
+    output_filename = "CPV_BUR1_2023_"
+    category_column = f"category ({coords_terminologies['category']})"
+    compression = dict(zlib=True, complevel=9)
+
+    # ###
+    # 1. Read in tables
+    # ###
+    df_trend = None
+    for sector in reversed(inv_conf_per_sector.keys()):
+        tables_inventory_original = camelot.read_pdf(
+            str(input_folder / pdf_file),
+            pages=inv_conf_per_sector[sector]["page"],
+            flavor="lattice",
+            split_text=True,
+        )
+
+        df_page = tables_inventory_original[0].df
+
+        # cut rows at the top if needed
+        skip_rows_start = inv_conf_per_sector[sector]["skip_rows_start"]
+        if skip_rows_start != 0:
+            df_page = df_page[skip_rows_start:]
+
+        # drop columns if needed
+        if "drop_cols" in inv_conf_per_sector[sector].keys():
+            # print(df_current.columns.to_numpy())
+            df_page = df_page.drop(columns=inv_conf_per_sector[sector]["drop_cols"])
+
+        df_page.columns = inv_conf_per_sector[sector]["header"]
+
+        # fill empty strings with NaN, then forward-fill the category names
+        df_page["category"] = df_page["category"].replace("", np.nan).ffill()
+
+        # remove newline characters ("\n") from category names
+        df_page["category"] = df_page["category"].str.replace("\n", "")
+        # manual replacement of categories
+        df_page["category"] = df_page["category"].replace(
+            inv_conf_per_sector[sector]["cat_codes_manual"]
+        )
+
+        # remove all thousand separator commas
+        for year in trend_years:
+            df_page[year] = df_page[year].str.replace(",", "")
+
+        # add unit
+        df_page["unit"] = inv_conf_per_sector[sector]["unit"]
+
+        # add entity if needed
+        if "entity" in inv_conf_per_sector[sector].keys():
+            df_page["entity"] = inv_conf_per_sector[sector]["entity"]
+
+        # stack the tables vertically
+        if df_trend is None:
+            df_trend = df_page
+        else:
+            df_trend = pd.concat(
+                [
+                    df_trend,
+                    df_page,
+                ],
+                axis=0,
+                join="outer",
+            ).reset_index(drop=True)
+
+    data_if = pm2.pm2io.convert_wide_dataframe_if(
+        df_trend,
+        coords_cols=coords_cols,
+        # add_coords_cols=add_coords_cols,
+        coords_defaults=coords_defaults,
+        coords_terminologies=coords_terminologies,
+        coords_value_mapping=coords_value_mapping,
+        # coords_value_filling=coords_value_filling,
+        # filter_remove=filter_remove,
+        # filter_keep=filter_keep,
+        meta_data=meta_data,
+    )
+
+    pass
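
As committed, the script stops after building the interchange-format dataframe (hence the trailing pass), and output_filename and compression go unused. A minimal sketch of the save step such readers typically append, assuming primap2's from_interchange_format / write_interchange_format / pr.to_netcdf API (not shown in this diff):

# convert to the native primap2 format and write both representations
data_pm2 = pm2.pm2io.from_interchange_format(data_if)

pm2.pm2io.write_interchange_format(
    output_folder / (output_filename + coords_terminologies["category"]),
    data_if,
)

encoding = {var: compression for var in data_pm2.data_vars}
data_pm2.pr.to_netcdf(
    output_folder / (output_filename + coords_terminologies["category"] + ".nc"),
    encoding=encoding,
)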