Переглянути джерело

main table from page 86 - unprocessed

Daniel Busch 9 місяців тому
батько
коміт
8334870494

+ 11 - 2
src/unfccc_ghg_data/unfccc_reader/Cabo_Verde/config_cpv_bur1.py

@@ -69,7 +69,7 @@ inv_conf_per_sector = {
             "Waste": "4",
         },
         "header": ["category", "entity", *trend_years],
-        "unit": "Gg",
+        "unit": ["Gg"] * 4 + ["Gg CO2eq"] + ["Gg"] * 9,
     },
     "int_bunkers": {
         "page": "39",
@@ -80,8 +80,17 @@ inv_conf_per_sector = {
             "International shipping": "M.BK.M",
         },
         "header": ["category", *trend_years],
-        "unit": "Gg",
+        "unit": "Gg CO2eq",
         "drop_cols": 7,
         "entity": "KYOTOGHG (SARGWP100)",
     },
 }
+
+inv_conf_main = {
+    "pages": {
+        "86": {"skip_rows_start": 2},
+        "87": {"skip_rows_start": 2},
+        "88": {"skip_rows_start": 2},
+        "89": {"skip_rows_start": 2},
+    },
+}

+ 38 - 3
src/unfccc_ghg_data/unfccc_reader/Cabo_Verde/read_CPV_BUR1_from_pdf.py

@@ -13,6 +13,7 @@ from unfccc_ghg_data.unfccc_reader.Cabo_Verde.config_cpv_bur1 import (
     coords_defaults,
     coords_terminologies,
     coords_value_mapping,
+    inv_conf_main,
     inv_conf_per_sector,
     meta_data,
     trend_years,
@@ -35,10 +36,42 @@ if __name__ == "__main__":
     compression = dict(zlib=True, complevel=9)
 
     # ###
-    # 1. Read in tables
+    # 2. Read sector-specific main tables for 2019
+    # ###
+
+    df_main = None
+    for page in inv_conf_main["pages"].keys():
+        tables_inventory_original = camelot.read_pdf(
+            str(input_folder / pdf_file),
+            pages=page,
+            flavor="lattice",
+            split_text=True,
+        )
+
+        df_page = tables_inventory_original[0].df
+
+        skip_rows_start = inv_conf_main["pages"][page]["skip_rows_start"]
+        if not skip_rows_start == 0:
+            df_page = df_page[skip_rows_start:]
+
+        # stack the tables vertically
+        if df_main is None:
+            df_main = df_page
+        else:
+            df_main = pd.concat(
+                [
+                    df_main,
+                    df_page,
+                ],
+                axis=0,
+                join="outer",
+            ).reset_index(drop=True)
+
+    # ###
+    # 1. Read trend tables for the years 1995, 2000, 2005, 2010, 2015 and 2019
     # ###
     df_trend = None
-    for sector in reversed(inv_conf_per_sector.keys()):
+    for sector in inv_conf_per_sector.keys():
         tables_inventory_original = camelot.read_pdf(
             str(input_folder / pdf_file),
             pages=inv_conf_per_sector[sector]["page"],
@@ -107,4 +140,6 @@ if __name__ == "__main__":
         meta_data=meta_data,
     )
 
-    pass
+    ### convert to primap2 format ###
+    print("Converting to primap2 format.")
+    data_pm2 = pm2.pm2io.from_interchange_format(data_if)