преди 8 месеца · 9af5cfe477
--- a/src/unfccc_ghg_data/unfccc_reader/Saint_Kitts_and_Nevis/config_kna_bur1.py
+++ b/src/unfccc_ghg_data/unfccc_reader/Saint_Kitts_and_Nevis/config_kna_bur1.py
@@ -3,9 +3,9 @@ Configuration file to read Saint Kitts and Nevis' BUR 1.
 
															 Tables to read:
														
 
															 - The sector tables in the Annex from page 149 - done
														
 
															-- trend tables page 111-113
														
 
															-- page 117
														
 
															-- page 118
														
 
															+- trend tables page 111-113 - done
														
 
															+- page 116 - done
														
 
															+- page 118 - work in progress
														
 
															 - page 119
														
 
															 - page 121
														
 
															 - page 124
														
@@ -70,6 +70,42 @@ conf_general = {
 
															 }
														
 
															 conf_trend = {
														
 
															+    "energy_industries": {
														
 
															+        "entity": f"KYOTOGHG ({gwp_to_use})",
														
 
															+        "unit": "GgCO2eq",
														
 
															+        "replace_data_entries": {"NO,NE": "NO"},
														
 
															+        "cat_codes_manual": {
														
 
															+            "a. Public electricity and heat production": "1.A.1.a",
														
 
															+            "b. Petroleum refining": "1.A.1.b",
														
 
															+            "c. Manufacture of solid fuels": "1.A.1.c",
														
 
															+        },
														
 
															+        "header": ["orig_category"],
														
 
															+        "years": [
														
 
															+            "2008",
														
 
															+            "2009",
														
 
															+            "2010",
														
 
															+            "2011",
														
 
															+            "2012",
														
 
															+            "2013",
														
 
															+            "2014",
														
 
															+            "2015",
														
 
															+            "2016",
														
 
															+            "2017",
														
 
															+            "2018",
														
 
															+        ],
														
 
															+        "extra_columns": [],
														
 
															+        "rows_to_fix": {3: ["a. Public electricity and heat"]},
														
 
															+        "page_defs": {
														
 
															+            "116": {
														
 
															+                "read_params": dict(
														
 
															+                    flavor="stream",
														
 
															+                    table_areas=["72,426,543,333"],
														
 
															+                    columns=["199,229,261,293,324,356,386,416,448,480,511"],
														
 
															+                ),
														
 
															+                "skip_rows_start": 2,
														
 
															+            },
														
 
															+        },
														
 
															+    },
														
 
															     "overview": {
														
 
															         "fix_single_value": {
														
 
															             "cat": "MBIO",
														
@@ -156,11 +192,11 @@ conf_trend = {
 
															             "keep_value_no": 1,
														
 
															         },
														
 
															         "page_defs": {
														
 
															-            "111": {"skip_rows_start": 1},
														
 
															-            "112": {"skip_rows_start": 1},
														
 
															-            "113": {"skip_rows_start": 1},
														
 
															+            "111": {"read_params": dict(flavor="lattice"), "skip_rows_start": 1},
														
 
															+            "112": {"read_params": dict(flavor="lattice"), "skip_rows_start": 1},
														
 
															+            "113": {"read_params": dict(flavor="lattice"), "skip_rows_start": 1},
														
 
															         },
														
 
															-    }
														
 
															+    },
														
 
															 }
														
 
															 conf = {
														
--- a/src/unfccc_ghg_data/unfccc_reader/Saint_Kitts_and_Nevis/read_KNA_BUR1_from_pdf.py
+++ b/src/unfccc_ghg_data/unfccc_reader/Saint_Kitts_and_Nevis/read_KNA_BUR1_from_pdf.py
@@ -5,7 +5,7 @@ import camelot
 
															 import pandas as pd
														
 
															 import primap2 as pm2
														
 
															-from unfccc_ghg_data.helper import downloaded_data_path, extracted_data_path
														
 
															+from unfccc_ghg_data.helper import downloaded_data_path, extracted_data_path, fix_rows
														
 
															 from unfccc_ghg_data.unfccc_reader.Saint_Kitts_and_Nevis.config_kna_bur1 import (
														
 
															     conf,
														
 
															     conf_general,
														
@@ -40,7 +40,7 @@ if __name__ == "__main__":
 
															     # ###
														
 
															     df_trend = None
														
 
															-    for table in reversed(conf_trend.keys()):
														
 
															+    for table in conf_trend.keys():
														
 
															         print("-" * 45)
														
 
															         print(f"Reading {table} trend table.")
														
 
															         df_table = None
														
@@ -49,8 +49,9 @@ if __name__ == "__main__":
 
															             tables_inventory_original = camelot.read_pdf(
														
 
															                 str(input_folder / pdf_file),
														
 
															                 pages=page,
														
 
															-                flavor="lattice",
														
 
															+                # flavor is supplied per page through "read_params" in conf_trend
														
 
															                 split_text=True,
														
 
															+                **conf_trend[table]["page_defs"][page]["read_params"],
														
 
															             )
														
 
															             df_page = tables_inventory_original[0].df
														
@@ -72,6 +73,17 @@ if __name__ == "__main__":
 
															                     join="outer",
														
 
															                 ).reset_index(drop=True)
														
 
															+        # fix content that spreads across multiple rows
														
 
															+        if "rows_to_fix" in conf_trend[table]:
														
 
															+            for n_rows in conf_trend[table]["rows_to_fix"].keys():
														
 
															+                print(f"Merge content for {n_rows=}")
														
 
															+                df_table = fix_rows(
														
 
															+                    df_table,
														
 
															+                    rows_to_fix=conf_trend[table]["rows_to_fix"][n_rows],
														
 
															+                    col_to_use=0,
														
 
															+                    n_rows=n_rows,
														
 
															+                )
														
 
															+
														
 
															         df_table.columns = (
														
 
															             conf_trend[table]["header"]
														
 
															             + conf_trend[table]["years"]
														
@@ -139,50 +151,8 @@ if __name__ == "__main__":
 
															                 join="outer",
														
 
															             ).reset_index(drop=True)
														
 
															-    #     # fill empty strings with NaN and the forward fill category names
														
 
															-    #     df_page["category"] = df_page["category"].replace("", np.nan).ffill()
														
 
															-    #
														
 
															-    #     # remove /n from category names
														
 
															-    #     df_page["category"] = df_page["category"].str.replace("\n", "")
														
 
															-    #     # manual replacement of categories
														
 
															-    #     df_page["category"] = df_page["category"].replace(
														
 
															-    #         inv_conf_per_sector[sector]["cat_codes_manual"]
														
 
															-    #     )
														
 
															-    #
														
 
															-    #     # remove all thousand separator commas
														
 
															-    #     for year in trend_years :
														
 
															-    #         df_page[year] = df_page[year].str.replace(",", ".")
														
 
															-    #
														
 
															-    #     # add unit
														
 
															-    #     df_page["unit"] = inv_conf_per_sector[sector]["unit"]
														
 
															-    #
														
 
															-    #     # add entity if needed
														
 
															-    #     if "entity" in inv_conf_per_sector[sector].keys() :
														
 
															-    #         df_page["entity"] = inv_conf_per_sector[sector]["entity"]
														
 
															-    #
														
 
															-    #     if "unit_conversion" in inv_conf_per_sector[sector].keys() :
														
 
															-    #         for year in trend_years :
														
 
															-    #             index = inv_conf_per_sector[sector]["unit_conversion"]["index"]
														
 
															-    #             conv_factor = inv_conf_per_sector[sector]["unit_conversion"][
														
 
															-    #                 "conversion_factor"
														
 
															-    #             ]
														
 
															-    #             df_page.loc[index, year] = str(
														
 
															-    #                 conv_factor * float(df_page.loc[index, year])
														
 
															-    #             )
														
 
															-    #
														
 
															-    #     # stack the tables vertically
														
 
															-    #     if df_trend is None :
														
 
															-    #         df_trend = df_page
														
 
															-    #     else :
														
 
															-    #         df_trend = pd.concat(
														
 
															-    #             [
														
 
															-    #                 df_trend,
														
 
															-    #                 df_page,
														
 
															-    #             ],
														
 
															-    #             axis=0,
														
 
															-    #             join="outer",
														
 
															-    #         ).reset_index(drop=True)
														
 
															-    #
														
 
															+        # break
														
 
															+
														
 
															     df_trend_if = pm2.pm2io.convert_wide_dataframe_if(
														
 
															         df_trend,
														
 
															         coords_cols=coords_cols,