il y a 8 mois · 80e9351cf6
--- a/src/unfccc_ghg_data/unfccc_reader/Saint_Kitts_and_Nevis/config_kna_bur1.py
+++ b/src/unfccc_ghg_data/unfccc_reader/Saint_Kitts_and_Nevis/config_kna_bur1.py
@@ -1,18 +1,5 @@
 
				 """
			
 
				 Configuration file to read Saint Kitts and Nevis' BUR 1.
			
 
				-
			
 
				-Tables to read:
			
 
				-- The sector tables in the Annex from page 149 - done
			
 
				-- trend tables page 111-113 - done
			
 
				-- page 116 - done
			
 
				-- page 118 - done
			
 
				-- page 119 - done
			
 
				-- page 121 - done
			
 
				-- page 123 - wip
			
 
				-
			
 
				-Not reading:
			
 
				-- page 97 - trend table with data for 2008, because it's in the trend tables from page 111
			
 
				-- page 113 - slice of trend table on page 111
			
 
				 """
			
 
				 
			
 
				 gwp_to_use = "AR5GWP100"
			
@@ -63,7 +50,7 @@ filter_remove = {
 
				     "f1": {
			
 
				         "entity": "Other halogenated gases without CO2 equivalent conversion factors (2)"
			
 
				     },
			
 
				-    "f2": {"entity": "3.D.2.LULUCF"},
			
 
				+    "f2": {"entity": "3D2LULUCF"},
			
 
				 }
			
 
				 
			
 
				 conf_general = {
			
@@ -342,7 +329,6 @@ conf_trend = {
 
				 
			
 
				 conf = {
			
 
				     "energy": {
			
 
				-        # TODO: List of entities are always keys of unit mapping dict
			
 
				         "entities": ["CO2", "CH4", "N2O", "NOX", "CO", "NMVOCs", "SO2"],
			
 
				         "header": ["orig_category"],
			
 
				         "cat_codes_manual": {
			
@@ -496,8 +482,9 @@ fix_values_main = [
 
				 ]
			
 
				 
			
 
				 fix_values_trend = [
			
 
				-    # values for gas basket (KYOTOGHG (AR5GWP100)) don't match
			
 
				-    # Taking values from main table
			
 
				+    # Most of the values for the gas basket KYOTOGHG (AR5GWP100) don't match
			
 
				+    # with the values from the main table.
			
 
				+    # Replacing with values from main table
			
 
				     # energy
			
 
				     ("1A3bi", "2018", "64.74"),  # (category, year, new_value)
			
 
				     ("1A3bi1", "2018", "64.7"),
			
@@ -551,7 +538,7 @@ gas_baskets = {
 
				 }
			
 
				 
			
 
				 country_processing_step1 = {
			
 
				-    "tolerance": 0.01,  # errors up to 10 % due to rounding, e.g. 1.A.3.b.iv 0.16 and 0.17
			
 
				+    "tolerance": 0.01,
			
 
				     "aggregate_cats": {
			
 
				         "M.3.D.AG": {"sources": ["3.D.2"]},
			
 
				         "M.3.C.AG": {
			
@@ -585,7 +572,6 @@ country_processing_step1 = {
 
				         },
			
 
				         "3.D": {"sources": ["3.D.1", "3.D.2"]},
			
 
				         "M.AG": {"sources": ["3.A", "M.AG.ELV"]},
			
 
				-        # "M.AG": {"sources": ["3.A.1", "3.A.2", "3.C.4", "3.C.3", "3.D.2"]},
			
 
				         "M.3.D.LU": {"sources": ["3.D.1"]},
			
 
				         "M.LULUCF": {"sources": ["3.B", "M.3.D.LU"]},
			
 
				         "M.0.EL": {
			
@@ -599,30 +585,4 @@ country_processing_step1 = {
 
				         "entities": ["HFCS", "PFCS", "UnspMixOfHFCs"],
			
 
				         "source_GWP": gwp_to_use,
			
 
				     },
			
 
				-    # "downscale": {
			
 
				-    #     "sectors": {
			
 
				-    #         "1.B_CH4": {
			
 
				-    #             "basket": "1.B",
			
 
				-    #             "basket_contents": ["1.B.1", "1.B.2"],
			
 
				-    #             "entities": ["CH4"],
			
 
				-    #             "dim": f"category ({coords_terminologies['category']})",
			
 
				-    #             # "tolerance": 0.05,  # some inconsistencies (rounding?)
			
 
				-    #         },
			
 
				-    #         "1.B_CO2": {
			
 
				-    #             "basket": "1.B",
			
 
				-    #             "basket_contents": ["1.B.1", "1.B.2"],
			
 
				-    #             "entities": ["CO2"],
			
 
				-    #             "dim": f"category ({coords_terminologies['category']})",
			
 
				-    #             "sel": {
			
 
				-    #                 "time": [
			
 
				-    #                     "2000",
			
 
				-    #                     "2005",
			
 
				-    #                     "2010",
			
 
				-    #                     "2015",
			
 
				-    #                     "2020",
			
 
				-    #                 ]
			
 
				-    #             },
			
 
				-    #         },
			
 
				-    #     }
			
 
				-    # },
			
 
				 }
			
--- a/src/unfccc_ghg_data/unfccc_reader/Saint_Kitts_and_Nevis/read_KNA_BUR1_from_pdf.py
+++ b/src/unfccc_ghg_data/unfccc_reader/Saint_Kitts_and_Nevis/read_KNA_BUR1_from_pdf.py
@@ -45,7 +45,7 @@ if __name__ == "__main__":
 
				         return m.group("code")
			
 
				 
			
 
				     # ###
			
 
				-    # 2. Read trend tables
			
 
				+    # 1. Read trend tables
			
 
				     # ###
			
 
				 
			
 
				     df_trend = None
			
@@ -58,7 +58,6 @@ if __name__ == "__main__":
 
				             tables_inventory_original = camelot.read_pdf(
			
 
				                 str(input_folder / pdf_file),
			
 
				                 pages=page,
			
 
				-                # flavor="lattice",
			
 
				                 split_text=True,
			
 
				                 **conf_trend[table]["page_defs"][page]["read_params"],
			
 
				             )
			
@@ -118,7 +117,6 @@ if __name__ == "__main__":
 
				         df_table["category"] = df_table["category"].str.replace(
			
 
				             conf_general["cat_code_regexp"], repl, regex=True
			
 
				         )
			
 
				-
			
 
				         df_table = df_table.drop(columns="orig_category")
			
 
				 
			
 
				         # drop rows if needed
			
@@ -127,7 +125,7 @@ if __name__ == "__main__":
 
				                 row_to_delete = df_table.index[df_table["category"] == row][0]
			
 
				                 df_table = df_table.drop(index=row_to_delete)
			
 
				 
			
 
				-        # clean values
			
 
				+        # bring values into the right format
			
 
				         for year in conf_trend[table]["years"]:
			
 
				             if "replace_data_entries" in conf_trend[table].keys():
			
 
				                 df_table[year] = df_table[year].replace(
			
@@ -135,7 +133,10 @@ if __name__ == "__main__":
 
				                 )
			
 
				             df_table[year] = df_table[year].str.replace("\n", "")
			
 
				             df_table[year] = df_table[year].str.replace(",", ".")
			
 
				-            # invisible numbers in trend table on page 112
			
 
				+            # There are "invisible" numbers in trend table on page 112, "A. Forest Land"
			
 
				+            # I'm removing them here, but they actually belong to the row above,
			
 
				+            # which I didn't know when I wrote this code
			
 
				+            # TODO: Invisible values can be added to the row above directly
			
 
				             if "split_values" in conf_trend[table].keys():
			
 
				                 cat = conf_trend[table]["split_values"]["cat"]
			
 
				                 keep_value_no = conf_trend[table]["split_values"]["keep_value_no"]
			
@@ -168,7 +169,8 @@ if __name__ == "__main__":
 
				                 join="outer",
			
 
				             ).reset_index(drop=True)
			
 
				 
			
 
				-    # some categories present in main and detailled tables
			
 
				+    # some categories present in trend table on page 112 and the following detailed
			
 
				+    # tables for the sub-categories
			
 
				     df_trend = df_trend.drop_duplicates()
			
 
				 
			
 
				     for cat, year, new_value in fix_values_trend:
			
@@ -180,13 +182,10 @@ if __name__ == "__main__":
 
				     df_trend_if = pm2.pm2io.convert_wide_dataframe_if(
			
 
				         df_trend,
			
 
				         coords_cols=coords_cols,
			
 
				-        # add_coords_cols=add_coords_cols,
			
 
				         coords_defaults=coords_defaults,
			
 
				         coords_terminologies=coords_terminologies,
			
 
				         coords_value_mapping=coords_value_mapping,
			
 
				-        # coords_value_filling=coords_value_filling,
			
 
				         filter_remove=filter_remove,
			
 
				-        # filter_keep=filter_keep,
			
 
				         meta_data=meta_data,
			
 
				     )
			
 
				     #
			
@@ -195,11 +194,11 @@ if __name__ == "__main__":
 
				     data_trend_pm2 = pm2.pm2io.from_interchange_format(df_trend_if)
			
 
				 
			
 
				     # ###
			
 
				-    # 1. Read in main tables
			
 
				+    # 2. Read in main tables
			
 
				     # ###
			
 
				 
			
 
				     df_main = None
			
 
				-    for sector in reversed(conf.keys()):
			
 
				+    for sector in conf.keys():
			
 
				         print("-" * 45)
			
 
				         print(f"Reading table for {sector}.")
			
 
				 
			
@@ -210,7 +209,6 @@ if __name__ == "__main__":
 
				                 str(input_folder / pdf_file),
			
 
				                 pages=page,
			
 
				                 flavor="lattice",
			
 
				-                # split_text=True,
			
 
				             )
			
 
				 
			
 
				             df_page = tables_inventory_original[0].df
			
@@ -285,8 +283,6 @@ if __name__ == "__main__":
 
				                 join="outer",
			
 
				             ).reset_index(drop=True)
			
 
				 
			
 
				-        # break
			
 
				-
			
 
				     # year is the same for all sector tables
			
 
				     df_main["time"] = "2018"