Daniel Busch 6 mesi fa
parent
commit
80e9351cf6

+ 5 - 45
src/unfccc_ghg_data/unfccc_reader/Saint_Kitts_and_Nevis/config_kna_bur1.py

@@ -1,18 +1,5 @@
 """
 Configuration file to read Saint Kitts and Nevis' BUR 1.
-
-Tables to read:
-- The sector tables in the Annex from page 149 - done
-- trend tables page 111-113 - done
-- page 116 - done
-- page 118 - done
-- page 119 - done
-- page 121 - done
-- page 123 - wip
-
-Not reading:
-- page 97 - trend table with data for 2008, because it's in the trend tables from page 111
-- page 113 - slice of trend table on page 111
 """
 
 gwp_to_use = "AR5GWP100"
@@ -63,7 +50,7 @@ filter_remove = {
     "f1": {
         "entity": "Other halogenated gases without CO2 equivalent conversion factors (2)"
     },
-    "f2": {"entity": "3.D.2.LULUCF"},
+    "f2": {"entity": "3D2LULUCF"},
 }
 
 conf_general = {
@@ -342,7 +329,6 @@ conf_trend = {
 
 conf = {
     "energy": {
-        # TODO: List of entities are always keys of unit mapping dict
         "entities": ["CO2", "CH4", "N2O", "NOX", "CO", "NMVOCs", "SO2"],
         "header": ["orig_category"],
         "cat_codes_manual": {
@@ -496,8 +482,9 @@ fix_values_main = [
 ]
 
 fix_values_trend = [
-    # values for gas basket (KYOTOGHG (AR5GWP100)) don't match
-    # Taking values from main table
+    # Most of the values for the gas basket KYOTOGHG (AR5GWP100) don't match
+    # the values from the main table.
+    # They are replaced with the values from the main table.
     # energy
     ("1A3bi", "2018", "64.74"),  # (category, year, new_value)
     ("1A3bi1", "2018", "64.7"),
@@ -551,7 +538,7 @@ gas_baskets = {
 }
 
 country_processing_step1 = {
-    "tolerance": 0.01,  # errors up to 10 % due to rounding, e.g. 1.A.3.b.iv 0.16 and 0.17
+    "tolerance": 0.01,
     "aggregate_cats": {
         "M.3.D.AG": {"sources": ["3.D.2"]},
         "M.3.C.AG": {
@@ -585,7 +572,6 @@ country_processing_step1 = {
         },
         "3.D": {"sources": ["3.D.1", "3.D.2"]},
         "M.AG": {"sources": ["3.A", "M.AG.ELV"]},
-        # "M.AG": {"sources": ["3.A.1", "3.A.2", "3.C.4", "3.C.3", "3.D.2"]},
         "M.3.D.LU": {"sources": ["3.D.1"]},
         "M.LULUCF": {"sources": ["3.B", "M.3.D.LU"]},
         "M.0.EL": {
@@ -599,30 +585,4 @@ country_processing_step1 = {
         "entities": ["HFCS", "PFCS", "UnspMixOfHFCs"],
         "source_GWP": gwp_to_use,
     },
-    # "downscale": {
-    #     "sectors": {
-    #         "1.B_CH4": {
-    #             "basket": "1.B",
-    #             "basket_contents": ["1.B.1", "1.B.2"],
-    #             "entities": ["CH4"],
-    #             "dim": f"category ({coords_terminologies['category']})",
-    #             # "tolerance": 0.05,  # some inconsistencies (rounding?)
-    #         },
-    #         "1.B_CO2": {
-    #             "basket": "1.B",
-    #             "basket_contents": ["1.B.1", "1.B.2"],
-    #             "entities": ["CO2"],
-    #             "dim": f"category ({coords_terminologies['category']})",
-    #             "sel": {
-    #                 "time": [
-    #                     "2000",
-    #                     "2005",
-    #                     "2010",
-    #                     "2015",
-    #                     "2020",
-    #                 ]
-    #             },
-    #         },
-    #     }
-    # },
 }

+ 10 - 14
src/unfccc_ghg_data/unfccc_reader/Saint_Kitts_and_Nevis/read_KNA_BUR1_from_pdf.py

@@ -45,7 +45,7 @@ if __name__ == "__main__":
         return m.group("code")
 
     # ###
-    # 2. Read trend tables
+    # 1. Read trend tables
     # ###
 
     df_trend = None
@@ -58,7 +58,6 @@ if __name__ == "__main__":
             tables_inventory_original = camelot.read_pdf(
                 str(input_folder / pdf_file),
                 pages=page,
-                # flavor="lattice",
                 split_text=True,
                 **conf_trend[table]["page_defs"][page]["read_params"],
             )
@@ -118,7 +117,6 @@ if __name__ == "__main__":
         df_table["category"] = df_table["category"].str.replace(
             conf_general["cat_code_regexp"], repl, regex=True
         )
-
         df_table = df_table.drop(columns="orig_category")
 
         # drop rows if needed
@@ -127,7 +125,7 @@ if __name__ == "__main__":
                 row_to_delete = df_table.index[df_table["category"] == row][0]
                 df_table = df_table.drop(index=row_to_delete)
 
-        # clean values
+        # bring values into the right format
         for year in conf_trend[table]["years"]:
             if "replace_data_entries" in conf_trend[table].keys():
                 df_table[year] = df_table[year].replace(
@@ -135,7 +133,10 @@ if __name__ == "__main__":
                 )
             df_table[year] = df_table[year].str.replace("\n", "")
             df_table[year] = df_table[year].str.replace(",", ".")
-            # invisible numbers in trend table on page 112
+            # There are "invisible" numbers in the trend table on page 112, row
+            # "A. Forest Land". They are removed here, although they actually belong
+            # to the row above (this was not known when the code was written).
+            # TODO: Add the invisible values directly to the row above instead
             if "split_values" in conf_trend[table].keys():
                 cat = conf_trend[table]["split_values"]["cat"]
                 keep_value_no = conf_trend[table]["split_values"]["keep_value_no"]
@@ -168,7 +169,8 @@ if __name__ == "__main__":
                 join="outer",
             ).reset_index(drop=True)
 
-    # some categories present in main and detailled tables
+    # some categories are present both in the trend table on page 112 and in the
+    # following detailed tables for the sub-categories, so drop the duplicates
     df_trend = df_trend.drop_duplicates()
 
     for cat, year, new_value in fix_values_trend:
@@ -180,13 +182,10 @@ if __name__ == "__main__":
     df_trend_if = pm2.pm2io.convert_wide_dataframe_if(
         df_trend,
         coords_cols=coords_cols,
-        # add_coords_cols=add_coords_cols,
         coords_defaults=coords_defaults,
         coords_terminologies=coords_terminologies,
         coords_value_mapping=coords_value_mapping,
-        # coords_value_filling=coords_value_filling,
         filter_remove=filter_remove,
-        # filter_keep=filter_keep,
         meta_data=meta_data,
     )
     #
@@ -195,11 +194,11 @@ if __name__ == "__main__":
     data_trend_pm2 = pm2.pm2io.from_interchange_format(df_trend_if)
 
     # ###
-    # 1. Read in main tables
+    # 2. Read in main tables
     # ###
 
     df_main = None
-    for sector in reversed(conf.keys()):
+    for sector in conf.keys():
         print("-" * 45)
         print(f"Reading table for {sector}.")
 
@@ -210,7 +209,6 @@ if __name__ == "__main__":
                 str(input_folder / pdf_file),
                 pages=page,
                 flavor="lattice",
-                # split_text=True,
             )
 
             df_page = tables_inventory_original[0].df
@@ -285,8 +283,6 @@ if __name__ == "__main__":
                 join="outer",
             ).reset_index(drop=True)
 
-        # break
-
     # year is the same for all sector tables
     df_main["time"] = "2018"