
first trend table

Daniel Busch 6 months ago
parent
commit
bb46722613

+ 95 - 1
src/unfccc_ghg_data/unfccc_reader/Saint_Kitts_and_Nevis/config_kna_bur1.py

@@ -2,7 +2,7 @@
 Configuration file to read Saint Kitts and Nevis' BUR 1.
 
 Tables to read:
-- The sector tables in the Annex from page 149
+- The sector tables in the Annex from page 149 - done
 - trend tables page 111-113
 - page 117
 - page 118
@@ -69,6 +69,100 @@ conf_general = {
     "cat_code_regexp": r"^(?P<code>[a-zA-Z0-9\.]{1,11})[\s\.].*",
 }
 
+conf_trend = {
+    "overview": {
+        "fix_single_value": {
+            "cat": "MBIO",
+            "year": "2018",
+            "new_value": "0.17",
+        },
+        "entity": f"KYOTOGHG ({gwp_to_use})",
+        "unit": "GgCO2eq",
+        "replace_data_entries": {"NO,NE": "NO"},
+        "cat_codes_manual": {
+            "Total CO2 Eq. Emissions without  LULUCF": "M.0.EL",
+            "Total CO2 Eq. Emissions with  LULUCF": "M.LULUCF",
+            # "1. Energy": "1. Energy",
+            "A. Fuel Combustion": "1.A",
+            "1.  Energy Industries": "1.A.1",
+            "2.  Man. Ind. & Constr.": "1.A.2",
+            "3.  Transport": "1.A.3",
+            "4.  Other Sectors": "1.A.4",
+            "5.  Other": "1.A.5",
+            "B. Fugitive Emissions from Fuels": "1.B",
+            "1.  Solid Fuels": "1.B.1",
+            "2.  Oil and Natural Gas and other…": "1.B.2",
+            # "2.  Industrial Processes": "2.  Industrial Processes",
+            "A.  Mineral Industry": "2.A",
+            "B.  Chemical Industry": "2.B",
+            "C.  Metal Industry": "2.C",
+            "D.  Non-energy products": "2.D",
+            "E.  Electronics industry": "2.E",
+            "F.  Product uses as ODS substitutes": "2.F",
+            "G.  Other product manufacture and": "2.G",
+            "use  H.  Other": "2.H",
+            # "3.  Agriculture": "3.  Agriculture",
+            "A.  Enteric Fermentation": "3.A.1",
+            "B.  Manure Management": "3.A.2",
+            "C.  Rice Cultivation": "3.C.7",
+            "D.  Agricultural Soils": "3.C.4",  # TODO confirm!
+            "E.  Prescribed Burning of Savannahs": "3.C.1.d",  # TODO confirm!
+            "F.  Field Burning of Agricultural": "3.C.1.b",  # TODO confirm!
+            "Residues  G.  Liming": "3.C.2",
+            "H.  Urea applications": "3.C.3",
+            "I.  Other carbon-containing": "3.D",  # TODO confirm!
+            "fertilisers  4. Land Use, Land-Use Change and  Forestry": "3.B",
+            "A. Forest Land": "3.B.1",
+            "B. Cropland": "3.B.2",
+            "C. Grassland": "3.B.3",
+            "D. Wetlands": "3.B.4",
+            "E. Settlements": "3.B.5",
+            "F. Other Land": "3.B.6",
+            "G. Harvested wood products": "3.D.1",
+            "H. Other": "3.D.2",
+            "5. Waste": "4",
+            "A.  Solid Waste Disposal": "4.A",
+            "B.  Biological treatment of solid": "4.B",
+            "waste  C. Incineration and open burning of": "4.C",
+            "D. Waste water treatment and": "4.D",
+            "discharge  E.  Other": "4.E",
+            "6.  Other": "5",
+            "CO2 Emissions from Biomass": "M.BIO",
+        },
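+        # these two columns are appended via extra_columns below and then
+        # dropped again, so only the yearly values remain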
+        "drop_cols": [
+            "change to BY",
+            "change to PY",
+        ],
+        "header": ["orig_category"],
+        "years": [
+            "2008",
+            "2009",
+            "2010",
+            "2011",
+            "2012",
+            "2013",
+            "2014",
+            "2015",
+            "2016",
+            "2017",
+            "2018",
+        ],
+        "extra_columns": [
+            "change to BY",
+            "change to PY",
+        ],
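+        # cells in this row hold two space-separated numbers (invisible
+        # numbers on page 112); keep the one at index keep_value_no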
+        "split_values": {
+            "cat": "3B2",
+            "keep_value_no": 1,
+        },
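+        # PDF pages to read and the number of rows to skip at the top of each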
+        "page_defs": {
+            "111": {"skip_rows_start": 1},
+            "112": {"skip_rows_start": 1},
+            "113": {"skip_rows_start": 1},
+        },
+    }
+}
+
 conf = {
     "energy": {
         # TODO: List of entities are always keys of unit mapping dict

+ 166 - 0
src/unfccc_ghg_data/unfccc_reader/Saint_Kitts_and_Nevis/read_KNA_BUR1_from_pdf.py

@@ -9,6 +9,7 @@ from unfccc_ghg_data.helper import downloaded_data_path, extracted_data_path
 from unfccc_ghg_data.unfccc_reader.Saint_Kitts_and_Nevis.config_kna_bur1 import (
     conf,
     conf_general,
+    conf_trend,
     coords_cols,
     coords_defaults,
     coords_terminologies,
@@ -34,6 +35,171 @@ if __name__ == "__main__":
     def repl(m):  # noqa: D103
         return m.group("code")
 
+    # ###
+    # 2. Read trend tables
+    # ###
+
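+    # the trend tables span pages 111-113 (see page_defs); each page is read
+    # separately and the pieces are concatenated into one dataframe per table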
+    df_trend = None
+    for table in reversed(conf_trend.keys()):
+        print("-" * 45)
+        print(f"Reading {table} trend table.")
+        df_table = None
+        for page in conf_trend[table]["page_defs"].keys():
+            print(f"Page {page}")
+            tables_inventory_original = camelot.read_pdf(
+                str(input_folder / pdf_file),
+                pages=page,
+                flavor="lattice",
+                split_text=True,
+            )
+
+            df_page = tables_inventory_original[0].df
+
+            skip_rows_start = conf_trend[table]["page_defs"][page]["skip_rows_start"]
+            if skip_rows_start != 0:
+                df_page = df_page[skip_rows_start:]
+
+            if df_table is None:
+                # Reset index to avoid pandas' SettingWithCopyWarning
+                df_table = df_page.reset_index(drop=True)
+            else:
+                df_table = pd.concat(
+                    [
+                        df_table,
+                        df_page,
+                    ],
+                    axis=0,
+                    join="outer",
+                ).reset_index(drop=True)
+
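+        # assign column names: the category column, one column per year,
+        # and the two change columns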
+        df_table.columns = (
+            conf_trend[table]["header"]
+            + conf_trend[table]["years"]
+            + conf_trend[table]["extra_columns"]
+        )
+
+        # drop columns if needed
+        if "drop_cols" in conf_trend[table].keys():
+            df_table = df_table.drop(columns=conf_trend[table]["drop_cols"])
+
+        # category codes from category names
+        df_table["category"] = df_table["orig_category"]
+        # Remove line break characters
+        df_table["category"] = df_table["category"].str.replace("\n", " ")
+        # first the manual replacements
+        df_table["category"] = df_table["category"].replace(
+            conf_trend[table]["cat_codes_manual"]
+        )
+        # remove dots from category codes
+        df_table["category"] = df_table["category"].str.replace(".", "")
+        # then the regex replacements
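+        # e.g. after dot removal, "1 Energy" is reduced to the bare
+        # code "1" by the named <code> group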
+        df_table["category"] = df_table["category"].str.replace(
+            conf_general["cat_code_regexp"], repl, regex=True
+        )
+
+        df_table = df_table.drop(columns="orig_category")
+
+        # clean values
+        for year in conf_trend[table]["years"]:
+            df_table[year] = df_table[year].replace(
+                conf_trend[table]["replace_data_entries"]
+            )
+            df_table[year] = df_table[year].str.replace("\n", "")
+            df_table[year] = df_table[year].str.replace(",", ".")
+            # invisible numbers in trend table on page 112
+            if "split_values" in conf_trend[table].keys():
+                cat = conf_trend[table]["split_values"]["cat"]
+                keep_value_no = conf_trend[table]["split_values"]["keep_value_no"]
+                new_value = (
+                    df_table.loc[df_table["category"] == cat, year]
+                    .item()
+                    .split(" ")[keep_value_no]
+                )
+                df_table.loc[df_table["category"] == cat, year] = new_value
+
+        if "fix_single_value" in conf_trend[table].keys():
+            cat = conf_trend[table]["fix_single_value"]["cat"]
+            year = conf_trend[table]["fix_single_value"]["year"]
+            new_value = conf_trend[table]["fix_single_value"]["new_value"]
+            df_table.loc[df_table["category"] == cat, year] = new_value
+
+        df_table["unit"] = conf_trend[table]["unit"]
+        df_table["entity"] = conf_trend[table]["entity"]
+
+        # stack the tables vertically
+        if df_trend is None:
+            df_trend = df_table.reset_index(drop=True)
+        else:
+            df_trend = pd.concat(
+                [
+                    df_trend,
+                    df_table,
+                ],
+                axis=0,
+                join="outer",
+            ).reset_index(drop=True)
+
+    #     # fill empty strings with NaN and the forward fill category names
+    #     df_page["category"] = df_page["category"].replace("", np.nan).ffill()
+    #
+    #     # remove /n from category names
+    #     df_page["category"] = df_page["category"].str.replace("\n", "")
+    #     # manual replacement of categories
+    #     df_page["category"] = df_page["category"].replace(
+    #         inv_conf_per_sector[sector]["cat_codes_manual"]
+    #     )
+    #
+    #     # remove all thousand separator commas
+    #     for year in trend_years :
+    #         df_page[year] = df_page[year].str.replace(",", ".")
+    #
+    #     # add unit
+    #     df_page["unit"] = inv_conf_per_sector[sector]["unit"]
+    #
+    #     # add entity if needed
+    #     if "entity" in inv_conf_per_sector[sector].keys() :
+    #         df_page["entity"] = inv_conf_per_sector[sector]["entity"]
+    #
+    #     if "unit_conversion" in inv_conf_per_sector[sector].keys() :
+    #         for year in trend_years :
+    #             index = inv_conf_per_sector[sector]["unit_conversion"]["index"]
+    #             conv_factor = inv_conf_per_sector[sector]["unit_conversion"][
+    #                 "conversion_factor"
+    #             ]
+    #             df_page.loc[index, year] = str(
+    #                 conv_factor * float(df_page.loc[index, year])
+    #             )
+    #
+    #     # stack the tables vertically
+    #     if df_trend is None :
+    #         df_trend = df_page
+    #     else :
+    #         df_trend = pd.concat(
+    #             [
+    #                 df_trend,
+    #                 df_page,
+    #             ],
+    #             axis=0,
+    #             join="outer",
+    #         ).reset_index(drop=True)
+    #
+    df_trend_if = pm2.pm2io.convert_wide_dataframe_if(
+        df_trend,
+        coords_cols=coords_cols,
+        # add_coords_cols=add_coords_cols,
+        coords_defaults=coords_defaults,
+        coords_terminologies=coords_terminologies,
+        coords_value_mapping=coords_value_mapping,
+        # coords_value_filling=coords_value_filling,
+        filter_remove=filter_remove,
+        # filter_keep=filter_keep,
+        meta_data=meta_data,
+    )
+    #
+    ### convert to primap2 format ###
+    print("Converting to primap2 format.")
+    data_trend_pm2 = pm2.pm2io.from_interchange_format(df_trend_if)
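+    # data_trend_pm2 now holds the trend tables as a primap2 dataset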
+
     # ###
     # 1. Read in main tables
     # ###