8 months ago · 06f52b18bf
--- a/src/unfccc_ghg_data/unfccc_reader/Saint_Kitts_and_Nevis/config_kna_bur1.py
+++ b/src/unfccc_ghg_data/unfccc_reader/Saint_Kitts_and_Nevis/config_kna_bur1.py
@@ -2,13 +2,23 @@
 
				 Configuration file to read Saint Kitts and Nevis' BUR 1.
			
 
				 """
			
 
				 
			
 
				+conf_general = {  # settings that are not keyed by sector
			
 
				+    "cat_code_regexp": r"^(?P<code>[a-zA-Z0-9\.]{1,11})[\s\.].*",
			
 
				+}
			
 
				+
			
 
				 conf = {
			
 
				     "energy": {
			
 
				+        "header": ["orig_category", "CO2", "CH4", "N2O", "NOX", "CO", "NMVOCs", "SO2"],
			
 
				+        "unit": 8 * ["Gg"],  # 8 separate "Gg" entries; presumably one per header column - TODO confirm
			
 
				+        "cat_codes_manual": {
			
 
				+            "Information Items": "MEMO",
			
 
				+            "CO2 from Biomass Combustion for Energy Production": "MBIO",
			
 
				+        },
			
 
				         "page_defs": {
			
 
				-            "149": {"skip_rows_start": 0},
			
 
				-            "150": {"skip_rows_start": 0},
			
 
				-            "151": {"skip_rows_start": 0},
			
 
				-            "152": {"skip_rows_start": 0},
			
 
				-        }
			
 
				+            "149": {"skip_rows_start": 2},
			
 
				+            "150": {"skip_rows_start": 2},
			
 
				+            "151": {"skip_rows_start": 2},
			
 
				+            "152": {"skip_rows_start": 2},
			
 
				+        },
			
 
				     }
			
 
				 }
			
--- a/src/unfccc_ghg_data/unfccc_reader/Saint_Kitts_and_Nevis/read_KNA_BUR1_from_pdf.py
+++ b/src/unfccc_ghg_data/unfccc_reader/Saint_Kitts_and_Nevis/read_KNA_BUR1_from_pdf.py
@@ -5,7 +5,10 @@ import camelot
 
				 import pandas as pd
			
 
				 
			
 
				 from unfccc_ghg_data.helper import downloaded_data_path, extracted_data_path
			
 
				-from unfccc_ghg_data.unfccc_reader.Saint_Kitts_and_Nevis.config_kna_bur1 import conf
			
 
				+from unfccc_ghg_data.unfccc_reader.Saint_Kitts_and_Nevis.config_kna_bur1 import (
			
 
				+    conf,
			
 
				+    conf_general,
			
 
				+)
			
 
				 
			
 
				 if __name__ == "__main__":
			
 
				     # ###
			
@@ -35,6 +38,7 @@ if __name__ == "__main__":
 
				 
			
 
				         df_sector = None
			
 
				         for page in conf[sector]["page_defs"].keys():
			
 
				+            print(f"Page {page}")
			
 
				             tables_inventory_original = camelot.read_pdf(
			
 
				                 str(input_folder / pdf_file),
			
 
				                 pages=page,
			
@@ -44,6 +48,10 @@ if __name__ == "__main__":
 
				 
			
 
				             df_page = tables_inventory_original[0].df
			
 
				 
			
 
				+            skip_rows_start = conf[sector]["page_defs"][page]["skip_rows_start"]
			
 
				+            if skip_rows_start != 0:  # drop that many leading rows of the page table
			
 
				+                df_page = df_page[skip_rows_start:]
			
 
				+
			
 
				             if df_sector is None:
			
 
				                 df_sector = df_page
			
 
				             else:
			
@@ -56,4 +64,24 @@ if __name__ == "__main__":
 
				                     join="outer",
			
 
				                 ).reset_index(drop=True)
			
 
				 
			
 
				+        df_sector.columns = conf[sector]["header"]  # assign column names positionally
			
 
				+
			
 
				+        df_sector["category"] = df_sector["orig_category"]  # orig_category is dropped below
			
 
				+
			
 
				+        # Replace line break characters with spaces
			
 
				+        df_sector["category"] = df_sector["category"].str.replace("\n", " ")
			
 
				+
			
 
				+        # first the manual replacements (whole-value matches from cat_codes_manual)
			
 
				+        df_sector["category"] = df_sector["category"].replace(
			
 
				+            conf[sector]["cat_codes_manual"]
			
 
				+        )
			
 
				+
			
 
				+        # then the regex replacements; NOTE(review): repl is not defined in the visible hunks - confirm it exists
			
 
				+        df_sector["category"] = df_sector["category"].str.replace(
			
 
				+            conf_general["cat_code_regexp"], repl, regex=True
			
 
				+        )
			
 
				+
			
 
				+        df_sector = df_sector.drop(columns="orig_category")
			
 
				+        pass
			
 
				+
			
 
				         pass