
first table

Daniel Busch 9 months ago
parent
commit
8bbbb22cf4

+ 23 - 3
src/unfccc_ghg_data/unfccc_reader/Bangladesh/config_bgd_bur1.py

@@ -13,13 +13,33 @@ inv_conf_per_year = {
             "207": {
                 "area": ["60,630,534,79"],
                 "cols": ["387,444,495"],
-                "skip_rows": 0,
+                "skip_rows_start": 0,
+                "skip_rows_end": 0,
             },
             "208": {
-                "area": ["65,687,527,120"],
+                "area": ["63,720,527,120"],
                 "cols": ["380,437,491"],
-                "skip_rows": 5,
+                "skip_rows_start": 8,
+                "skip_rows_end": 6,
             },
         },
+        "rows_to_fix": {
+            -2: [
+                "ch4 emission from rice field",
+                "indirect nitrous oxide (n2o) from n based fertilizer",
+                "Direct nitrous oxide (n2o) emissions from fertilizer application",
+                "Total enteric ch4 emissions",
+                "Total Manure ch4 emissions",
+                "Total Direct n2o emissions from manure system",
+                "Total indirect n2o emissions - Volatilization",
+                "Total indirect n2o emissions - leaching/Runoff",
+            ],
+            3: [
+                "3 - GHG Emissions Agriculture, Livestock & Forest and Other Land -Use"
+            ],
+            -3: [
+                "Greenhouse gas source and sink categories",
+            ],
+        },
     },
 }
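
The per-page "area" and "cols" strings are coordinate lists for camelot's stream parser. The full read_pdf call is truncated in the reader hunk below, so the keyword names used here (table_areas, columns, flavor) are an assumption based on camelot-py's documented stream-flavor API rather than a copy of the reader code, and the PDF file name is hypothetical:

import camelot

# minimal sketch, assuming camelot-py's stream flavor
tables = camelot.read_pdf(
    "BUR1_Bangladesh.pdf",           # hypothetical file name
    pages="208",
    flavor="stream",
    table_areas=["63,720,527,120"],  # "area" value from the config above
    columns=["380,437,491"],         # "cols" value from the config above
)
df_page = tables[0].df               # first detected table as a DataFrame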

+ 28 - 17
src/unfccc_ghg_data/unfccc_reader/Bangladesh/read_BGD_BUR1_from_pdf.py

@@ -9,6 +9,7 @@ from config_bgd_bur1 import coords_terminologies, inv_conf_per_year
 from unfccc_ghg_data.helper import (
     downloaded_data_path,
     extracted_data_path,
+    fix_rows,
 )
 
 if __name__ == "__main__":
@@ -39,6 +40,8 @@ if __name__ == "__main__":
         df_year = None
         for page in inv_conf_per_year[year]["page_defs"].keys():
             print(f"Reading table from page {page}.")
+
+            # read from PDF
             tables_inventory_original = camelot.read_pdf(
                 str(input_folder / pdf_file),
                 pages=page,
@@ -51,34 +54,42 @@ if __name__ == "__main__":
 
             df_page = tables_inventory_original[0].df
 
+            # cut rows at the top if needed
+            skip_rows_start = inv_conf_per_year[year]["page_defs"][page][
+                "skip_rows_start"
+            ]
+            if not skip_rows_start == 0:
+                df_page = df_page[skip_rows_start:]
+
+            # cut rows at the bottom if needed
+            skip_rows_end = inv_conf_per_year[year]["page_defs"][page]["skip_rows_end"]
+            if not skip_rows_end == 0:
+                df_page = df_page[:-skip_rows_end]
+
+            # stack the tables vertically
             if df_year is None:
-                df_year = df_page[
-                    inv_conf_per_year[year]["page_defs"][page]["skip_rows"] :
-                ]
+                df_year = df_page
             else:
                 df_year = pd.concat(
                     [
                         df_year,
-                        df_page[
-                            inv_conf_per_year[year]["page_defs"][page]["skip_rows"] :
-                        ],
+                        df_page,
                     ],
                     axis=0,
                     join="outer",
                 ).reset_index(drop=True)
 
+        # fix content that spreads across multiple rows
+        if "rows_to_fix" in inv_conf_per_year[year]:
+            for n_rows in inv_conf_per_year[year]["rows_to_fix"].keys():
+                print(f"Merge content for {n_rows=}")
+                df_year = fix_rows(
+                    df_year,
+                    rows_to_fix=inv_conf_per_year[year]["rows_to_fix"][n_rows],
+                    col_to_use=0,
+                    n_rows=n_rows,
+                )
         pass
-        # # fix content that spreads across multiple rows
-        # if "rows_to_fix" in inv_conf_per_year[year]:
-        #     for n_rows in inv_conf_per_year[year]["rows_to_fix"].keys():
-        #         print(f"Merge content for {n_rows=}")
-        #         df_year = fix_rows(
-        #             df_year,
-        #             rows_to_fix=inv_conf_per_year[year]["rows_to_fix"][n_rows],
-        #             col_to_use=0,
-        #             n_rows=n_rows,
-        #         )
-        #
         # df_header = pd.DataFrame([inv_conf["header"], inv_conf["unit"]])
         #
         # skip_rows = 11
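
Taken together, the new logic trims page headers and footers by row count, stacks the per-page tables, and then merges category labels that the PDF extraction split across rows. Below is a minimal, self-contained sketch of that flow using toy frames; merge_split_rows is a hypothetical stand-in for the fix_rows helper, whose exact semantics (including the meaning of negative n_rows keys) are assumed here, not shown in the diff:

import pandas as pd

# two toy "page" frames standing in for camelot output; real layout differs
page_207 = pd.DataFrame([
    ["header junk", "", ""],
    ["ch4 emission from", "", ""],
    ["rice field", "12.3", "Gg"],
    ["footer junk", "", ""],
])
page_208 = pd.DataFrame([
    ["Total enteric ch4", "", ""],
    ["emissions", "45.6", "Gg"],
])

skip_rows_start, skip_rows_end = 1, 1
page_207 = page_207[skip_rows_start:]   # cut rows at the top
page_207 = page_207[:-skip_rows_end]    # cut rows at the bottom

df_year = pd.concat(
    [page_207, page_208], axis=0, join="outer"
).reset_index(drop=True)

def merge_split_rows(df, rows_to_fix, col_to_use=0, n_rows=2):
    # hypothetical stand-in for unfccc_ghg_data.helper.fix_rows: whenever
    # joining n_rows consecutive values of col_to_use yields one of the
    # target labels, collapse those rows into a single row
    out = []
    i = 0
    while i < len(df):
        block = df.iloc[i : i + n_rows]
        joined = " ".join(block.iloc[:, col_to_use]).strip()
        if joined.lower() in [label.lower() for label in rows_to_fix]:
            merged = block.iloc[-1].copy()
            merged.iloc[col_to_use] = joined
            out.append(merged)
            i += n_rows
        else:
            out.append(df.iloc[i])
            i += 1
    return pd.DataFrame(out).reset_index(drop=True)

df_year = merge_split_rows(
    df_year,
    rows_to_fix=["ch4 emission from rice field", "Total enteric ch4 emissions"],
    n_rows=2,
)
print(df_year)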