il y a 9 mois · 8bbbb22cf4
--- a/src/unfccc_ghg_data/unfccc_reader/Bangladesh/config_bgd_bur1.py
+++ b/src/unfccc_ghg_data/unfccc_reader/Bangladesh/config_bgd_bur1.py
@@ -13,13 +13,33 @@ inv_conf_per_year = {
 
															             "207": {
														
 
															                 "area": ["60,630,534,79"],
														
 
															                 "cols": ["387,444,495"],
														
 
															-                "skip_rows": 0,
														
 
															+                "skip_rows_start": 0,
														
 
															+                "skip_rows_end": 0,
														
 
															             },
														
 
															             "208": {
														
 
															-                "area": ["65,687,527,120"],
														
 
															+                "area": ["63,720,527,120"],
														
 
															                 "cols": ["380,437,491"],
														
 
															-                "skip_rows": 5,
														
 
															+                "skip_rows_start": 8,
														
 
															+                "skip_rows_end": 6,
														
 
															             },
														
 
															         },
														
 
															+        "rows_to_fix": {
														
 
															+            -2: [
														
 
															+                "ch4 emission from rice field",
														
 
															+                "indirect nitrous oxide (n2o) from n based fertilizer",
														
 
															+                "Direct nitrous oxide (n2o) emissions from fertilizer application",
														
 
															+                "Total enteric ch4 emissions",
														
 
															+                "Total Manure ch4 emissions",
														
 
															+                "Total Direct n2o emissions from manure system",
														
 
															+                "Total indirect n2o emissions - Volatilization",
														
 
															+                "Total indirect n2o emissions - leaching/Runoff",
														
 
															+            ],
														
 
															+            3: [
														
 
															+                "3 - GHG Emissions Agriculture, Livestock & Forest and Other Land -Use"
														
 
															+            ],
														
 
															+            -3: [
														
 
															+                "Greenhouse gas source and sink categories",
														
 
															+            ],
														
 
															+        },
														
 
															     },
														
 
															 }
														
--- a/src/unfccc_ghg_data/unfccc_reader/Bangladesh/read_BGD_BUR1_from_pdf.py
+++ b/src/unfccc_ghg_data/unfccc_reader/Bangladesh/read_BGD_BUR1_from_pdf.py
@@ -9,6 +9,7 @@ from config_bgd_bur1 import coords_terminologies, inv_conf_per_year
 
															 from unfccc_ghg_data.helper import (
														
 
															     downloaded_data_path,
														
 
															     extracted_data_path,
														
 
															+    fix_rows,
														
 
															 )
														
 
															 if __name__ == "__main__":
														
@@ -39,6 +40,8 @@ if __name__ == "__main__":
 
															         df_year = None
														
 
															         for page in inv_conf_per_year[year]["page_defs"].keys():
														
 
															             print(f"Reading table from page {page}.")
														
 
															+
														
 
															+            # read from PDF
														
 
															             tables_inventory_original = camelot.read_pdf(
														
 
															                 str(input_folder / pdf_file),
														
 
															                 pages=page,
														
@@ -51,34 +54,42 @@ if __name__ == "__main__":
 
															             df_page = tables_inventory_original[0].df
														
 
															+            # cut rows at the top if needed
														
 
															+            skip_rows_start = inv_conf_per_year[year]["page_defs"][page][
														
 
															+                "skip_rows_start"
														
 
															+            ]
														
 
															+            if not skip_rows_start == 0:
														
 
															+                df_page = df_page[skip_rows_start:]
														
 
															+
														
 
															+            # cut rows at the bottom if needed
														
 
															+            skip_rows_end = inv_conf_per_year[year]["page_defs"][page]["skip_rows_end"]
														
 
															+            if not skip_rows_end == 0:
														
 
															+                df_page = df_page[:-skip_rows_end]
														
 
															+
														
 
															+            # stack the tables vertically
														
 
															             if df_year is None:
														
 
															-                df_year = df_page[
														
 
															-                    inv_conf_per_year[year]["page_defs"][page]["skip_rows"] :
														
 
															-                ]
														
 
															+                df_year = df_page
														
 
															             else:
														
 
															                 df_year = pd.concat(
														
 
															                     [
														
 
															                         df_year,
														
 
															-                        df_page[
														
 
															-                            inv_conf_per_year[year]["page_defs"][page]["skip_rows"] :
														
 
															-                        ],
														
 
															+                        df_page,
														
 
															                     ],
														
 
															                     axis=0,
														
 
															                     join="outer",
														
 
															                 ).reset_index(drop=True)
														
 
															+        # fix content that spreads across multiple rows
														
 
															+        if "rows_to_fix" in inv_conf_per_year[year]:
														
 
															+            for n_rows in inv_conf_per_year[year]["rows_to_fix"].keys():
														
 
															+                print(f"Merge content for {n_rows=}")
														
 
															+                df_year = fix_rows(
														
 
															+                    df_year,
														
 
															+                    rows_to_fix=inv_conf_per_year[year]["rows_to_fix"][n_rows],
														
 
															+                    col_to_use=0,
														
 
															+                    n_rows=n_rows,
														
 
															+                )
														
 
															         pass
														
 
															-        # # fix content that spreads across multiple rows
														
 
															-        # if "rows_to_fix" in inv_conf_per_year[year]:
														
 
															-        #     for n_rows in inv_conf_per_year[year]["rows_to_fix"].keys():
														
 
															-        #         print(f"Merge content for {n_rows=}")
														
 
															-        #         df_year = fix_rows(
														
 
															-        #             df_year,
														
 
															-        #             rows_to_fix=inv_conf_per_year[year]["rows_to_fix"][n_rows],
														
 
															-        #             col_to_use=0,
														
 
															-        #             n_rows=n_rows,
														
 
															-        #         )
														
 
															-        #
														
 
															         # df_header = pd.DataFrame([inv_conf["header"], inv_conf["unit"]])
														
 
															         #
														
 
															         # skip_rows = 11