Daniel Busch 9 месяцев назад
Родитель
Commit
e29b6fa067
1 изменённый файл: 81 добавлено и 16 удалено
  1. 81 16
      src/unfccc_ghg_data/unfccc_reader/Mongolia/read_MNG_BUR2_from_pdf.py

+ 81 - 16
src/unfccc_ghg_data/unfccc_reader/Mongolia/read_MNG_BUR2_from_pdf.py

@@ -281,14 +281,54 @@ if __name__ == "__main__":
     # 2.5 Read harvested wood products table
     # ###
 
+    # The table for harvested wood products is in a different format
+    # and needs to be read in separately.
+
     inv_conf_harvested_wood_products = {
-        "page_defs": {
-            "part_1" :
-                {
-                    "area" : ["52,690,555,647"],
-                    "cols" : ["101,149,196,231,268,310,351,398,433,476,514"],
+        'page' : '151',
+        "category_column" : 'Categories',
+        "cat_codes_manual" : {
+            'GHG emission' : '3.D.1',
+        },
+        'unit' : 'Gg',
+        'entity' : 'CO2',
+        'parts' : {
+            "part_1" : {
+                "page_defs" :
+                    {
+                        "area" : ["52,690,555,647"],
+                        "cols" : ["101,149,196,231,268,310,351,398,433,476,514"],
+                    },
+                "rows_to_fix" : {
+                    3 : [
+                        "GHG",
+                    ],
                 },
-        }
+            },
+            "part_2" : {
+                "page_defs" :
+                    {
+                        "area" : ["52,637,555,596"],
+                        "cols" : ["99,150,197,239,281,326,372,425,469,516"],
+                    },
+                "rows_to_fix" : {
+                    3 : [
+                        "GHG",
+                    ],
+                },
+            },
+            "part_3" : {
+                "page_defs" :
+                    {
+                        "area" : ["52,591,550,547"],
+                        "cols" : ["106,156,197,239,281,326,372,420,465,509"],
+                    },
+                "rows_to_fix" : {
+                    3 : [
+                        "GHG",
+                    ],
+                },
+            }},
     }
 
     print("-" * 60)
@@ -297,34 +337,59 @@ if __name__ == "__main__":
     )
 
     df_hwp = None
-    for part in [*inv_conf_harvested_wood_products["page_defs"]] :
+    for part in [*inv_conf_harvested_wood_products['parts']] :
         tables_inventory_original = camelot.read_pdf(
             str(input_folder / pdf_file),
-            pages="151",
-            table_areas=inv_conf_harvested_wood_products["page_defs"][part]["area"],
-            columns=inv_conf_harvested_wood_products["page_defs"][part]["cols"],
+            pages=inv_conf_harvested_wood_products['page'],
+            table_areas=inv_conf_harvested_wood_products['parts'][part]["page_defs"]["area"],
+            columns=inv_conf_harvested_wood_products['parts'][part]["page_defs"]["cols"],
             flavor="stream",
             split_text=True,
         )
 
         df_hwp_part = tables_inventory_original[0].df
 
+        if "rows_to_fix" in inv_conf_harvested_wood_products['parts'][part]:
+            for n_rows in inv_conf_harvested_wood_products['parts'][part]["rows_to_fix"].keys():
+                df_hwp_part = fix_rows(
+                    df_hwp_part,
+                    rows_to_fix=inv_conf_harvested_wood_products['parts'][part]["rows_to_fix"][n_rows],
+                    col_to_use=0,
+                    n_rows=n_rows,
+                )
+
+        df_hwp_part = df_hwp_part.drop(1, axis=0).reset_index(drop=True)
+
         if df_hwp is None :
             df_hwp = df_hwp_part
         else :
-            df_sector = pd.concat(
-                [df_hwp, df_hwp_part],
+            df_hwp = pd.concat(
+                [df_hwp, df_hwp_part.drop(0, axis=1)],
                 axis=1,
                 join="outer",
             ).reset_index(drop=True)
 
-    pass
+    df_hwp = pd.DataFrame(df_hwp.values[1 :], columns=df_hwp.iloc[0])
+
+    df_hwp = df_hwp.rename(
+        columns={inv_conf_harvested_wood_products["category_column"] : "category"}
+    )
+
+    df_hwp.loc[:, "category"] = df_hwp.loc[:, "category"].replace(
+        inv_conf_harvested_wood_products["cat_codes_manual"]
+    )
+
+
+    # the unit is the same for every row of this table
+    df_hwp.loc[:, "unit"] = inv_conf_harvested_wood_products["unit"]
+
+    # the table reports a single entity, so it is set for every row
+    df_hwp.loc[:, "entity"] = inv_conf_harvested_wood_products["entity"]
+
     # ###
     # 3. Read in aggregated tables from 1990 - 2020
     # ###
-    # tables: 32, 43 - 44, 74, 103, 114 - 115,  119,  125 - 126,   157  161 - 162, // 151
-    # Work in progress
-    # noinspection PyInterpreter ??
+
     inv_conf_per_sector = {
         "total": {
             "page_defs": {