Просмотр исходного кода

tables on pages 103, 114-115, 119, 125-126

Daniel Busch 9 месяцев назад
Родитель
Commit
ac47fb5056
1 измененных файлов с 136 добавлено и 11 удалено
  1. 136 11
      src/unfccc_ghg_data/unfccc_reader/Mongolia/read_MNG_BUR2_from_pdf.py

+ 136 - 11
src/unfccc_ghg_data/unfccc_reader/Mongolia/read_MNG_BUR2_from_pdf.py

@@ -280,7 +280,7 @@ if __name__ == "__main__":
     # ###
     # 3. Read in aggregated tables from 1990 - 2020
     # ###
-    # tables: 32, 43 - 44, 74, 103, // 114 - 115, 119, 125 - 126, 151, 157, 161 - 162
+    # tables: 32, 43 - 44, 74, 103, 114 - 115,  119,  125 - 126, // 151, 157, 161 - 162
     # Work in progress
     # noinspection PyInterpreter
     inv_conf_per_sector = {
@@ -415,7 +415,7 @@ if __name__ == "__main__":
                     "cols": ["97,182,259,326,403,474"],
                 },
             },
-            "entity": "KYOTOGHG (SARGWP100)",
+            # "entity": "KYOTOGHG (SARGWP100)",
             # "category_column" : "Categories",
             # "columns_to_drop" : ["Categories"],
             # "years" : ["1990", "1995", "2000", "2005", "2010", "2015", "2020"],
@@ -438,7 +438,7 @@ if __name__ == "__main__":
                 " (Gg CO2e) ": "3.A",
             },
             "multi_entity": {
-                "unit": ["Gg CH4", "Gg CH4", "Gg CH4", "Gg CO2e", "Gg CO2e", "Gg CO2e"],
+                "unit": ["Gg", "Gg", "Gg", "Gg CO2e", "Gg CO2e", "Gg CO2e"],
                 "entity": [
                     "CH4",
                     "CH4",
@@ -449,13 +449,126 @@ if __name__ == "__main__":
                 ],
             },
         },
+        "biomass_burning": {
+            "page_defs": {
+                "114": {
+                    "area": ["70,214,544,78"],
+                    "cols": ["116,185,239,304,365,426,491"],
+                },
+                "115": {
+                    "area": ["72,777,545,505"],
+                    "cols": ["123,190,250,313,374,438,495"],
+                },
+            },
+            "last_year": "2020",
+            "col_to_use" : 5,
+            "rows_to_fix": {
+                7: [
+                    "3.C.1 - Emiss",
+                ],
+            },
+            "year_column": "  Year  ",
+            # TODO: These categories are technically duplicates, just with a different unit
+            "categories_to_drop" : ['3.C.1 -Emiss  CH4 (Gg CO2e)',
+                 'ions from bioma (CO2e) N2O (Gg CO2e)',
+                 'ss burning  Total (Gg CO2e)'],
+            # TODO: This is far from the actual categories but works for now
+            "cat_codes_manual": {
+                ' 3.C.1  CH4 (Gg) ' : '3.C.1',
+                 ' -Emissions fr  N2O (Gg) ' : '3.C.1',
+                 ' om biomass bur  NOx (Gg) ' : '3.C.1',
+                 ' ning  CO(Gg) ' : '3.C.1',
+            },
+            "multi_entity": {
+                "unit": ["Gg", "Gg", "Gg", "Gg"],
+                "entity": [
+                    "CH4",
+                    "N2O",
+                    "NOx",
+                    "CO",
+                ],
+            },
+        },
+        "managed_soils_direct" : {
+            "page_defs" : {
+                "119" : {
+                    "area" : ["70,600,541,173"],
+                    "cols" : ["114,191,245,328,400,476"],
+                },
+            },
+            "last_year" : "2020",
+            "col_to_use" : 3,
+            "rows_to_fix" : {
+                10 : [
+                    "Urine and dung",
+                ],
+            },
+            "year_column" : '  Year   ',
+            # TODO: technically a duplicate, just with a different unit
+            "categories_to_drop" : [' 3.C.4 -Direct N2O Emissions from managed soils (CO2e) Gg CO2e', ],
+            # TODO: This is far from the actual categories but works for now
+            "cat_codes_manual" : {
+                # TODO the next 4 categories are made up placeholders
+                ' Inorganic N fertilizer application  N2O (Gg)' : '3.C.4.i',
+                ' Organic N applied as fertilizer (manure) N2O (Gg)' : '3.C.4.ii',
+                'Urine and dung N deposited on pasture, range and paddock by grazing animals N2O (Gg)' : '3.C.4.iii',
+                '  N in crop residues  N2O (Gg)' : '3.C.4.iiii',
+                ' 3.C.4 -Direct N2O Emissions from managed soils N2O (Gg)' : '3.C.4',
+            },
+            "multi_entity" : {
+                "unit" : ["Gg", "Gg", "Gg", "Gg", "Gg"],
+                "entity" : [
+                    "N2O",
+                    "N2O",
+                    "N2O",
+                    "N2O",
+                    "N2O",
+                ],
+            },
+        },
+        "managed_soils_indirect" : {
+            "page_defs" : {
+            "125" : {
+                "area" : ["74,214,539,83"],
+                "cols" : ["125,222,309,423"],
+            },
+            "126" : {
+                "area" : ["72,775,539,369"],
+                "cols" : ["148,248,351,459"],
+            },
+            },
+            "last_year" : "2020",
+            "col_to_use" : 3,
+            "rows_to_fix" : {
+                7 : [
+                    "3.C.5 - Indirect N2O",
+                ],
+            },
+            "year_column" : '  Year  ',
+            # TODO: technically a duplicate, just with a different unit
+            "categories_to_drop" : ['3.C.5 -Indirect N2O emissions from managed  soils Gg CO2e'],
+            # TODO: This is far from the actual categories but works for now
+            "cat_codes_manual" : {
+                # TODO the next 2 categories are made up placeholders
+                ' Volatilization  pathway Gg N2O' : '3.C.5.i',
+                ' Leaching/runoff  pathway Gg N2O' : '3.C.5.ii',
+                '3.C.5 -Indirect N2O emissions from managed  soils Gg N2O' : '3.C.5',
+            },
+            "entity" : "N2O",
+            # "category_column" : "Categories",
+            # "columns_to_drop" : ["Categories"],
+            # "years" : ["1990", "1995", "2000", "2005", "2010", "2015", "2020"],
+            "unit" : "Gg",
+        },
     }
 
     df_agg = None
-    # TODO remove reversed order again (only for development)
+    # TODO remove `reversed` (only for development)
     for sector in list(reversed(list(inv_conf_per_sector.keys()))):
         print("-" * 60)
-        print(f"Reading sector {sector}.")
+        print(
+            f"Reading sector {sector} on page(s) {[*inv_conf_per_sector[sector]['page_defs']]}."
+        )
 
         df_sector = None
 
@@ -489,10 +602,15 @@ if __name__ == "__main__":
         if "rows_to_fix" in inv_conf_per_sector[sector]:
             for n_rows in inv_conf_per_sector[sector]["rows_to_fix"].keys():
                 print(f"Merge content for {n_rows=}")
+                # select the column passed to fix_rows (defaults to 0 if not configured)
+                if "col_to_use" in inv_conf_per_sector[sector].keys():
+                    col_to_use = inv_conf_per_sector[sector]["col_to_use"]
+                else:
+                    col_to_use = 0
                 df_sector = fix_rows(
                     df_sector,
                     rows_to_fix=inv_conf_per_sector[sector]["rows_to_fix"][n_rows],
-                    col_to_use=0,
+                    col_to_use=col_to_use,
                     n_rows=n_rows,
                 )
 
@@ -502,6 +620,7 @@ if __name__ == "__main__":
             for row in inv_conf_per_sector[sector]["rows_to_drop"]:
                 df_sector = df_sector.drop(index=row)
 
+        # TODO: Is it necessary to set the index here?
         df_sector = df_sector.set_index(0)
 
         # transpose so categories are in first columns
@@ -513,21 +632,26 @@ if __name__ == "__main__":
 
         df_sector["category"] = df_sector["category"].str.replace("\n", "")
 
+        # TODO: Is this not the same as the category removal further down?
+        if "categories_to_drop" in inv_conf_per_sector[sector]:
+            for row in inv_conf_per_sector[sector]["categories_to_drop"]:
+                        row_to_delete = df_sector.index[df_sector["category"] == row][0]
+                        df_sector = df_sector.drop(index=row_to_delete)
+
         df_sector.loc[:, "category"] = df_sector.loc[:, "category"].replace(
             inv_conf_per_sector[sector]["cat_codes_manual"]
         )
 
         if "multi_entity" in inv_conf_per_sector[sector]:
-            pass
             df_sector["entity"] = inv_conf_per_sector[sector]["multi_entity"]["entity"]
             df_sector["unit"] = inv_conf_per_sector[sector]["multi_entity"]["unit"]
-            df_sector = df_sector.set_index(["entity", "unit", "category"])
+            # df_sector = df_sector.set_index(["entity", "unit", "category"])
 
         else:
-            # unit is always Gg
+            # unit is always the same
             df_sector.loc[:, "unit"] = inv_conf_per_sector[sector]["unit"]
 
-            # only one entity per table
+            # and only one entity per table
             df_sector.loc[:, "entity"] = inv_conf_per_sector[sector]["entity"]
 
         # Some categories are in two tables (summary and sector)
@@ -537,7 +661,7 @@ if __name__ == "__main__":
                 row_to_delete = df_sector.index[df_sector["category"] == row][0]
                 df_sector = df_sector.drop(index=row_to_delete)
 
-        if df_sector is None:
+        if df_agg is None:
             df_agg = df_sector
         else:
             df_agg = pd.concat(
@@ -550,6 +674,7 @@ if __name__ == "__main__":
             df_agg.loc[:, year] = df_agg[year].str.replace(",", "")
 
         # print(df_agg)
+    pass
 
     ### convert to interchange format ###
     df_agg_IF = pm2.pm2io.convert_wide_dataframe_if(