10 months ago · 516ae593f1
--- a/UNFCCC_GHG_data/UNFCCC_reader/Mongolia/config_MNG_BUR2.py
+++ b/UNFCCC_GHG_data/UNFCCC_reader/Mongolia/config_MNG_BUR2.py
@@ -142,6 +142,140 @@ inv_conf_per_year = {
 
				     },
			
 
				 }
			
 
				 
			
 
				# Configuration for the per-entity trend tables, keyed by entity name.
				#
				# Keys of each entity entry, as used by the trend-table loop in
				# read_MNG_BUR2_from_pdf.py:
				#   page_defs        - PDF page number (str) -> camelot settings; "area" is
				#                      passed as `table_areas`, "cols" as `columns`.
				#   rows_to_fix      - optional; n_rows -> row labels, handed to `fix_rows`
				#                      in insertion order.
				#   rows_to_drop     - optional; category labels whose row is removed.
				#   cat_codes_manual - explicit category label -> category code replacements.
				#   category_column  - header of the column that holds the category labels.
				#   columns_to_drop  - headers of columns removed after the category is set.
				#   years            - year columns that get their "," characters stripped.
				#   unit             - unit string written to the "unit" column.
				inv_conf_per_entity = {
				    "CO": {
				        "page_defs": {
				            "39": {
				                "area": ["53,646,550,588"],
				                "cols": ["279,328,364,400,440,478,520"],
				            },
				        },
				        "cat_codes_manual": {"Total National Emissions": "0"},
				        "category_column": "Categories",
				        "columns_to_drop": ["Categories"],
				        "years": ["1990", "1995", "2000", "2005", "2010", "2015", "2020"],
				        "unit": "Gg",
				    },
				    "NOx": {
				        "page_defs": {
				            "38": {
				                "area": ["53,120,538,93"],
				                "cols": ["281,329,365,405,441,477,513"],
				            },
				            "39": {
				                "area": ["51,772,539,740"],
				                "cols": ["285,332,368,404,444,476,514"],
				            },
				        },
				        "cat_codes_manual": {"Total National Emissions": "0"},
				        "category_column": "Categories",
				        "columns_to_drop": ["Categories"],
				        "years": ["1990", "1995", "2000", "2005", "2010", "2015", "2020"],
				        "unit": "Gg",
				    },
				    "HFCs": {
				        "page_defs": {
				            "38": {
				                "area": ["55,469,534,364"],
				                "cols": ["251,302,367,427,486"],
				            },
				        },
				        "cat_codes_manual": {"Total National Emissions (Gg CO2e)": "0"},
				        "category_column": "Categories",
				        "columns_to_drop": ["Share, %", "Categories"],
				        "years": ["2007", "2010", "2015", "2020"],
				        "unit": "Gg CO2e",
				    },
				    "N2O": {
				        "page_defs": {
				            "37": {
				                "area": ["55,106,556,79"],
				                "cols": ["170,258,305,347,394,440,476,512"],
				            },
				            "38": {
				                "area": ["55,773,555,664"],
				                "cols": ["215,264,306,353,395,439,476,513"],
				            },
				        },
				        "rows_to_fix": {
				            3: [
				                "3 - Agriculture, Forestry, and Other",
				                "3.C - Aggregate sources and non-",
				                "4.D - Wastewater Treatment and",
				            ],
				        },
				        "cat_codes_manual": {"Total National Emissions (Gg N2O)": "0"},
				        "category_column": "Categories",
				        "columns_to_drop": ["Share, %", "Categories"],
				        "years": ["1990", "1995", "2000", "2005", "2010", "2015", "2020"],
				        "unit": "Gg",
				    },
				    "CH4": {
				        "page_defs": {
				            "37": {
				                "area": ["55,423,552,216"],
				                "cols": ["186,250,296,326,383,427,467,507"],
				            },
				        },
				        "rows_to_fix": {
				            3: [
				                "1.A - Fuel Combustion",
				                "1.B - Fugitive emissions from",
				                "3 - Agriculture, Forestry, and",
				                "3.C - Aggregate sources and",
				                "4.D - Wastewater Treatment",
				                "Total National Emissions (Gg",
				            ],
				        },
				        "cat_codes_manual": {"Total National Emissions (Gg CH4)": "0"},
				        "category_column": "Categories",
				        "columns_to_drop": ["Share, %", "Categories"],
				        "years": ["1990", "1995", "2000", "2005", "2010", "2015", "2020"],
				        "unit": "Gg",
				    },
				    "CO2": {
				        "page_defs": {
				            "36": {
				                "area": ["53,147,556,79"],
				                "cols": ["150,204,254,306,352,406,459,513"],
				            },
				            "37": {
				                "area": ["51,772,561,515"],
				                "cols": ["151,202,252,305,357,404,463,517"],
				            },
				        },
				        # NOTE: the insertion order of these keys is the order in which
				        # the reader applies the fixes.
				        "rows_to_fix": {
				            2: [
				                "Categories",
				                "Emissions and",
				            ],
				            3: [
				                "1.A - Fuel",
				                "1.B - Fugitive",
				                "2 - Industrial Processes",
				                "3 - Agriculture,",
				                "Total National",
				                "Total National",
				            ],
				            5: ["2.D - Non-Energy"],
				            -2: [
				                "Categories ",
				                "Emissions and Removals (Gg CO2)",
				            ],
				        },
				        "rows_to_drop": [
				            "Total National Emissions (Gg CO2)",
				            "Total National Removals (Gg CO2)",
				        ],
				        # The surrounding blanks in " Categories " are part of the header
				        # text this config expects and must be kept.
				        "columns_to_drop": ["Share, %", " Categories "],
				        "cat_codes_manual": {
				            "Total National Emissions and Removals (Gg CO2)": "0",
				        },
				        "category_column": " Categories ",
				        "years": ["1990", "1995", "2000", "2005", "2010", "2015", "2020"],
				        "unit": "Gg",
				    },
				    # Table-level settings; they sit next to the entity entries, so code
				    # iterating this dict must not treat every key as an entity.
				    "entity_row": 0,
				    "unit_row": 1,
				}
			
 
				+
			
 
				 # primap2 format conversion
			
 
				 coords_cols = {
			
 
				     "category": "category",
			
@@ -149,6 +283,13 @@ coords_cols = {
 
				     "unit": "unit",
			
 
				 }
			
 
				 
			
 
				# Column mapping used when converting the wide-format trend tables.
				# TODO: this is probably identical to `coords_cols` above; re-test and,
				# if so, use `coords_cols` instead.
				coords_cols_wide = dict(
				    category="category",
				    entity="entity",
				    unit="unit",
				)
			
 
				+
			
 
				 coords_defaults = {
			
 
				     "source": "MNG-GHG-Inventory",
			
 
				     "provenance": "measured",
			
--- a/UNFCCC_GHG_data/UNFCCC_reader/Mongolia/read_MNG_BUR2_from_pdf.py
+++ b/UNFCCC_GHG_data/UNFCCC_reader/Mongolia/read_MNG_BUR2_from_pdf.py
@@ -11,6 +11,7 @@ from UNFCCC_GHG_data.helper import (
 
				 from config_MNG_BUR2 import (
			
 
				     inv_conf,
			
 
				     inv_conf_per_year,
			
 
				+    inv_conf_per_entity,
			
 
				     coords_cols,
			
 
				     coords_defaults,
			
 
				     coords_terminologies,
			
@@ -19,6 +20,7 @@ from config_MNG_BUR2 import (
 
				     meta_data,
			
 
				     country_processing_step1,
			
 
				     gas_baskets,
			
 
				+    coords_cols_wide
			
 
				 )
			
 
				 
			
 
				 # ###
			
@@ -37,7 +39,7 @@ category_column = f"category ({coords_terminologies['category']})"
 
				 compression = dict(zlib=True, complevel=9)
			
 
				 
			
 
				 # ###
			
 
				-# 1. Read in tables
			
 
				+# 1. Read in main tables
			
 
				 # ###
			
 
				 
			
 
				 df_all = None
			
@@ -152,6 +154,7 @@ for year in inv_conf_per_year.keys():
 
				             join="outer",
			
 
				         ).reset_index(drop=True)
			
 
				 
			
 
				+# TODO: choose a different name for df_all here (it is reused for the trend tables below)
			
 
				 ### convert to interchange format ###
			
 
				 print("Converting to interchange format.")
			
 
				 df_all_IF = pm2.pm2io.convert_long_dataframe_if(
			
@@ -168,7 +171,122 @@ df_all_IF = pm2.pm2io.convert_long_dataframe_if(
 
				 
			
 
				 ### convert to primap2 format ###
			
 
				 print("Converting to primap2 format.")
			
 
				-data_pm2 = pm2.pm2io.from_interchange_format(df_all_IF)
			
 
				+data_main_pm2 = pm2.pm2io.from_interchange_format(df_all_IF)
			
 
				+
			
 
				+# ###
			
 
				+# 2. Read in trend tables
			
 
				+# ###
			
 
				+
			
 
				def repl(m):
				    """Return the text captured by the named group ``code`` of match *m*."""
				    return m.group("code")


				# One wide table per entity: read every configured page, stitch the pages
				# together, clean up the rows and columns, then stack all entities into
				# `df_all`.
				entity_frames = []
				for entity in ["CO2", "CH4", "N2O", "HFCs", "NOx", "CO"]:
				    print("-" * 60)
				    print(f"Reading entity {entity}.")

				    entity_conf = inv_conf_per_entity[entity]

				    # Read the table part on each page with the configured area and columns.
				    page_frames = []
				    for page, page_def in entity_conf["page_defs"].items():
				        print(f"Reading page {page}.")

				        tables_inventory_original = camelot.read_pdf(
				            str(input_folder / pdf_file),
				            pages=page,
				            table_areas=page_def["area"],
				            columns=page_def["cols"],
				            flavor="stream",
				            split_text=True,
				        )
				        df_page = tables_inventory_original[0].df
				        page_frames.append(df_page)
				        print(f"adding table from page {page}.")

				    # Concatenate once instead of growing the frame page by page.
				    df_entity = pd.concat(
				        page_frames,
				        axis=0,
				        join="outer",
				    ).reset_index(drop=True)

				    # Apply the configured row fixes, in the order given in the config.
				    for n_rows, rows_to_fix in entity_conf.get("rows_to_fix", {}).items():
				        print(f"Merge content for {n_rows=}")
				        df_entity = fix_rows(
				            df_entity,
				            rows_to_fix=rows_to_fix,
				            col_to_use=0,
				            n_rows=n_rows,
				        )

				    # The first row holds the column headers. Take an explicit copy of the
				    # remaining rows so the assignments below do not operate on a slice
				    # (avoids pandas' SettingWithCopyWarning).
				    df_entity.columns = df_entity.iloc[0, :]
				    df_entity = df_entity[1:].copy()

				    # unit is taken from the config ("Gg" for most tables, "Gg CO2e" for HFCs)
				    df_entity.loc[:, "unit"] = entity_conf["unit"]

				    # only one entity per table
				    df_entity.loc[:, "entity"] = entity

				    df_entity.loc[:, "category"] = df_entity.loc[
				        :, entity_conf["category_column"]
				    ]

				    # Drop the first row matching each configured label; a label that is
				    # not present raises IndexError here.
				    for row in entity_conf.get("rows_to_drop", []):
				        row_to_delete = df_entity.index[df_entity["category"] == row][0]
				        df_entity = df_entity.drop(index=row_to_delete)

				    # Manual label -> code replacements first, then the regular expression,
				    # which replaces each match by its "code" group.
				    df_entity.loc[:, "category"] = df_entity.loc[:, "category"].replace(
				        entity_conf["cat_codes_manual"]
				    )
				    df_entity.loc[:, "category"] = df_entity["category"].str.replace(
				        inv_conf["cat_code_regexp"], repl, regex=True
				    )

				    df_entity = df_entity.drop(columns=entity_conf["columns_to_drop"])

				    # Remove the "," characters from the values in the year columns.
				    for year in entity_conf["years"]:
				        df_entity.loc[:, year] = df_entity[year].str.replace(",", "")

				    entity_frames.append(df_entity)

				# Stack all entities; with join="outer", columns missing for an entity
				# are kept and filled with NaN.
				df_all = pd.concat(
				    entity_frames,
				    axis=0,
				    join="outer",
				).reset_index(drop=True)
			
 
				+
			
 
				+
			
 
				### convert to interchange format ###
				# The trend tables are wide (one column per year), so the wide-format
				# converter is used together with `coords_cols_wide`.
				df_trend_IF = pm2.pm2io.convert_wide_dataframe_if(
				    data_wide=df_all,
				    coords_cols=coords_cols_wide,
				    coords_defaults=coords_defaults,
				    coords_terminologies=coords_terminologies,
				    coords_value_mapping=coords_value_mapping,
				    # filter_remove=filter_remove,
				    meta_data=meta_data,
				    convert_str=True,
				    time_format="%Y",
				)

				### convert to primap2 format ###
				print("Converting to primap2 format.")
				data_trend_pm2 = pm2.pm2io.from_interchange_format(df_trend_IF)

				# ###
				# Merge main and trend tables.
				# ###

				print("Merging main and trend table.")
				# NOTE(review): tolerance=1 looks very permissive if primap2 treats it as a
				# relative tolerance - confirm against the primap2 merge documentation.
				data_pm2 = data_main_pm2.pr.merge(data_trend_pm2, tolerance=1)
			
 
				+
			
 
				+
			
 
				 
			
 
				 # ###
			
 
				 # Save raw data to IF and native format.