Browse Source

clean up code

Daniel Busch 10 months ago
parent
commit
80066b8e22

+ 126 - 134
UNFCCC_GHG_data/UNFCCC_reader/Mongolia/config_MNG_BUR2.py

@@ -12,9 +12,9 @@ inv_conf = {
     "cat_code_regexp": r"^(?P<code>[a-zA-Z0-9\.]{1,11})[\s\.].*",
     "cat_codes_manual": {
         # remove whitespace at start of line
-        ' 2.G.2 -SF6 and PFCs from Other Product Uses' : '2.G.2 - SF6 and PFCs from Other Product Uses',
-        ' 2.G.3 -N2O from Product Uses' : '2.G.3 - N2O from Product Uses',
-        ' 1.C.1 -Transport of CO2' : '1.C.1 - Transport of CO2',
+        " 2.G.2 -SF6 and PFCs from Other Product Uses": "2.G.2 - SF6 and PFCs from Other Product Uses",
+        " 2.G.3 -N2O from Product Uses": "2.G.3 - N2O from Product Uses",
+        " 1.C.1 -Transport of CO2": "1.C.1 - Transport of CO2",
         " 3.C.1 -Emissions from biomass burning ": "3.C.1",
         "Memo Items (5)": "MEMO",
         "International Bunkers": "M.BK",
@@ -115,11 +115,6 @@ inv_conf_per_year = {
             },
         },
         "rows_to_fix": {
-            # 3: [
-            #
-            #
-            #
-            # ],
             -2: [
                 "1.C.1 - Transport of CO2",
                 "2.G.2 - SF6 and PFCs from Other Product Uses",
@@ -143,137 +138,136 @@ inv_conf_per_year = {
 }
 
 inv_conf_per_entity = {
-    "CO" : {
-        'page_defs' : {
-            '39' : {
-                "area" : ['53,646,550,588'],
-                "cols" : ['279,328,364,400,440,478,520'],
+    "CO": {
+        "page_defs": {
+            "39": {
+                "area": ["53,646,550,588"],
+                "cols": ["279,328,364,400,440,478,520"],
             },
         },
-        "cat_codes_manual" : {"Total National Emissions" : "0"},
-        "category_column" : "Categories",
-        "columns_to_drop" : ["Categories"],
-        "years" : ['1990', '1995', '2000', '2005', '2010', '2015', '2020'],
-        "unit" : "Gg",
+        "cat_codes_manual": {"Total National Emissions": "0"},
+        "category_column": "Categories",
+        "columns_to_drop": ["Categories"],
+        "years": ["1990", "1995", "2000", "2005", "2010", "2015", "2020"],
+        "unit": "Gg",
     },
-    "NOx" : {
-        'page_defs' : {
-            '38' : {
-                "area" : ['53,120,538,93'],
-                "cols" : ['281,329,365,405,441,477,513'],
+    "NOx": {
+        "page_defs": {
+            "38": {
+                "area": ["53,120,538,93"],
+                "cols": ["281,329,365,405,441,477,513"],
             },
-            '39' : {
-                "area" : ['51,772,539,740'],
-                "cols" : ['285,332,368,404,444,476,514'],
+            "39": {
+                "area": ["51,772,539,740"],
+                "cols": ["285,332,368,404,444,476,514"],
             },
-
         },
-        "cat_codes_manual" : {"Total National Emissions" : "0"},
-        "category_column" : "Categories",
-        "columns_to_drop" : ["Categories"],
-        "years" : ['1990', '1995', '2000', '2005', '2010', '2015', '2020'],
-        "unit" : "Gg",
+        "cat_codes_manual": {"Total National Emissions": "0"},
+        "category_column": "Categories",
+        "columns_to_drop": ["Categories"],
+        "years": ["1990", "1995", "2000", "2005", "2010", "2015", "2020"],
+        "unit": "Gg",
     },
-    "HFCs" : {
-        'page_defs' : {
-            '38' : {
-                "area" : ['55,469,534,364'],
-                "cols" : ['251,302,367,427,486'],
+    "HFCs": {
+        "page_defs": {
+            "38": {
+                "area": ["55,469,534,364"],
+                "cols": ["251,302,367,427,486"],
             },
         },
-        "cat_codes_manual" : {"Total National Emissions (Gg CO2e)" : "0"},
-        "category_column" : "Categories",
-        "columns_to_drop" : ["Share, %", "Categories"],
-        "years" : ['2007', '2010', '2015', '2020'],
-        "unit" : "Gg CO2e",
+        "cat_codes_manual": {"Total National Emissions (Gg CO2e)": "0"},
+        "category_column": "Categories",
+        "columns_to_drop": ["Share, %", "Categories"],
+        "years": ["2007", "2010", "2015", "2020"],
+        "unit": "Gg CO2e",
     },
-    "N2O" : {
-        'page_defs' : {
-            '37' : {
-                "area" : ['55,106,556,79'],
-                "cols" : ['170,258,305,347,394,440,476,512'],
+    "N2O": {
+        "page_defs": {
+            "37": {
+                "area": ["55,106,556,79"],
+                "cols": ["170,258,305,347,394,440,476,512"],
             },
-            '38' : {
-                "area" : ['55,773,555,664'],
-                "cols" : ['215,264,306,353,395,439,476,513'],
+            "38": {
+                "area": ["55,773,555,664"],
+                "cols": ["215,264,306,353,395,439,476,513"],
             },
         },
-        "rows_to_fix" : {
-            3 : ["3 - Agriculture, Forestry, and Other",
-                 "3.C - Aggregate sources and non-",
-                 "4.D - Wastewater Treatment and",
-                 ]
+        "rows_to_fix": {
+            3: [
+                "3 - Agriculture, Forestry, and Other",
+                "3.C - Aggregate sources and non-",
+                "4.D - Wastewater Treatment and",
+            ]
         },
-        "cat_codes_manual" : {"Total National Emissions (Gg N2O)" : "0"},
-        "category_column" : "Categories",
-        "columns_to_drop" : ["Share, %", "Categories"],
-        "years" : ['1990', '1995', '2000', '2005', '2010', '2015', '2020'],
-        "unit" : "Gg",
+        "cat_codes_manual": {"Total National Emissions (Gg N2O)": "0"},
+        "category_column": "Categories",
+        "columns_to_drop": ["Share, %", "Categories"],
+        "years": ["1990", "1995", "2000", "2005", "2010", "2015", "2020"],
+        "unit": "Gg",
     },
-    "CH4" : {
-        'page_defs' : {
-            '37' : {
-                "area" : ['55,423,552,216'],
-                "cols" : ['186,250,296,326,383,427,467,507'],
+    "CH4": {
+        "page_defs": {
+            "37": {
+                "area": ["55,423,552,216"],
+                "cols": ["186,250,296,326,383,427,467,507"],
             },
         },
-        "rows_to_fix" : {
-            3 : ["1.A - Fuel Combustion",
-                 "1.B - Fugitive emissions from",
-                 "3 - Agriculture, Forestry, and",
-                 "3.C - Aggregate sources and",
-                 "4.D - Wastewater Treatment",
-                 "Total National Emissions (Gg",
-                 ]
+        "rows_to_fix": {
+            3: [
+                "1.A - Fuel Combustion",
+                "1.B - Fugitive emissions from",
+                "3 - Agriculture, Forestry, and",
+                "3.C - Aggregate sources and",
+                "4.D - Wastewater Treatment",
+                "Total National Emissions (Gg",
+            ]
         },
-        "cat_codes_manual" : {"Total National Emissions (Gg CH4)" : "0"},
-        "category_column" : "Categories",
-        "columns_to_drop" : ["Share, %", "Categories"],
-        "years" : ['1990', '1995', '2000', '2005', '2010', '2015', '2020'],
-        "unit" : "Gg",
+        "cat_codes_manual": {"Total National Emissions (Gg CH4)": "0"},
+        "category_column": "Categories",
+        "columns_to_drop": ["Share, %", "Categories"],
+        "years": ["1990", "1995", "2000", "2005", "2010", "2015", "2020"],
+        "unit": "Gg",
     },
-    "CO2" : {
-        'page_defs' : {
-            '36' : {
-                "area" : ['53,147,556,79'],
-                "cols" : ['150,204,254,306,352,406,459,513'],
+    "CO2": {
+        "page_defs": {
+            "36": {
+                "area": ["53,147,556,79"],
+                "cols": ["150,204,254,306,352,406,459,513"],
+            },
+            "37": {
+                "area": ["51,772,561,515"],
+                "cols": ["151,202,252,305,357,404,463,517"],
             },
-            '37' : {
-                "area" : ['51,772,561,515'],
-                "cols" : ['151,202,252,305,357,404,463,517'],
-            }
         },
-        "rows_to_fix" : {
-            2 : ["Categories",
-                 "Emissions and",
-                 ],
-            3 : ["1.A - Fuel",
-                 "1.B - Fugitive",
-                 "2 - Industrial Processes",
-                 "3 - Agriculture,",
-                 "Total National",
-                 "Total National",
-                 ],
-            5 : ["2.D - Non-Energy"],
-            -2 : [
+        "rows_to_fix": {
+            2: [
+                "Categories",
+                "Emissions and",
+            ],
+            3: [
+                "1.A - Fuel",
+                "1.B - Fugitive",
+                "2 - Industrial Processes",
+                "3 - Agriculture,",
+                "Total National",
+                "Total National",
+            ],
+            5: ["2.D - Non-Energy"],
+            -2: [
                 "Categories ",
                 "Emissions and Removals (Gg CO2)",
             ],
-
         },
-        "rows_to_drop" : [
+        "rows_to_drop": [
             "Total National Emissions (Gg CO2)",
-            "Total National Removals (Gg CO2)"
+            "Total National Removals (Gg CO2)",
         ],
-        "columns_to_drop" : ["Share, %", " Categories "],
-        "cat_codes_manual" : {"Total National Emissions and Removals (Gg CO2)" : "0"},
-        "category_column" : " Categories ",
-        "years" : ['1990', '1995', '2000', '2005', '2010', '2015', '2020'],
-        "unit" : "Gg",
+        "columns_to_drop": ["Share, %", " Categories "],
+        "cat_codes_manual": {"Total National Emissions and Removals (Gg CO2)": "0"},
+        "category_column": " Categories ",
+        "years": ["1990", "1995", "2000", "2005", "2010", "2015", "2020"],
+        "unit": "Gg",
     },
-    "entity_row" : 0,
-    "unit_row" : 1,
-
 }
 
 # primap2 format conversion
@@ -283,13 +277,6 @@ coords_cols = {
     "unit": "unit",
 }
 
-# TODO: That's probably the same as above, test again.
-coords_cols_wide = {
-    "category": "category",
-    "entity": "entity",
-    "unit": "unit",
-}
-
 coords_defaults = {
     "source": "MNG-GHG-Inventory",
     "provenance": "measured",
@@ -303,7 +290,6 @@ coords_terminologies = {
     "scenario": "PRIMAP",
 }
 
-# TODO
 gwp_to_use = "SARGWP100"
 coords_value_mapping = {
     "unit": "PRIMAP1",
@@ -335,24 +321,27 @@ meta_data = {
 }
 
 country_processing_step1 = {
+    "tolerance": 0.01,
     "aggregate_cats": {
-        "M.3.C.AG": {
-            "sources": [
-                "3.C.1",
-                "3.C.2",
-                "3.C.3",
-                "3.C.4",
-                "3.C.5",
-                "3.C.6",
-                "3.C.7",
-                "3.C.8",
-            ],
-            "name": "Aggregate sources and non-CO2 emissions sources on land "
-            "(Agriculture)",
-        },
+        # TODO: Remove the commented-out "M.3.C.AG" entry below; it is kept only to show the previous aggregation setup.
+        # "M.3.C.AG": {
+        #     "sources": [
+        #         "3.C.1",
+        #         "3.C.2",
+        #         "3.C.3",
+        #         "3.C.4",
+        #         "3.C.5",
+        #         "3.C.6",
+        #         "3.C.7",
+        #         "3.C.8",
+        #     ],
+        #     "name": "Aggregate sources and non-CO2 emissions sources on land "
+        #     "(Agriculture)",
+        # },
         "M.3.D.AG": {"sources": ["3.D.2"], "name": "Other (Agriculture)"},
+        # TODO: Verify that 3.C is equivalent to the former M.3.C.AG (sum of 3.C.1 to 3.C.8) for this inventory.
         "M.AG.ELV": {
-            "sources": ["M.3.C.AG", "M.3.D.AG"],
+            "sources": ["3.C", "M.3.D.AG"],
             "name": "Agriculture excluding livestock",
         },
         "M.AG": {"sources": ["3.A", "M.AG.ELV"], "name": "Agriculture"},
@@ -362,6 +351,9 @@ country_processing_step1 = {
             "sources": ["1", "2", "M.AG", "4", "5"],
             "name": "National total emissions excluding LULUCF",
         },
+        "3": {"sources": ["M.AG", "M.LULUCF"], "name": "AFOLU"},  # consistency check
+        "M.0.EL": {"sources": ["1", "2", "M.AG", "4"]},  # consistency check
+        "0": {"sources": ["1", "2", "3", "4"]},  # consistency check
     },
     "basket_copy": {
         "GWPs_to_add": ["AR4GWP100", "AR5GWP100", "AR6GWP100"],

+ 19 - 26
UNFCCC_GHG_data/UNFCCC_reader/Mongolia/read_MNG_BUR2_from_pdf.py

@@ -20,7 +20,6 @@ from config_MNG_BUR2 import (
     meta_data,
     country_processing_step1,
     gas_baskets,
-    coords_cols_wide
 )
 
 # ###
@@ -42,7 +41,7 @@ compression = dict(zlib=True, complevel=9)
 # 1. Read in main tables
 # ###
 
-df_all = None
+df_main = None
 for year in inv_conf_per_year.keys():
     print("-" * 60)
     print(f"Reading year {year}.")
@@ -119,13 +118,11 @@ for year in inv_conf_per_year.keys():
 
     # replace cat names by codes in col "category"
     # first the manual replacements
-    # TODO not sure this is needed
-    df_year_long["category"] = df_year_long["category"].str.replace("\n", "")
 
     df_year_long["category"] = df_year_long["category"].replace(
         inv_conf["cat_codes_manual"]
     )
-    # TODO not sure this is needed
+
     df_year_long["category"] = df_year_long["category"].str.replace(".", "")
 
     # then the regex replacements
@@ -145,20 +142,19 @@ for year in inv_conf_per_year.keys():
 
     df_year_long = df_year_long.drop(columns=["orig_cat_name"])
 
-    if df_all is None:
-        df_all = df_year_long
+    if df_main is None:
+        df_main = df_year_long
     else:
-        df_all = pd.concat(
-            [df_all, df_year_long],
+        df_main = pd.concat(
+            [df_main, df_year_long],
             axis=0,
             join="outer",
         ).reset_index(drop=True)
 
-# TODO: choose different name for df here
 ### convert to interchange format ###
 print("Converting to interchange format.")
-df_all_IF = pm2.pm2io.convert_long_dataframe_if(
-    df_all,
+df_main_IF = pm2.pm2io.convert_long_dataframe_if(
+    df_main,
     coords_cols=coords_cols,
     coords_defaults=coords_defaults,
     coords_terminologies=coords_terminologies,
@@ -171,14 +167,14 @@ df_all_IF = pm2.pm2io.convert_long_dataframe_if(
 
 ### convert to primap2 format ###
 print("Converting to primap2 format.")
-data_main_pm2 = pm2.pm2io.from_interchange_format(df_all_IF)
+data_main_pm2 = pm2.pm2io.from_interchange_format(df_main_IF)
 
 # ###
 # 2. Read in trend tables
 # ###
 
-df_all = None
-for entity in ["CO2", "CH4", "N2O", "HFCs", "NOx", "CO"]:
+df_trend = None
+for entity in inv_conf_per_entity.keys():
     print("-" * 60)
     print(f"Reading entity {entity}.")
 
@@ -252,28 +248,27 @@ for entity in ["CO2", "CH4", "N2O", "HFCs", "NOx", "CO"]:
     for year in inv_conf_per_entity[entity]["years"]:
         df_entity.loc[:, year] = df_entity[year].str.replace(",", "")
 
-    if df_all is None:
-        df_all = df_entity
+    if df_trend is None:
+        df_trend = df_entity
     else:
-        df_all = pd.concat(
-            [df_all, df_entity],
+        df_trend = pd.concat(
+            [df_trend, df_entity],
             axis=0,
             join="outer",
         ).reset_index(drop=True)
 
-
 ### convert to interchange format ###
 df_trend_IF = pm2.pm2io.convert_wide_dataframe_if(
-    data_wide=df_all,
-    coords_cols = coords_cols_wide,
+    data_wide=df_trend,
+    coords_cols=coords_cols,
     coords_defaults=coords_defaults,
     coords_terminologies=coords_terminologies,
     coords_value_mapping=coords_value_mapping,
-    #filter_remove=filter_remove,
+    # filter_remove=filter_remove,
     meta_data=meta_data,
     convert_str=True,
     time_format="%Y",
-    )
+)
 
 ### convert to primap2 format ###
 print("Converting to primap2 format.")
@@ -286,8 +281,6 @@ data_trend_pm2 = pm2.pm2io.from_interchange_format(df_trend_IF)
 print("Merging main and trend table.")
 data_pm2 = data_main_pm2.pr.merge(data_trend_pm2, tolerance=1)
 
-
-
 # ###
 # Save raw data to IF and native format.
 # ###