瀏覽代碼

gas basket works

Daniel Busch 9 月之前
父節點
當前提交
fae3e70d8f

+ 101 - 84
src/unfccc_ghg_data/unfccc_reader/Mongolia/config_mng_bur2.py

@@ -16,10 +16,10 @@ inv_conf = {
     "cat_code_regexp": r"^(?P<code>[a-zA-Z0-9\.]{1,11})[\s\.].*",
     "cat_codes_manual": {
         # remove whitespace at start of line
-        " 2.G.2 -SF6 and PFCs from Other Product Uses": "2.G.2 - SF6 and PFCs from Other Product Uses",
-        " 2.G.3 -N2O from Product Uses": "2.G.3 - N2O from Product Uses",
-        " 1.C.1 -Transport of CO2": "1.C.1 - Transport of CO2",
-        " 3.C.1 -Emissions from biomass burning ": "3.C.1",
+        "2.G.2 -SF6 and PFCs from Other Product Uses": "2.G.2 - SF6 and PFCs from Other Product Uses",
+        "2.G.3 -N2O from Product Uses": "2.G.3 - N2O from Product Uses",
+        "1.C.1 -Transport of CO2": "1.C.1 - Transport of CO2",
+        "3.C.1 -Emissions from biomass burning ": "3.C.1",
         "Memo Items (5)": "MEMO",
         "International Bunkers": "M.BK",
         "1.A.3.a.i - International Aviation (International Bunkers) (1)": "M.BK.A",
@@ -78,7 +78,7 @@ inv_conf_per_year = {
                 "1.A.3.a.i - International Aviation (International",
             ],
             -2: ["3.C.1 - Emissions from biomass burning"],
-            2: [" 3.C.1 -Emissions from biomass burning"],
+            2: ["3.C.1 -Emissions from biomass burning"],
         },
         "page_defs": {
             "176": {
@@ -181,8 +181,9 @@ inv_conf_per_entity = {
         },
         "cat_codes_manual": {"Total National Emissions (Gg CO2e)": "0"},
         "category_column": "Categories",
-        "columns_to_drop": ["Share, %", "Categories"],
-        "years": ["2007", "2010", "2015", "2020"],
+        # 2007 will break gas basket consistency check
+        "columns_to_drop": ["Share, %", "Categories", "2007"],
+        "years": ["2010", "2015", "2020"],
         "unit": "Gg CO2e",
     },
     "N2O": {
@@ -208,6 +209,7 @@ inv_conf_per_entity = {
         "columns_to_drop": ["Share, %", "Categories"],
         "years": ["1990", "1995", "2000", "2005", "2010", "2015", "2020"],
         "unit": "Gg",
+        "del_value": [("1995", "4"), ("2005", "4")],
     },
     "CH4": {
         "page_defs": {
@@ -231,6 +233,7 @@ inv_conf_per_entity = {
         "columns_to_drop": ["Share, %", "Categories"],
         "years": ["1990", "1995", "2000", "2005", "2010", "2015", "2020"],
         "unit": "Gg",
+        "del_value": [("1995", "4"), ("2005", "4")],
     },
     "CO2": {
         "page_defs": {
@@ -258,7 +261,7 @@ inv_conf_per_entity = {
             ],
             5: ["2.D - Non-Energy"],
             -2: [
-                "Categories ",
+                "Categories",
                 "Emissions and Removals (Gg CO2)",
             ],
         },
@@ -266,9 +269,9 @@ inv_conf_per_entity = {
             "Total National Emissions (Gg CO2)",
             "Total National Removals (Gg CO2)",
         ],
-        "columns_to_drop": ["Share, %", " Categories "],
+        "columns_to_drop": ["Share, %", "Categories"],
         "cat_codes_manual": {"Total National Emissions and Removals (Gg CO2)": "0"},
-        "category_column": " Categories ",
+        "category_column": "Categories",
         "years": ["1990", "1995", "2000", "2005", "2010", "2015", "2020"],
         "unit": "Gg",
     },
@@ -336,13 +339,13 @@ inv_conf_per_sector = {
                 "Year",
             ],
         },
-        "year_column": " Year ",
+        "year_column": "Year",
         "cat_codes_manual": {
-            " Energy ": "1",
-            " IPPU ": "2",
-            " Agriculture ": "M.AG",
-            " Waste ": "4",
-            " LULUCF ": "M.LULUCF",
+            "Energy": "1",
+            "IPPU": "2",
+            "Agriculture": "M.AG",
+            "Waste": "4",
+            "LULUCF": "M.LULUCF",
             "Total (excl. LULUCF)": "M.0.EL",
             "Total (incl. LULUCF)": "0",
         },
@@ -367,16 +370,16 @@ inv_conf_per_sector = {
             ],
         },
         "rows_to_drop": [0, 2],
-        "year_column": "Years     ",
+        "year_column": "Years",
         "cat_codes_manual": {
-            r" 1.A.1.a.i Electricity  generation  ": "1.A.1.a.i",
-            r" 1.A.1.a.ii  Combined  heat and ipower peneration (CHP)": "1.A.1.a.ii",
-            r" 1.A.1.c.ii  Other  energy ndustries ": "1.A.1.c.ii",
-            r"Manufacturing industries and  construction   ": "1.A.2",
-            r" 1.A.3.a 1 Civil  aviation t  ": "1.A.3.a",
-            r" .A.3.b Road  ransportation  ": "1.A.3.b",
-            r" 1.A.3.c Railways    ": "1.A.3.c",
-            r" 1.A.3.e.ii  Off-road   ": "1.A.3.e.ii",
+            "1.A.1.a.i Electricity  generation": "1.A.1.a.i",
+            "1.A.1.a.ii  Combined  heat and ipower peneration (CHP)": "1.A.1.a.ii",
+            "1.A.1.c.ii  Other  energy ndustries": "1.A.1.c.ii",
+            "Manufacturing industries and  construction": "1.A.2",
+            "1.A.3.a 1 Civil  aviation t": "1.A.3.a",
+            ".A.3.b Road  ransportation": "1.A.3.b",
+            "1.A.3.c Railways": "1.A.3.c",
+            "1.A.3.e.ii  Off-road": "1.A.3.e.ii",
         },
     },
     "energy cont": {
@@ -395,16 +398,16 @@ inv_conf_per_sector = {
             ],
         },
         "rows_to_drop": [0, 2],
-        "year_column": "Years    ",
+        "year_column": "Years",
         "cat_codes_manual": {
-            "Other sectors 1.A.4.a Commercial/ Institutional  ": "1.A.4.a",
-            " 1.A.4.b Residen-tial  ": "1.A.4.b",
-            " 1.A.4.c.i Agriculture -Stationary  ": "1.A.4.c.i",
-            " 1.A.4.c.ii Agriculture -Off-road vehicles and other machinery": "1.A.4.c.ii",
-            "Non-specified 1.A.5.a Stationary  ": "1.A.5.a",
-            "Fugitive emis 1.B.1.a Coal mining & handling (surface mining) ": "1.B.1.a",
-            "sions from fu 1.B.2.a.ii Oil -Flaring  ": "1.B.2.a.ii",
-            "els 1.B.2.a.iii.2 Oil production and upgrading ": "1.B.2.a.iii",
+            "Other sectors 1.A.4.a Commercial/ Institutional": "1.A.4.a",
+            "1.A.4.b Residen-tial": "1.A.4.b",
+            "1.A.4.c.i Agriculture -Stationary": "1.A.4.c.i",
+            "1.A.4.c.ii Agriculture -Off-road vehicles and other machinery": "1.A.4.c.ii",
+            "Non-specified 1.A.5.a Stationary": "1.A.5.a",
+            "Fugitive emis 1.B.1.a Coal mining & handling (surface mining)": "1.B.1.a",
+            "sions from fu 1.B.2.a.ii Oil -Flaring": "1.B.2.a.ii",
+            "els 1.B.2.a.iii.2 Oil production and upgrading": "1.B.2.a.iii",
         },
     },
     "ippu": {
@@ -422,13 +425,13 @@ inv_conf_per_sector = {
                 "Year",
             ],
         },
-        "year_column": "Year ",
+        "year_column": "Year",
         "cat_codes_manual": {
-            "2.A-Mineral industry ": "2.A",
-            "2.C-Metal industry ": "2.C",
+            "2.A-Mineral industry": "2.A",
+            "2.C-Metal industry": "2.C",
             "2.D-Non-energy products from fuels and solvent use": "2.D",
             "2.F-Product uses as substitutes for ozone depleting substances": "2.F",
-            "2. IPPU Total ": "2",
+            "2. IPPU Total": "2",
         },
         "remove_duplicates": ["2"],
     },
@@ -447,14 +450,14 @@ inv_conf_per_sector = {
             ],
         },
         "rows_to_drop": [0, 1],
-        "year_column": "Year ",
+        "year_column": "Year",
         "cat_codes_manual": {
             "Fermentation Gg": "3.A.1",
             "Management CH4": "3.A.2",
-            " (Total CH4) ": "3.A",
+            "(Total CH4)": "3.A",
             "Fermentation Gg C": "3.A.1",
             "Management O2e": "3.A.2",
-            " (Gg CO2e) ": "3.A",
+            "(Gg CO2e)": "3.A",
         },
         "multi_entity": {
             "unit": ["Gg", "Gg", "Gg", "Gg CO2e", "Gg CO2e", "Gg CO2e"],
@@ -486,7 +489,7 @@ inv_conf_per_sector = {
                 "3.C.1 - Emiss",
             ],
         },
-        "year_column": "  Year  ",
+        "year_column": "Year",
         # TODO: These categories are technically duplicate, just with a different unit
         "categories_to_drop": [
             "3.C.1 -Emiss  CH4 (Gg CO2e)",
@@ -494,10 +497,10 @@ inv_conf_per_sector = {
             "ss burning  Total (Gg CO2e)",
         ],
         "cat_codes_manual": {
-            " 3.C.1  CH4 (Gg) ": "3.C.1",
-            " -Emissions fr  N2O (Gg) ": "3.C.1",
-            " om biomass bur  NOx (Gg) ": "3.C.1",
-            " ning  CO(Gg) ": "3.C.1",
+            "3.C.1  CH4 (Gg)": "3.C.1",
+            "-Emissions fr  N2O (Gg)": "3.C.1",
+            "om biomass bur  NOx (Gg)": "3.C.1",
+            "ning  CO(Gg)": "3.C.1",
         },
         "multi_entity": {
             "unit": ["Gg", "Gg", "Gg", "Gg"],
@@ -523,29 +526,35 @@ inv_conf_per_sector = {
                 "Urine and dung",
             ],
         },
-        "year_column": "  Year   ",
+        "year_column": "Year",
         # # TODO: technically duplicate, just with a different unit
         "categories_to_drop": [
-            " 3.C.4 -Direct N2O Emissions from managed soils (CO2e) Gg CO2e",
+            "3.C.4 -Direct N2O Emissions from managed soils (CO2e) Gg CO2e",
+            "Inorganic N fertilizer application  N2O (Gg)",
+            "Organic N applied as fertilizer (manure) N2O (Gg)",
+            "Urine and dung N deposited on pasture, range and paddock by grazing animals N2O (Gg)",
+            "N in crop residues  N2O (Gg)",
         ],
         "cat_codes_manual": {
             # TODO the next 4 categories are made up placeholders
-            " Inorganic N fertilizer application  N2O (Gg)": "3.C.4.i",
-            " Organic N applied as fertilizer (manure) N2O (Gg)": "3.C.4.ii",
-            "Urine and dung N deposited on pasture, range and paddock by grazing animals N2O (Gg)": "3.C.4.iii",
-            "  N in crop residues  N2O (Gg)": "3.C.4.iiii",
-            " 3.C.4 -Direct N2O Emissions from managed soils N2O (Gg)": "3.C.4",
-        },
-        "multi_entity": {
-            "unit": ["Gg", "Gg", "Gg", "Gg", "Gg"],
-            "entity": [
-                "N2O",
-                "N2O",
-                "N2O",
-                "N2O",
-                "N2O",
-            ],
+            # "Inorganic N fertilizer application  N2O (Gg)": "3.C.4.i",
+            # "Organic N applied as fertilizer (manure) N2O (Gg)": "3.C.4.ii",
+            # "Urine and dung N deposited on pasture, range and paddock by grazing animals N2O (Gg)": "3.C.4.iii",
+            # "N in crop residues  N2O (Gg)": "3.C.4.iiii",
+            "3.C.4 -Direct N2O Emissions from managed soils N2O (Gg)": "3.C.4",
         },
+        "entity": "N2O",
+        "unit": "Gg",
+        # "multi_entity": {
+        #     "unit": ["Gg", "Gg", "Gg", "Gg", "Gg"],
+        #     "entity": [
+        #         "N2O",
+        #         "N2O",
+        #         "N2O",
+        #         "N2O",
+        #         "N2O",
+        #     ],
+        # },
     },
     "managed_soils_indirect": {
         "page_defs": {
@@ -565,15 +574,17 @@ inv_conf_per_sector = {
                 "3.C.5 - Indirect N2O",
             ],
         },
-        "year_column": "  Year  ",
+        "year_column": "Year",
         # # TODO: technically duplicate, just with a different unit
         "categories_to_drop": [
-            "3.C.5 -Indirect N2O emissions from managed  soils Gg CO2e"
+            "3.C.5 -Indirect N2O emissions from managed  soils Gg CO2e",
+            "Volatilization  pathway Gg N2O",
+            "Leaching/runoff  pathway Gg N2O",
         ],
         "cat_codes_manual": {
             # TODO the next 2 categories are made up placeholders
-            " Volatilization  pathway Gg N2O": "3.C.5.i",
-            " Leaching/runoff  pathway Gg N2O": "3.C.5.ii",
+            # "Volatilization  pathway Gg N2O": "3.C.5.i",
+            # "Leaching/runoff  pathway Gg N2O": "3.C.5.ii",
             "3.C.5 -Indirect N2O emissions from managed  soils Gg N2O": "3.C.5",
         },
         "entity": "N2O",
@@ -592,17 +603,24 @@ inv_conf_per_sector = {
                 "Year",
             ],
         },
-        "year_column": "Year ",
+        "year_column": "Year",
         # # TODO: technically duplicate, just with a different unit
-        "categories_to_drop": ["Total emissions from SWDS Gg CO2e"],
+        "categories_to_drop": [
+            "Total emissions from SWDS Gg CO2e",
+            "Food",
+            "Garden",
+            "Paper Gg CH4",
+            "Wood",
+            "Textile",
+        ],
         "cat_codes_manual": {
             # TODO the categories are made up placeholders
-            "Food ": "4.A.1.food",
-            "Garden ": "4.A.1.garden",
-            "Paper Gg CH4": "4.A.1.paper",
-            "Wood ": "4.A.1.wood",
-            "Textile ": "4.A.1.textile",
-            "Total ": "4.A.1.",
+            # "Food": "4.A.1.food",
+            # "Garden": "4.A.1.garden",
+            # "Paper Gg CH4": "4.A.1.paper",
+            # "Wood": "4.A.1.wood",
+            # "Textile": "4.A.1.textile",
+            "Total": "4.A.1.",
         },
         "entity": "CH4 ",
         "unit": "Gg",
@@ -625,19 +643,18 @@ inv_conf_per_sector = {
                 "Wastewater",
             ],
         },
-        "year_column": "   Year  ",
+        "year_column": "Year",
         # # TODO: technically duplicate, just with a different unit
         "categories_to_drop": [
-            " Domestic wastewater  CH4 emissions ",
-            " Domestic wastewater  N2O emissions (Gg C",
-            " Industrial wastewater  CH4 emissions O2 e)",
-            "Wastewater treatment and discharge  Total emissions ",
+            "Domestic wastewater  CH4 emissions",
+            "Domestic wastewater  N2O emissions (Gg C",
+            "Industrial wastewater  CH4 emissions O2 e)",
+            "Wastewater treatment and discharge  Total emissions",
         ],
         "cat_codes_manual": {
-            # TODO the categories are made up placeholders
-            " Domestic wastewater  CH4 emissions (Gg CH4)": "4.D.1",
-            " Domestic wastewater  N2O emissions (Gg N2O)": "4.D.1",
-            " Industrial wastewater  CH4 emissions (Gg CH4)": "4.D.2",
+            "Domestic wastewater  CH4 emissions (Gg CH4)": "4.D.1",
+            "Domestic wastewater  N2O emissions (Gg N2O)": "4.D.1",
+            "Industrial wastewater  CH4 emissions (Gg CH4)": "4.D.2",
         },
         "multi_entity": {
             "unit": ["Gg", "Gg", "Gg"],

+ 8 - 0
src/unfccc_ghg_data/unfccc_reader/Mongolia/read_MNG_BUR2_from_pdf.py

@@ -257,6 +257,10 @@ if __name__ == "__main__":
         for year in inv_conf_per_entity[entity]["years"]:
             df_entity.loc[:, year] = df_entity[year].str.replace(",", "")
 
+        if "del_value" in inv_conf_per_entity[entity]:
+            for year_del, category_del in inv_conf_per_entity[entity]["del_value"]:
+                df_entity.loc[df_entity["category"] == category_del, year_del] = ""
+
         if df_trend is None:
             df_trend = df_entity
         else:
@@ -422,10 +426,14 @@ if __name__ == "__main__":
         # transpose so categories are in first columns
         df_sector = df_sector.T
 
+        # strip white spaces from column names
+        df_sector.columns = df_sector.columns.str.strip()
+
         df_sector = df_sector.rename(
             columns={inv_conf_per_sector[sector]["year_column"]: "category"}
         )
 
+        df_sector["category"] = df_sector["category"].str.strip()
         df_sector["category"] = df_sector["category"].str.replace("\n", "")
 
         # TODO This is the same functionality as remove_duplicates ?