Bladeren bron

main table 2013 and 2014

Daniel Busch 7 maanden geleden
bovenliggende
commit
4d9405d6be

+ 188 - 5
src/unfccc_ghg_data/unfccc_reader/Bangladesh/config_bgd_bur1.py

@@ -1,12 +1,65 @@
 """
 Configuration file to read Bangladesh's BUR 1.
 """
+
 coords_terminologies = {
     "area": "ISO3",
     "category": "IPCC2006_PRIMAP",
     "scenario": "PRIMAP",
 }
 
+# primap2 format conversion
+# Passed as ``coords_cols`` to ``pm2.pm2io.convert_long_dataframe_if`` by the
+# reader script.
+# NOTE(review): presumably maps each primap2 coordinate to the column of the
+# long table that holds it -- confirm against the primap2 documentation.
+coords_cols = {
+    "category": "category",
+    "entity": "entity",
+    "unit": "unit",
+}
+
+# Default coordinate values; passed as ``coords_defaults`` to
+# ``pm2.pm2io.convert_long_dataframe_if`` by the reader script.
+coords_defaults = {
+    "source": "BGD-GHG-Inventory",
+    "provenance": "measured",
+    "area": "BGD",
+    # this configuration reads Bangladesh's first BUR (see module docstring and
+    # the title in the meta data), so the scenario has to be BUR1, not BUR2
+    "scenario": "BUR1",
+}
+
+# GWP set label. In the lines shown here it is only referenced by the
+# commented-out entity mapping below.
+gwp_to_use = "AR4GWP100"
+
+# Passed as ``coords_value_mapping`` to ``pm2.pm2io.convert_long_dataframe_if``.
+coords_value_mapping = {
+    "unit": "PRIMAP1",
+    "category": "PRIMAP1",
+    # NOTE(review): the entity renaming is disabled. The main-table headers
+    # configured in this file only contain CO2, CH4 and N2O -- confirm that the
+    # mapping is not needed for any other table before deleting it.
+    # "entity": {
+    #     "HFCs": f"HFCS ({gwp_to_use})",
+    #     "PFCs": f"PFCS ({gwp_to_use})",
+    #     "SF6": f"SF6 ({gwp_to_use})",
+    #     "other halogenated gases": f"other halogenated gases ({gwp_to_use})",
+    #     "NMVOCs": "NMVOC",
+    # },
+}
+
+# Passed as ``filter_remove`` to ``pm2.pm2io.convert_long_dataframe_if``.
+# "MEMO" and "INFO" are the codes that ``cat_codes_manual`` assigns to the
+# "Memo items (5)" and "Information Items" rows of the main tables.
+filter_remove = {
+    "f_memo": {"category": "MEMO"},
+    "f_info": {"category": "INFO"},
+    # "f2": {
+    #     "entity": ["Other halogenated gases without CO2 equivalent conversion factors"],
+    # },
+}
+
+# Data set attributes; passed as ``meta_data`` to
+# ``pm2.pm2io.convert_long_dataframe_if`` by the reader script.
+meta_data = {
+    "references": "https://unfccc.int/documents/634149",
+    "rights": "",  # unknown
+    "contact": "daniel-busch@climate-resource.de",
+    "title": "Bangladesh. Biennial update report (BUR). BUR1",
+    "comment": "Read from pdf by Daniel Busch",
+    "institution": "UNFCCC",
+}
+
+# Settings shared by all inventory years when the reader reshapes the
+# extracted tables.
+inv_conf = {
+    # positions of the two rows the reader prepends to every table: the
+    # per-year "header" list comes first, the per-year "unit" list second
+    "entity_row": 0,
+    "unit_row": 1,
+    # column the reader sets as index before converting to long format
+    "index_cols": "Greenhouse gas source and sink categories",
+    # column names handed to ``pm2.pm2io.nir_convert_df_to_long``
+    "header_long": ["orig_cat_name", "entity", "unit", "time", "data"],
+    # the reader replaces each match by the content of the named group "code"
+    "cat_code_regexp": r"^(?P<code>[a-zA-Z0-9\.]{1,11})[\s\.].*",
+}
 inv_conf_per_year = {
     "2013": {
         "page_defs": {
@@ -20,10 +73,23 @@ inv_conf_per_year = {
                 "area": ["63,720,527,120"],
                 "cols": ["380,437,491"],
                 "skip_rows_start": 8,
-                "skip_rows_end": 6,
+                "skip_rows_end": 4,
             },
         },
         "rows_to_fix": {
+            3: [
+                "3 - GHG Emissions Agriculture, Livestock & Forest and Other Land -Use"
+            ],
+            2: [
+                # B-Methane emission from domestic waste water and
+                # c-nitrous oxide emission from domestic waste water are the same category
+                # and should be merged
+                "B-Methane emission from domestic waste water",
+                # Total Manure ch4 emissions and
+                # Total Direct n2o emissions from manure system are the same category
+                # and should be merged
+                "Total Manure ch4 emissions",
+            ],
             -2: [
                 "ch4 emission from rice field",
                 "indirect nitrous oxide (n2o) from n based fertilizer",
@@ -33,13 +99,130 @@ inv_conf_per_year = {
                 "Total Direct n2o emissions from manure system",
                 "Total indirect n2o emissions - Volatilization",
                 "Total indirect n2o emissions - leaching/Runoff",
+                "CO2 from Biomass burning for Energy purpose",
             ],
-            3: [
-                "3 - GHG Emissions Agriculture, Livestock & Forest and Other Land -Use"
+        },
+        "header": [
+            "Greenhouse gas source and sink categories",
+            "CO2",
+            "CH4",
+            "N2O",
+        ],
+        "unit": [
+            "-",
+            "Gg",
+            "Gg",
+            "Gg",
+        ],
+        "skip_rows": 6,
+        "cat_codes_manual": {
+            "1-A - Fuel Combustion Activities_Energy Industries": "1.A",
+            "1 - a1- electricity Generation": "1.A.1",
+            "2 a. 1-cement Production": "2.A.1",
+            "2 B. 1 - ammonia Production": "2.B.1",
+            "2 C-Metal Industry": "2.C",
+            "2 c. 1 iron and steel Production": "2.C.1",
+            "2. D - Non-Energy Products from Fuels and Solvent Use": "2.D",
+            "2D 1-lubricant Use": "2.D.1",
+            "ch4 emission from rice field": "3.C.7",
+            "indirect nitrous oxide (n2o) from n based fertilizer": "3.C.5",
+            "Direct nitrous oxide (n2o) emissions from fertilizer application": "3.C.4",
+            "Direct carbon Dioxide emissions from urea fertilizer": "3.C.3",
+            "Total enteric ch4 emissions": "3.A.1",
+            "Total Manure ch4 emissions Total Direct n2o emissions from manure system": "3.A.2",
+            "Total indirect n2o emissions -Volatilization": "3.C.5",
+            "Total indirect n2o emissions -leaching/Runoff": "3.C.5",
+            "4 a-solid Waste Disposal": "4.A",
+            "B-Methane emission from domestic waste water c-nitrous oxide emission from domestic waste water": "4.D.1",
+            "D- Metahne emission from industrial waste water": "4.D.2",
+            "Memo items (5)": "MEMO",
+            "Information Items": "INFO",
+            "international Bunkers": "M.BK",
+            "a-international aviation (international Bunkers)": "M.BK.A",
+            "B-international Water-borne navigation (international Bunkers)": "M.BK.M",
+            "CO2 from Biomass burning for Energy purpose": "M.BIO",
+        },
+        "merge_cats": "3C5",
+    },
+    "2014": {
+        "page_defs": {
+            "209": {
+                "area": ["74,715,542,78"],
+                "cols": ["380,441,498"],
+                "skip_rows_start": 9,
+                "skip_rows_end": 0,
+            },
+            "210": {
+                "area": ["64,715,529,196"],
+                "cols": ["380,435,491"],
+                "skip_rows_start": 8,
+                "skip_rows_end": 4,
+            },
+        },
+        "rows_to_fix": {
+            3: ["3 - GHG Emissions Agriculture, Livestock & Forest and Other"],
+            -2: [
+                "ch4 emission from rice field",
+                "indirect nitrous oxide (n2o) from n based fertilizer",
+                "Direct nitrous oxide (n2o) emissions from fertilizer application",
+                "Total enteric ch4 emissions",
+                "Total Manure ch4 emissions",
+                "Total Direct n2o emissions from manure system",
+                "Total indirect n2o emissions - Volatilization",
+                "Total indirect n2o emissions - leaching/Runoff",
+                "CO2 from Biomass burning for Energy purpose",
             ],
-            -3: [
-                "Greenhouse gas source and sink categories",
+            2: [
+                # B-Methane emission from domestic waste water and
+                # c-nitrous oxide emission from domestic waste water are the same category
+                # and should be merged
+                "B-Methane emission from domestic waste water",
+                # Total Manure ch4 emissions and
+                # Total Direct n2o emissions from manure system are the same category
+                # and should be merged
+                "Total Manure ch4 emissions",
             ],
         },
+        "header": [
+            "Greenhouse gas source and sink categories",
+            "CO2",
+            "CH4",
+            "N2O",
+        ],
+        "unit": [
+            "-",
+            "Gg",
+            "Gg",
+            "Gg",
+        ],
+        "skip_rows": 0,
+        "cat_codes_manual": {
+            "1-a - Fuel combustion activities_energy industries": "1.A",
+            "1 - a1- electricity Generation": "1.A.1",
+            "2 a. 1-cement Production": "2.A.1",
+            "2 B. 1 - ammonia Production": "2.B.1",
+            "2 C-Metal Industry": "2.C",
+            "2 c. 1 iron and steel Production": "2.C.1",
+            "2. D - Non-Energy Products from Fuels and Solvent Use": "2.D",
+            "2D 1-lubricant Use": "2.D.1",
+            "ch4 emission from rice field": "3.C.7",
+            "indirect nitrous oxide (n2o) from n based fertilizer": "3.C.5",
+            "Direct nitrous oxide (n2o) emissions from fertilizer application": "3.C.4",
+            "Direct carbon Dioxide emissions from urea fertilizer": "3.C.3",
+            "Total enteric ch4 emissions": "3.A.1",
+            "Total Manure ch4 emissions Total Direct n2o emissions from manure system": "3.A.2",
+            "Total indirect n2o emissions -Volatilization": "3.C.5",
+            "Total indirect n2o emissions -leaching/Runoff": "3.C.5",
+            "4 a-solid Waste Disposal": "4.A",
+            "B-Methane emission from domestic waste water c-nitrous oxide emission from domestic waste water": "4.D.1",
+            "D- Metahne emission from industrial waste water": "4.D.2",
+            "Memo items (5)": "MEMO",
+            "Information Items": "INFO",
+            "international Bunkers": "M.BK",
+            "a-international aviation (international Bunkers)": "M.BK.A",
+            "B-international Water-borne navigation (international Bunkers)": "M.BK.M",
+            "CO2 from Biomass burning for Energy purpose": "M.BIO",
+        },
+        "merge_cats": "3C5",
     },
 }

+ 123 - 68
src/unfccc_ghg_data/unfccc_reader/Bangladesh/read_BGD_BUR1_from_pdf.py

@@ -3,8 +3,19 @@ Read Bangladesh's BUR1 from pdf
 """
 
 import camelot
+import numpy as np
 import pandas as pd
-from config_bgd_bur1 import coords_terminologies, inv_conf_per_year
+import primap2 as pm2
+from config_bgd_bur1 import (
+    coords_cols,
+    coords_defaults,
+    coords_terminologies,
+    coords_value_mapping,
+    filter_remove,
+    inv_conf,
+    inv_conf_per_year,
+    meta_data,
+)
 
 from unfccc_ghg_data.helper import (
     downloaded_data_path,
@@ -28,12 +39,15 @@ if __name__ == "__main__":
     category_column = f"category ({coords_terminologies['category']})"
     compression = dict(zlib=True, complevel=9)
 
+    def repl(m):
+        """Return the text captured by the ``code`` group of match *m*."""
+        return m.group("code")
+
     # ###
     # 1. Read in main tables from the Annex
     # ###
-
+    df_main = None
     df_year = None
-    for year in inv_conf_per_year.keys():
+    for year in reversed(list(inv_conf_per_year.keys())):
         print("-" * 60)
         print(f"Reading year {year}.")
         print("-" * 60)
@@ -89,68 +103,109 @@ if __name__ == "__main__":
                     col_to_use=0,
                     n_rows=n_rows,
                 )
-        pass
-        # df_header = pd.DataFrame([inv_conf["header"], inv_conf["unit"]])
-        #
-        # skip_rows = 11
-        # df_year = pd.concat(
-        #     [df_header, df_year[skip_rows:]], axis=0, join="outer"
-        # ).reset_index(drop=True)
-        #
-        # df_year = pm2.pm2io.nir_add_unit_information(
-        #     df_year,
-        #     unit_row=inv_conf["unit_row"],
-        #     entity_row=inv_conf["entity_row"],
-        #     regexp_entity=".*",
-        #     regexp_unit=".*",
-        #     default_unit="Gg",
-        # )
-        #
-        # print("Added unit information.")
-        #
-        # # set index
-        # df_year = df_year.set_index(inv_conf["index_cols"])
-        #
-        # # convert to long format
-        # df_year_long = pm2.pm2io.nir_convert_df_to_long(
-        #     df_year, year, inv_conf["header_long"]
-        # )
-        #
-        # # extract from tuple
-        # df_year_long["orig_cat_name"] = df_year_long["orig_cat_name"].str[0]
-        #
-        # # prep for conversion to PM2 IF and native format
-        # # make a copy of the categories row
-        # df_year_long["category"] = df_year_long["orig_cat_name"]
-        #
-        # # replace cat names by codes in col "category"
-        # # first the manual replacements
-        #
-        # df_year_long["category"] = df_year_long["category"].replace(
-        #     inv_conf["cat_codes_manual"]
-        # )
-        #
-        # df_year_long["category"] = df_year_long["category"].str.replace(".", "")
-        #
-        # # then the regex replacements
-        # df_year_long["category"] = df_year_long["category"].str.replace(
-        #     inv_conf["cat_code_regexp"], repl, regex=True
-        # )
-        #
-        # df_year_long = df_year_long.reset_index(drop=True)
-        #
-        # df_year_long["data"] = df_year_long["data"].str.replace(",", "")
-        #
-        # # make sure all col headers are str
-        # df_year_long.columns = df_year_long.columns.map(str)
-        #
-        # df_year_long = df_year_long.drop(columns=["orig_cat_name"])
-        #
-        # if df_main is None:
-        #     df_main = df_year_long
-        # else:
-        #     df_main = pd.concat(
-        #         [df_main, df_year_long],
-        #         axis=0,
-        #         join="outer",
-        #     ).reset_index(drop=True)
+
+        df_header = pd.DataFrame(
+            [inv_conf_per_year[year]["header"], inv_conf_per_year[year]["unit"]]
+        )
+        skip_rows = inv_conf_per_year[year]["skip_rows"]
+
+        df_year = pd.concat(
+            [df_header, df_year[skip_rows:]], axis=0, join="outer"
+        ).reset_index(drop=True)
+
+        # Take the positions of the entity and unit rows from the shared
+        # configuration instead of repeating them as literals; they describe
+        # the two-row header (entities, units) prepended above.
+        df_year = pm2.pm2io.nir_add_unit_information(
+            df_year,
+            unit_row=inv_conf["unit_row"],
+            entity_row=inv_conf["entity_row"],
+            regexp_entity=".*",
+            regexp_unit=".*",
+            default_unit="Gg",
+        )
+
+        print("Added unit information.")
+
+        # set index
+        df_year = df_year.set_index(inv_conf["index_cols"])
+
+        # convert to long format
+        df_year_long = pm2.pm2io.nir_convert_df_to_long(
+            df_year, year, inv_conf["header_long"]
+        )
+
+        # extract from tuple
+        df_year_long["orig_cat_name"] = df_year_long["orig_cat_name"].str[0]
+
+        # prep for conversion to PM2 IF and native format
+        # make a copy of the categories row
+        df_year_long["category"] = df_year_long["orig_cat_name"]
+
+        # first the manual replacements
+        df_year_long["category"] = df_year_long["category"].replace(
+            inv_conf_per_year[year]["cat_codes_manual"]
+        )
+
+        # Remove dots between letters in category codes.
+        # regex=False is passed explicitly: the default of ``regex`` changed
+        # in pandas 2.0 and "." is a regex metacharacter, so this keeps the
+        # replacement a literal one independent of the installed version.
+        df_year_long["category"] = df_year_long["category"].str.replace(
+            ".", "", regex=False
+        )
+        # Some categories have a dash between the letters
+        df_year_long["category"] = df_year_long["category"].str.replace(
+            "-", " ", regex=False
+        )
+
+        # then the regex replacements
+        df_year_long["category"] = df_year_long["category"].str.replace(
+            inv_conf["cat_code_regexp"], repl, regex=True
+        )
+
+        df_year_long = df_year_long.reset_index(drop=True)
+
+        # make sure all col headers are str
+        df_year_long.columns = df_year_long.columns.map(str)
+
+        df_year_long = df_year_long.drop(columns=["orig_cat_name"])
+
+        # TODO Is there a better way to do this?
+        # merge duplicate categories and sum their values
+        if "merge_cats" in inv_conf_per_year[year]:
+            cat = inv_conf_per_year[year]["merge_cats"]
+            # filter by category to be merged
+            df_temp = df_year_long.loc[df_year_long["category"] == cat]
+            # empty cells become NaN so that the column can be cast to float
+            df_temp = df_temp.replace("", np.nan)
+            df_temp["data"] = df_temp["data"].apply(float)
+            # sum values for duplicate entries; min_count=1 leaves the sum of a
+            # group without any value as NaN instead of 0, so only those cells
+            # are blanked below and a genuine total of 0 is kept
+            df_temp = df_temp.groupby(
+                ["entity", "unit", "time", "category"], as_index=False
+            )["data"].sum(min_count=1)
+            # change missing sums back to empty strings
+            df_temp = df_temp.fillna("")
+            # drop category from df
+            df_year_long = df_year_long.drop(
+                df_year_long[df_year_long["category"] == cat].index
+            )
+            # append the summed up sub-set
+            df_year_long = pd.concat(
+                [df_temp, df_year_long],
+                axis=0,
+                join="outer",
+            ).reset_index(drop=True)
+
+        if df_main is None:
+            df_main = df_year_long
+        else:
+            df_main = pd.concat(
+                [df_main, df_year_long],
+                axis=0,
+                join="outer",
+            ).reset_index(drop=True)
+
+    ### convert to interchange format ###
+    print("Converting to interchange format.")
+    df_main_IF = pm2.pm2io.convert_long_dataframe_if(
+        df_main,
+        coords_cols=coords_cols,
+        coords_defaults=coords_defaults,
+        coords_terminologies=coords_terminologies,
+        coords_value_mapping=coords_value_mapping,
+        filter_remove=filter_remove,
+        meta_data=meta_data,
+        convert_str=True,
+        time_format="%Y",
+    )
+    pass