浏览代码

fix typed table's units

Daniel Busch 6 月之前
父节点
当前提交
0a88a4605b

+ 30 - 19
src/unfccc_ghg_data/unfccc_reader/Bangladesh/config_bgd_bur1.py

@@ -699,10 +699,9 @@ wide_to_long_col_replace = {
 
 manually_typed = {
     "figure_16": {
-        # TODO Conflicting entities in figure: CO2e or CO2?
-        # It says CO2 and gG more often, so I'm going with CO2
-        "unit": "Gg",
-        "entity": "CO2",
+        # Contrary to what the figure states, these are Kyoto gases in CO2eq
+        "unit": "GgCO2eq",
+        "entity": "KYOTOGHG (AR4GWP100)",
         "data": {
             "category": [
                 "1.A.2.a",
@@ -827,10 +826,9 @@ manually_typed = {
         },
     },
     "figure_17": {
-        # TODO Conflicting entities in figure: CO2e or CO2?
-        # This category should mainly be combustion emissions -> CO2
-        "entity": "CO2",
-        "unit": "Gg",
+        # Contrary to what the figure states, these are Kyoto gases in CO2eq
+        "unit": "GgCO2eq",
+        "entity": "KYOTOGHG (AR4GWP100)",
         "data": {
             "category": [
                 "1.A.3.a.ii",
@@ -907,9 +905,9 @@ manually_typed = {
         },
     },
     "figure_18": {
-        # TODO Conflicting entities in figure: CO2e or CO2?
-        "entity": "CO2",
-        "unit": "Gg",
+        # Contrary to what the figure states, these are Kyoto gases in CO2eq
+        "unit": "GgCO2eq",
+        "entity": "KYOTOGHG (AR4GWP100)",
         "data": {
             "category": ["1.A.4.a", "1.A.4.b"],
             "2013": [1871, 6703],
@@ -922,9 +920,9 @@ manually_typed = {
         },
     },
     "figure_19": {
-        # TODO Conflicting entities in figure: CO2e or CO2?
-        "unit": "Gg",
-        "entity": "CO2",
+        # Contrary to what the figure states, these are Kyoto gases in CO2eq
+        "unit": "GgCO2eq",
+        "entity": "KYOTOGHG (AR4GWP100)",
         "data": {
             "category": ["1.A.4.c.i", "1.A.4.c.iii", "1.A.4.c"],
             "2013": [2692, 5, 2697],
@@ -937,8 +935,8 @@ manually_typed = {
         },
     },
     "figure_20": {
-        "unit": "Gg",
-        "entity": "CO2",
+        "unit": "GgCO2eq",
+        "entity": "CH4",
         "data": {
             "category": ["1.B.2.b.iii.4", "1.B.2.b.iii.5", "1.B.2.b.iii"],
             "2013": [896, 8440, 9336],
@@ -949,6 +947,7 @@ manually_typed = {
             "2018": [896, 6429, 7325],
             "2019": [896, 4289, 5185],
         },
+        "unit_conversion": {"new_unit": "Gg", "conversion_factor": 25},
     },
 }
 
@@ -980,9 +979,7 @@ country_processing_step1 = {
                 "3.C.7",
             ]
         },
-        # There is no data for 3.D
-        # "M.3.D.AG": {"sources": ["3.D.2"], "name": "Other (Agriculture)"},
-        # "M.3.D.AG" is empty, so I'm not sure we need it
+        "3.C": {"sources": ["3.C.3", "3.C.4", "3.C.5", "3.C.7"]},
         "M.AG.ELV": {
             "sources": ["M.3.C.AG", "M.3.D.AG"],
         },
@@ -1015,12 +1012,26 @@ country_processing_step1 = {
                 "1.A.2.m",
             ]
         },
+        # check if typed numbers add up to the total of 1.A.3 from the main table
+        "1.A.3": {
+            "sources": [
+                "1.A.3.a.ii",
+                "1.A.3.b.i.2",
+                "1.A.3.b.ii.2",
+                "1.A.3.b.iii",
+                "1.A.3.b.iv",
+                "1.A.3.c",
+                "1.A.3.d.ii",
+            ]
+        },
         # check if the typed numbers add up to the total of 1.A.4.c in the same table
         "1.A.4.c": {"sources": ["1.A.4.c.i", "1.A.4.c.iii"]},
         # check if typed numbers add up to the total of 1.A.4 from the main table
         "1.A.4": {"sources": ["1.A.4.a", "1.A.4.b", "1.A.4.c"]},
         # check if the typed numbers add up to the total of 1.B.2.b.iii in the same table
         "1.B.2.b.iii": {"sources": ["1.B.2.b.iii.4", "1.B.2.b.iii.5"]},
+        # consistency check for 1.B.2
+        "1.B.2": {"sources": ["1.B.2.b.iii"]},
     },
     # We don't have HFCs and PFCs in the report, hence basket_copy is not relevant
     # "basket_copy": {

+ 23 - 8
src/unfccc_ghg_data/unfccc_reader/Bangladesh/read_BGD_BUR1_from_pdf.py

@@ -216,24 +216,39 @@ if __name__ == "__main__":
         df_typed_figure["entity"] = manually_typed[figure]["entity"]
         df_typed_figure["unit"] = manually_typed[figure]["unit"]
 
+        # adjust column names for wide to long function
+        df_typed_figure = df_typed_figure.rename(columns=wide_to_long_col_replace)
+        df_typed_figure_long = pd.wide_to_long(
+            df_typed_figure, stubnames="data", i="category", j="time"
+        ).reset_index()
+
+        if "unit_conversion" in manually_typed[figure].keys():
+            df_typed_figure_long["unit"] = manually_typed[figure]["unit_conversion"][
+                "new_unit"
+            ]
+            conv_factor = manually_typed[figure]["unit_conversion"]["conversion_factor"]
+            df_typed_figure_long["data"] = df_typed_figure_long["data"].map(
+                lambda a: a / conv_factor
+            )
+
         if df_typed is None:
-            df_typed = df_typed_figure
+            df_typed = df_typed_figure_long
         else:
             df_typed = pd.concat(
-                [df_typed, df_typed_figure],
+                [df_typed, df_typed_figure_long],
                 axis=0,
                 join="outer",
             ).reset_index(drop=True)
 
-    # adjust column names for wide to long function
-    df_typed = df_typed.rename(columns=wide_to_long_col_replace)
-    df_typed_long = pd.wide_to_long(
-        df_typed, stubnames="data", i="category", j="time"
-    ).reset_index()
+    # # adjust column names for wide to long function
+    # df_typed = df_typed.rename(columns=wide_to_long_col_replace)
+    # df_typed_long = pd.wide_to_long(
+    #     df_typed, stubnames="data", i="category", j="time"
+    # ).reset_index()
 
     # merge manually typed and main tables from Annex
     df_main = pd.concat(
-        [df_main, df_typed_long],
+        [df_main, df_typed],
         axis=0,
         join="outer",
     ).reset_index(drop=True)