
all tables, saved in raw format

Daniel Busch, 6 months ago
Commit b0767d4bef

+ 144 - 4
src/unfccc_ghg_data/unfccc_reader/Saint_Kitts_and_Nevis/config_kna_bur1.py

@@ -5,10 +5,10 @@ Tables to read:
 - The sector tables in the Annex from page 149 - done
 - trend tables page 111-113 - done
 - page 116 - done
-- page 118- work in progress
-- page 119
-- page 121
-- page 124
+- page 118 - done
+- page 119 - done
+- page 121 - done
+- page 123 - wip
 
 Not reading:
 - page 97 - trend table with data for 2008, because it's in the trend tables from page 111
@@ -70,6 +70,144 @@ conf_general = {
 }
 
 conf_trend = {
+    "fugitive": {
+        "rows_to_fix": {2: ["1.B.3 - Other emissions from"]},
+        "page_defs": {
+            "125": {
+                "read_params": dict(flavor="lattice"),
+                "skip_rows_start": 2,
+            },
+            "126": {
+                "read_params": dict(
+                    flavor="stream",
+                    table_areas=["72,681,564,638"],
+                    columns=["203,238,272,305,340,370,402,439,469,504,536"],
+                ),
+                "skip_rows_start": 1,
+            },
+        },
+        "entity": f"KYOTOGHG ({gwp_to_use})",
+        "unit": "GgCO2eq",
+        "header": ["orig_category"],
+        "years": [
+            "2008",
+            "2009",
+            "2010",
+            "2011",
+            "2012",
+            "2013",
+            "2014",
+            "2015",
+            "2016",
+            "2017",
+            "2018",
+        ],
+        "extra_columns": [],
+    },
+    "other_sectors": {
+        "page_defs": {
+            "123": {
+                "read_params": dict(flavor="lattice"),
+                "skip_rows_start": 2,
+            },
+        },
+        "entity": f"KYOTOGHG ({gwp_to_use})",
+        "unit": "GgCO2eq",
+        "header": ["orig_category"],
+        "years": [
+            "2008",
+            "2009",
+            "2010",
+            "2011",
+            "2012",
+            "2013",
+            "2014",
+            "2015",
+            "2016",
+            "2017",
+            "2018",
+        ],
+        "extra_columns": [],
+    },
+    "transport_sub": {
+        "page_defs": {
+            "121": {
+                "read_params": dict(flavor="lattice"),
+                "skip_rows_start": 2,
+            },
+            "122": {
+                "read_params": dict(flavor="lattice"),
+                "skip_rows_start": 0,
+            },
+        },
+        "entity": f"KYOTOGHG ({gwp_to_use})",
+        "unit": "GgCO2eq",
+        "header": ["orig_category"],
+        "years": [
+            "2008",
+            "2009",
+            "2010",
+            "2011",
+            "2012",
+            "2013",
+            "2014",
+            "2015",
+            "2016",
+            "2017",
+            "2018",
+        ],
+        "extra_columns": [],
+    },
+    "transport": {
+        "page_defs": {
+            "119": {
+                "read_params": dict(flavor="lattice"),
+                "skip_rows_start": 2,
+            }
+        },
+        "entity": f"KYOTOGHG ({gwp_to_use})",
+        "unit": "GgCO2eq",
+        "header": ["orig_category"],
+        "years": [
+            "2008",
+            "2009",
+            "2010",
+            "2011",
+            "2012",
+            "2013",
+            "2014",
+            "2015",
+            "2016",
+            "2017",
+            "2018",
+        ],
+        "extra_columns": [],
+    },
+    "manufacturing_and_construction": {
+        "page_defs": {
+            "118": {
+                "read_params": dict(flavor="lattice"),
+                "skip_rows_start": 2,
+            }
+        },
+        "entity": f"KYOTOGHG ({gwp_to_use})",
+        "unit": "GgCO2eq",
+        "header": ["orig_category"],
+        "years": [
+            "2008",
+            "2009",
+            "2010",
+            "2011",
+            "2012",
+            "2013",
+            "2014",
+            "2015",
+            "2016",
+            "2017",
+            "2018",
+        ],
+        "extra_columns": [],
+    },
     "energy_industries": {
         "entity": f"KYOTOGHG ({gwp_to_use})",
         "unit": "GgCO2eq",
@@ -107,6 +245,8 @@ conf_trend = {
         },
     },
     "overview": {
+        # Inconsistencies between the tables on page 111 and page 125 for categories 1.B, 1.B.1 and 1.B.2
+        "rows_to_drop": ["1B", "1B1", "1B2"],
         "fix_single_value": {
             "cat": "MBIO",
             "year": "2018",

+ 40 - 7
src/unfccc_ghg_data/unfccc_reader/Saint_Kitts_and_Nevis/read_KNA_BUR1_from_pdf.py

@@ -99,9 +99,10 @@ if __name__ == "__main__":
         # Remove line break characters
         df_table["category"] = df_table["category"].str.replace("\n", " ")
         # first the manual replacements
-        df_table["category"] = df_table["category"].replace(
-            conf_trend[table]["cat_codes_manual"]
-        )
+        if "cat_codes_manual" in conf_trend[table].keys():
+            df_table["category"] = df_table["category"].replace(
+                conf_trend[table]["cat_codes_manual"]
+            )
         # remove dots from category codes
         df_table["category"] = df_table["category"].str.replace(".", "")
         # then the regex replacements
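A toy example of the category normalisation these lines perform (manual mapping first, only when configured, then dots stripped from the codes); the mapping value below is invented for illustration:

```python
import pandas as pd

# Hypothetical stand-in for conf_trend[table]["cat_codes_manual"].
cat_codes_manual = {"Memo Items (not accounted in Total Emissions)": "MEMO"}
categories = pd.Series(["1.A.1", "Memo Items (not accounted in Total Emissions)"])

categories = categories.replace(cat_codes_manual)           # manual fixes, only if configured
categories = categories.str.replace(".", "", regex=False)   # "1.A.1" -> "1A1"
print(categories.tolist())  # ['1A1', 'MEMO']
```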
@@ -111,11 +112,18 @@ if __name__ == "__main__":
 
         df_table = df_table.drop(columns="orig_category")
 
+        # drop rows if needed
+        if "rows_to_drop" in conf_trend[table].keys():
+            for row in conf_trend[table]["rows_to_drop"]:
+                row_to_delete = df_table.index[df_table["category"] == row][0]
+                df_table = df_table.drop(index=row_to_delete)
+
         # clean values
         for year in conf_trend[table]["years"]:
-            df_table[year] = df_table[year].replace(
-                conf_trend[table]["replace_data_entries"]
-            )
+            if "replace_data_entries" in conf_trend[table].keys():
+                df_table[year] = df_table[year].replace(
+                    conf_trend[table]["replace_data_entries"]
+                )
             df_table[year] = df_table[year].str.replace("\n", "")
             df_table[year] = df_table[year].str.replace(",", ".")
             # invisible numbers in trend table on page 112
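A toy illustration of what this cleaning loop does to a single year column; the replacement mapping and values below are invented for the example, not taken from the report or the config:

```python
import pandas as pd

# Hypothetical stand-ins for conf_trend[table]["replace_data_entries"] and a year column.
replace_data_entries = {"NO,NE": "NO", "NE,NO": "NO"}
col = pd.Series(["12,5", "NO,NE", "3,0\n", "NE,NO"])

col = col.replace(replace_data_entries)  # optional mapping, applied only if configured
col = col.str.replace("\n", "")          # strip line breaks left over from the PDF layout
col = col.str.replace(",", ".")          # decimal commas -> decimal points
print(col.tolist())  # ['12.5', 'NO', '3.0', 'NO']
```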
@@ -152,6 +160,8 @@ if __name__ == "__main__":
             ).reset_index(drop=True)
 
         # break
+    # some categories are present in both the main and detailed tables
+    df_trend = df_trend.drop_duplicates()
 
     df_trend_if = pm2.pm2io.convert_wide_dataframe_if(
         df_trend,
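The `drop_duplicates()` call removes rows only when every column matches, which covers categories repeated verbatim between the overview and the detailed trend tables. A toy example with invented values:

```python
import pandas as pd

# Category 1A3 appears in both the overview and the detailed transport table
# with identical values, so drop_duplicates() keeps a single copy.
df_trend = pd.DataFrame(
    {
        "category": ["1A3", "1A3", "1A3a"],
        "2008": ["10.2", "10.2", "0.1"],
        "2009": ["11.0", "11.0", "0.2"],
    }
)
df_trend = df_trend.drop_duplicates()
print(df_trend)
#   category  2008  2009
# 0      1A3  10.2  11.0
# 2     1A3a   0.1   0.2
```

A category repeated with diverging values would survive this step and only surface later, for example when the trend data is merged with the main tables.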
@@ -284,4 +294,27 @@ if __name__ == "__main__":
     print("Converting to primap2 format.")
     data_main_pm2 = pm2.pm2io.from_interchange_format(df_main_IF)
 
-    pass
+    # # ###
+    # # Merge tables.
+    # # ###
+
+    print("Merging main and trend table.")
+    data_pm2 = data_main_pm2.pr.merge(data_trend_pm2, tolerance=1)
+
+    # # ###
+    # # Save raw data to IF and native format.
+    # # ###
+
+    data_if = data_pm2.pr.to_interchange_format()
+
+    pm2.pm2io.write_interchange_format(
+        output_folder / (output_filename + coords_terminologies["category"] + "_raw"),
+        data_if,
+    )
+
+    encoding = {var: compression for var in data_pm2.data_vars}
+    data_pm2.pr.to_netcdf(
+        output_folder
+        / (output_filename + coords_terminologies["category"] + "_raw.nc"),
+        encoding=encoding,
+    )
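To sanity-check the written files, the raw outputs can be loaded back with primap2's readers. A minimal sketch with placeholder file names (the real names are built from `output_filename` and the category terminology in the script, so the paths below are hypothetical); depending on the primap2 version, `read_interchange_format` may expect the path to the `.yaml` metadata file rather than the bare stem:

```python
import primap2 as pm2

# Placeholder names; substitute the actual output_folder / output_filename used above.
data_raw = pm2.open_dataset("KNA_BUR1_<category-terminology>_raw.nc")
data_if = pm2.pm2io.read_interchange_format("KNA_BUR1_<category-terminology>_raw")
print(data_raw)
```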