소스 검색

cat aggregation

Daniel Busch 6 달 전
부모
커밋
ea6198cf20

+ 88 - 0
src/unfccc_ghg_data/unfccc_reader/Saint_Kitts_and_Nevis/config_kna_bur1.py

@@ -483,3 +483,91 @@ conf = {
         },
     },
 }
+
+fix_values_main = [("3A", "CH4", "0.203")]
+
+gas_baskets = {
+    "FGASES (SARGWP100)": ["HFCS (SARGWP100)", "PFCS (SARGWP100)", "SF6", "NF3"],
+    "FGASES (AR4GWP100)": ["HFCS (AR4GWP100)", "PFCS (AR4GWP100)", "SF6", "NF3"],
+    "FGASES (AR5GWP100)": ["HFCS (AR5GWP100)", "PFCS (AR5GWP100)", "SF6", "NF3"],
+    "FGASES (AR6GWP100)": ["HFCS (AR6GWP100)", "PFCS (AR6GWP100)", "SF6", "NF3"],
+    "KYOTOGHG (SARGWP100)": ["CO2", "CH4", "N2O", "FGASES (SARGWP100)"],
+    "KYOTOGHG (AR4GWP100)": ["CO2", "CH4", "N2O", "FGASES (AR4GWP100)"],
+    "KYOTOGHG (AR5GWP100)": ["CO2", "CH4", "N2O", "FGASES (AR5GWP100)"],
+    "KYOTOGHG (AR6GWP100)": ["CO2", "CH4", "N2O", "FGASES (AR6GWP100)"],
+}
+
+country_processing_step1 = {
+    "tolerance": 0.01,
+    "aggregate_cats": {
+        "M.3.D.AG": {"sources": ["3.D.2"]},
+        "M.3.C.AG": {
+            "sources": [
+                "3.C.1",
+                "3.C.2",
+                "3.C.3",
+                "3.C.4",
+                "3.C.5",
+                "3.C.6",
+                "3.C.7",
+                "3.C.8",
+            ],
+        },
+        "M.AG.ELV": {
+            "sources": ["M.3.C.AG", "M.3.D.AG"],
+        },
+        "3.A": {"sources": ["3.A.1", "3.A.2"]},
+        "3.C": {
+            "sources": [
+                "3.C.1",
+                "3.C.2",
+                "3.C.3",
+                "3.C.4",
+                "3.C.5",
+                "3.C.6",
+                "3.C.7",
+                "3.C.8",
+            ]
+        },
+        "3.D": {"sources": ["3.D.1", "3.D.2"]},
+        "M.AG": {"sources": ["3.A", "M.AG.ELV"]},
+        "M.3.D.LU": {"sources": ["3.D.1"]},
+        "M.LULUCF": {"sources": ["3.B", "M.3.D.LU"]},
+        "M.0.EL": {
+            "sources": ["1", "2", "M.AG", "4"],
+        },
+        "3": {"sources": ["M.AG", "M.LULUCF"]},  # consistency check
+        "0": {"sources": ["1", "2", "3", "4"]},  # consistency check
+    },
+    "basket_copy": {
+        "GWPs_to_add": ["AR4GWP100", "SARGWP100", "AR6GWP100"],
+        "entities": ["HFCS", "PFCS", "KYOTOGHG", "UnspMixOfHFCs"],
+        "source_GWP": gwp_to_use,
+    },
+    # "downscale": {
+    #     "sectors": {
+    #         "1.B_CH4": {
+    #             "basket": "1.B",
+    #             "basket_contents": ["1.B.1", "1.B.2"],
+    #             "entities": ["CH4"],
+    #             "dim": f"category ({coords_terminologies['category']})",
+    #             # "tolerance": 0.05,  # some inconsistencies (rounding?)
+    #         },
+    #         "1.B_CO2": {
+    #             "basket": "1.B",
+    #             "basket_contents": ["1.B.1", "1.B.2"],
+    #             "entities": ["CO2"],
+    #             "dim": f"category ({coords_terminologies['category']})",
+    #             "sel": {
+    #                 "time": [
+    #                     "2000",
+    #                     "2005",
+    #                     "2010",
+    #                     "2015",
+    #                     "2020",
+    #                 ]
+    #             },
+    #         },
+    #     }
+    # },
+}

+ 52 - 2
src/unfccc_ghg_data/unfccc_reader/Saint_Kitts_and_Nevis/read_KNA_BUR1_from_pdf.py

@@ -5,7 +5,12 @@ import camelot
 import pandas as pd
 import primap2 as pm2
 
-from unfccc_ghg_data.helper import downloaded_data_path, extracted_data_path, fix_rows
+from unfccc_ghg_data.helper import (
+    downloaded_data_path,
+    extracted_data_path,
+    fix_rows,
+    process_data_for_country,
+)
 from unfccc_ghg_data.unfccc_reader.Saint_Kitts_and_Nevis.config_kna_bur1 import (
     conf,
     conf_general,
@@ -14,7 +19,10 @@ from unfccc_ghg_data.unfccc_reader.Saint_Kitts_and_Nevis.config_kna_bur1 import
     coords_defaults,
     coords_terminologies,
     coords_value_mapping,
+    country_processing_step1,
     filter_remove,
+    fix_values_main,
+    gas_baskets,
     meta_data,
 )
 
@@ -159,7 +167,6 @@ if __name__ == "__main__":
                 join="outer",
             ).reset_index(drop=True)
 
-        # break
     # some categories present in main and detailed tables
     df_trend = df_trend.drop_duplicates()
 
@@ -276,6 +283,12 @@ if __name__ == "__main__":
     # year is the same for all sector tables
     df_main["time"] = "2018"
 
+    # fix values
+    for cat, ent, new_value in fix_values_main:
+        df_main.loc[
+            (df_main["category"] == cat) & (df_main["entity"] == ent), "data"
+        ] = new_value
+
     ### convert to interchange format ###
     print("Converting to interchange format.")
     df_main_IF = pm2.pm2io.convert_long_dataframe_if(
@@ -318,3 +331,40 @@ if __name__ == "__main__":
         / (output_filename + coords_terminologies["category"] + "_raw.nc"),
         encoding=encoding,
     )
+
+    # # ###
+    # # Processing
+    # # ###
+
+    # create the gas baskets before aggregating the categories
+    data_proc_pm2 = process_data_for_country(
+        data_country=data_pm2,
+        entities_to_ignore=[],
+        gas_baskets=gas_baskets,
+        filter_dims=None,
+        cat_terminology_out=None,
+        category_conversion=None,
+        sectors_out=None,
+        processing_info_country=country_processing_step1,
+    )
+
+    # # ###
+    # # save processed data to IF and native format
+    # # ###
+
+    terminology_proc = coords_terminologies["category"]
+
+    data_proc_if = data_proc_pm2.pr.to_interchange_format()
+
+    if not output_folder.exists():
+        output_folder.mkdir()
+    pm2.pm2io.write_interchange_format(
+        output_folder / (output_filename + terminology_proc), data_proc_if
+    )
+
+    encoding = {var: compression for var in data_proc_pm2.data_vars}
+    data_proc_pm2.pr.to_netcdf(
+        output_folder / (output_filename + terminology_proc + ".nc"), encoding=encoding
+    )
+
+    print("Saved processed data.")