Browse Source

Korea 2022 inventory read, Chile BUR5 preparations

Johannes 1 year ago
parent
commit
9a2116f3ec

+ 281 - 0
UNFCCC_GHG_data/UNFCCC_reader/Chile/read_CHL_BUR5_from_xlsx.py

@@ -0,0 +1,281 @@
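+# this script is being prepared to read data from Chile's national inventory
+# which is underlying BUR5.
+# NOTE(review): the configuration below (config_CHL_BUR4 import, inventory_file,
+# scenario, title, references) still refers to the 2020 inventory / BUR4 - confirm
+# and update before use.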
+# Data is read from the xlsx file
+
+import os
+import sys
+import pandas as pd
+import primap2 as pm2
+
+from config_CHL_BUR4 import cat_mapping, filter_remove_IPCC2006, aggregate_cats
+from UNFCCC_GHG_data.helper import downloaded_data_path, extracted_data_path
+from primap2.pm2io._data_reading import matches_time_format
+from primap2.pm2io._data_reading import filter_data
+
+# ###
+# configuration
+# ###
+
+# folders and files
+input_folder = downloaded_data_path / 'UNFCCC' / 'Chile' / 'BUR5'
+output_folder = extracted_data_path / 'UNFCCC' / 'Chile'
+if not output_folder.exists():
+    output_folder.mkdir()
+
+output_filename = 'CHL_BUR5_2022_'
+
+inventory_file = 'Inventario_Nacional_de_GEI-1990-2018.xlsx'
+years_to_read = range(1990, 2018 + 1)
+
+# configuration for conversion to PRIMAP2 data format
+unit_row = "header"
+unit_info = {
+    'regexp_entity': r'(.*)\s\(.*\)$',
+    'regexp_unit': r'.*\s\((.*)\)$',
+    'default_unit': 'kt',
+    'manual_repl_unit': {
+        'kt CO₂ eq': 'ktCO2eq',
+        'HFC (kt CO₂ eq)': 'ktCO2eq',
+        'PFC (kt CO₂ eq)': 'ktCO2eq',
+        'SF₆ (kt CO₂ eq)': 'ktCO2eq',
+    },
+    'manual_repl_entity': {
+        'kt CO₂ eq': 'KYOTOGHG (AR4GWP100)',
+        'HFC (kt CO₂ eq)': 'HFCS (AR4GWP100)',
+        'PFC (kt CO₂ eq)': 'PFCS (AR4GWP100)',
+        'SF₆ (kt CO₂ eq)': 'SF6 (AR4GWP100)',
+    }
+}
+cols_to_drop = ['Unnamed: 14', 'Unnamed: 16', 'Código IPCC.1',
+                'Categorías de fuente y sumidero de gases de efecto invernadero.1']
+# columns for category code and original category name
+index_cols = ['Código IPCC', 'Categorías de fuente y sumidero de gases de efecto invernadero']
+
+# operations on long format DF
+cols_for_space_stripping = ['category', 'orig_cat_name', 'entity']
+
+time_format = "%Y"
+
+coords_cols = {
+    "category": "category",
+    "entity": "entity",
+    "unit": "unit",
+}
+
+add_coords_cols = {
+    "orig_cat_name": ["orig_cat_name", "category"],
+}
+
+coords_terminologies = {
+    "area": "ISO3",
+    "category": "IPCC2006_1996_Chile_NIR",
+    "scenario": "PRIMAP",
+}
+
+coords_terminologies_2006 = {
+    "area": "ISO3",
+    "category": "IPCC2006_PRIMAP",
+    "scenario": "PRIMAP",
+}
+
+coords_defaults = {
+    "source": "CHL-GHG-Inventory",
+    "provenance": "measured",
+    "area": "CHL",
+    "scenario": "BUR4"
+}
+
+coords_value_mapping = {
+    "entity": {
+        "COVDM": "NMVOC",
+        "CO₂ neto": "CO2",
+        "CH₄": "CH4",
+        # "HFC": "HFCS",
+        "HFC-125": "HFC125",
+        "HFC-134a": "HFC134a",
+        "HFC-143a": "HFC143a",
+        "HFC-152a": "HFC152a",
+        "HFC-227ea": "HFC227ea",
+        "HFC-23": "HFC23",
+        "HFC-236fa": "HFC236fa",
+        "HFC-245fa": "HFC245fa",
+        "HFC-32": "HFC32",
+        "HFC-365mfc": "HFC365mfc",
+        "HFC-43-10mee": "HFC4310mee",
+        "N₂O": "N2O",
+        # "PFC": "PFCS",
+        "PFC-116": "C2F6",
+        "PFC-14": "CF4",
+        "PFC-218": "C3F8",
+        # "SF₆": "SF6",
+        "SO₂": "SO2",
+    },
+    "unit": "PRIMAP1",
+}
+
+coords_value_filling = {
+    'category': {  # col to fill
+        'orig_cat_name': {  # col to fill from
+            'Todas las emisiones y las absorciones nacionales': '0',  # from value: to value
+            'Tanque internacional': 'M.BK',
+            'Aviación internacional': 'M.BK.A',
+            'Navegación internacional': 'M.BK.M',
+            'Operaciones multilaterales': 'M.MULTIOP',
+            'Emisiones de CO2 de la biomasa': 'M.BIO',
+        }
+    }
+}
+
+filter_remove = {
+    "f1": {
+        "entity": ["Absorciones CO₂", "Emisiones CO₂"],
+    },
+    "f2": {
+        "orig_cat_name": ["Partidas informativas"],
+    },
+}
+
+filter_keep = {}
+
+# metadata attached to the dataset when converting to the interchange format
+# NOTE(review): "references" and "title" still describe BUR4 - confirm whether
+# they have to be updated for BUR5.
+meta_data = {
+    "references": "https://unfccc.int/documents/267936, https://snichile.mma.gob.cl/wp-content/uploads/2021/03/Inventario_Nacional_de_GEI-1990-2018.xlsx",
+    "rights": "",
+    "contact": "mail@johannes-guetschow.de",
+    "title": "Chile: BUR4",
+    "comment": "Read from xlsx file by Johannes Gütschow",
+    "institution": "United Nations Framework Convention on Climate Change (UNFCCC)",
+}
+
+compression = dict(zlib=True, complevel=9)
+
+# ###
+# start data reading
+# ###
+
+# change working directory to script directory for proper folder names
+script_path = os.path.abspath(sys.argv[0])
+script_dir_name = os.path.dirname(script_path)
+os.chdir(script_dir_name)
+
+df_all = None
+
+for year in years_to_read:
+    # read sheet for the year. Each sheet contains several tables,
+    # we only read the upper row as the other tables are summary tables
+    df_current = pd.read_excel(input_folder / inventory_file, sheet_name=str(year), skiprows=2, nrows=442, engine="openpyxl")
+    # drop the columns which are empty or repeat the metadata for the second block
+    df_current.drop(cols_to_drop, axis=1, inplace=True)
+    # drop all rows where the index cols (category code and name) are both NaN
+    # as without one of them there is no category information
+    df_current.dropna(axis=0, how='all', subset=index_cols, inplace=True)
+    # set multi-index. necessary for the stack operation in the conversion to long format
+    df_current = df_current.set_index(index_cols)
+    # add unit row using information from entity row and add to index
+    df_current = pm2.pm2io.nir_add_unit_information(df_current, unit_row=unit_row, **unit_info)
+    # actual conversion to long format
+    df_current = pm2.pm2io.nir_convert_df_to_long(df_current, year)
+    # aggregate to one df
+    if df_all is None:
+        df_all = df_current
+    else:
+        df_all = pd.concat([df_all, df_current])
+
+df_all = df_all.reset_index(drop=True)
+
+# ###
+# postprocessing
+# ###
+# strip trailing and leading spaces
+for col in cols_for_space_stripping:
+    df_all[col] = df_all[col].str.strip()
+
+df_all["category"] = df_all["category"].str.rstrip('.')
+
+data_if = pm2.pm2io.convert_long_dataframe_if(
+    df_all,
+    coords_cols=coords_cols,
+    add_coords_cols=add_coords_cols,
+    coords_defaults=coords_defaults,
+    coords_terminologies=coords_terminologies,
+    coords_value_mapping=coords_value_mapping,
+    coords_value_filling=coords_value_filling,
+    filter_remove=filter_remove,
+    filter_keep=filter_keep,
+    meta_data=meta_data
+)
+
+
+#conversion to PRIMAP2 native format
+data_pm2 = pm2.pm2io.from_interchange_format(data_if)
+# convert back to IF to have units in the fixed format
+data_if = data_pm2.pr.to_interchange_format()
+
+# ###
+# save data to IF and native format
+# ###
+pm2.pm2io.write_interchange_format(output_folder / (output_filename + coords_terminologies["category"]), data_if)
+
+data_pm2 = pm2.pm2io.from_interchange_format(data_if)
+encoding = {var: compression for var in data_pm2.data_vars}
+data_pm2.pr.to_netcdf(output_folder / (output_filename + coords_terminologies["category"] + ".nc"), encoding=encoding)
+
+# ###
+# conversion to ipcc 2006 categories
+# ###
+
+data_if_2006 = pm2.pm2io.convert_long_dataframe_if(
+    df_all,
+    coords_cols=coords_cols,
+    add_coords_cols=add_coords_cols,
+    coords_defaults=coords_defaults,
+    coords_terminologies=coords_terminologies_2006,
+    coords_value_mapping=coords_value_mapping,
+    coords_value_filling=coords_value_filling,
+    filter_remove=filter_remove,
+    filter_keep=filter_keep,
+    meta_data=meta_data
+)
+
+cat_label = 'category (' + coords_terminologies_2006["category"] + ')'
+filter_data(data_if_2006, filter_remove=filter_remove_IPCC2006)
+data_if_2006 = data_if_2006.replace({cat_label: cat_mapping})
+
+# aggregate categories
+# For every target category in aggregate_cats the rows of its source categories
+# are summed per (source, scenario, provenance, area, entity, unit) and the
+# result is appended to data_if_2006 as rows of the new category.
+for cat_to_agg in aggregate_cats:
+    mask = data_if_2006[cat_label].isin(aggregate_cats[cat_to_agg]["sources"])
+    df_test = data_if_2006[mask]
+
+    if len(df_test) > 0:
+        print(f"Aggregating category {cat_to_agg}")
+        df_combine = df_test.copy(deep=True)
+
+        time_format = '%Y'
+        time_columns = [
+            col
+            for col in df_combine.columns.values
+            if matches_time_format(col, time_format)
+        ]
+
+        # non-numeric entries in the time columns become NaN
+        for col in time_columns:
+            df_combine[col] = pd.to_numeric(df_combine[col], errors="coerce")
+
+        # numeric_only=True restricts the sum to the numeric time columns. Without
+        # it pandas >= 2 keeps the string columns (category, orig_cat_name) in the
+        # result and the insert() calls below fail because the columns exist already.
+        df_combine = df_combine.groupby(
+            by=['source', 'scenario (PRIMAP)', 'provenance', 'area (ISO3)', 'entity', 'unit']
+        ).sum(numeric_only=True)
+
+        df_combine.insert(0, cat_label, cat_to_agg)
+        df_combine.insert(1, "orig_cat_name", aggregate_cats[cat_to_agg]["name"])
+
+        # move the group keys back from the index into regular columns
+        df_combine = df_combine.reset_index()
+
+        data_if_2006 = pd.concat([data_if_2006, df_combine])
+    else:
+        print(f"no data to aggregate category {cat_to_agg}")
+
+#conversion to PRIMAP2 native format
+data_pm2_2006 = pm2.pm2io.from_interchange_format(data_if_2006)
+# convert back to IF to have units in the fixed format
+data_if_2006 = data_pm2_2006.pr.to_interchange_format()
+
+pm2.pm2io.write_interchange_format(output_folder / (output_filename + coords_terminologies_2006["category"]), data_if_2006)
+
+encoding = {var: compression for var in data_pm2_2006.data_vars}
+data_pm2_2006.pr.to_netcdf(output_folder / (output_filename + coords_terminologies_2006["category"] + ".nc"), encoding=encoding)

+ 3 - 2
UNFCCC_GHG_data/UNFCCC_reader/Republic_of_Korea/read_KOR_2021-Inventory_from_xlsx.py

@@ -185,11 +185,11 @@ data_if = data_pm2.pr.to_interchange_format()
 # ###
 if not output_folder.exists():
     output_folder.mkdir()
-pm2.pm2io.write_interchange_format(output_folder / (output_filename + coords_terminologies["category"]), data_if)
+#pm2.pm2io.write_interchange_format(output_folder / (output_filename + coords_terminologies["category"]), data_if)
 
 data_pm2 = pm2.pm2io.from_interchange_format(data_if)
 encoding = {var: compression for var in data_pm2.data_vars}
-data_pm2.pr.to_netcdf(output_folder / (output_filename + coords_terminologies["category"] + ".nc"), encoding=encoding)
+#data_pm2.pr.to_netcdf(output_folder / (output_filename + coords_terminologies["category"] + ".nc"), encoding=encoding)
 
 # ###
 # conversion to ipcc 2006 categories
@@ -233,6 +233,7 @@ for cat_to_agg in aggregate_before_mapping:
             by=['source', 'scenario (PRIMAP)', 'provenance', 'area (ISO3)', 'entity',
                 'unit']).sum()
 
+
         df_combine.insert(0, cat_label, cat_to_agg)
         df_combine.insert(1, "orig_cat_name",
                           aggregate_before_mapping[cat_to_agg]["name"])

+ 318 - 0
UNFCCC_GHG_data/UNFCCC_reader/Republic_of_Korea/read_KOR_2022-Inventory_from_xlsx.py

@@ -0,0 +1,318 @@
+# this script reads data from Korea's 2022 national inventory (data for 1990-2020)
+# NOTE(review): the category configuration is imported from config_KOR_BUR4
+# Data is read from the xlsx file
+
+import os
+import sys
+import pandas as pd
+import primap2 as pm2
+
+from config_KOR_BUR4 import cat_name_translations, cat_codes
+from config_KOR_BUR4 import remove_cats, aggregate_before_mapping, cat_mapping, \
+    aggregate_after_mapping, coords_terminologies_2006, filter_remove_2006, \
+    filter_remove_after_agg
+from UNFCCC_GHG_data.helper import downloaded_data_path, extracted_data_path
+from primap2.pm2io._data_reading import filter_data, matches_time_format
+from UNFCCC_GHG_data.helper import process_data_for_country
+
+# ###
+# configuration
+# ###
+input_folder = downloaded_data_path / 'non-UNFCCC' / 'Republic_of_Korea' / \
+               '2022-Inventory'
+output_folder = extracted_data_path / 'non-UNFCCC' / 'Republic_of_Korea'
+if not output_folder.exists():
+    output_folder.mkdir()
+
+output_filename = 'KOR_2022-Inventory_2022_'
+
+inventory_file = 'Republic_of_Korea_National_GHG_Inventory_(1990_2020).xlsx'
+years_to_read = range(1990, 2020 + 1)
+
+sheets_to_read = ['온실가스', 'CO2', 'CH4', 'N2O', 'HFCs', 'PFCs', 'SF6']
+cols_to_read = range(1, 2020 - 1990 + 3)
+
+# columns for category code and original category name
+index_cols = ['분야·부문/연도']
+
+sheet_metadata = {
+    'entity': {
+        '온실가스': 'KYOTOGHG (SARGWP100)',
+        'CO2': 'CO2',
+        'CH4': 'CH4 (SARGWP100)',
+        'N2O': 'N2O (SARGWP100)',
+        'HFCs': 'HFCS (SARGWP100)',
+        'PFCs': 'PFCS (SARGWP100)',
+        'SF6': 'SF6 (SARGWP100)',
+    },
+    'unit': {
+        '온실가스': 'Gg CO2 / yr',
+        'CO2': 'Gg CO2 / yr',
+        'CH4': 'Gg CO2 / yr',
+        'N2O': 'Gg CO2 / yr',
+        'HFCs': 'Gg CO2 / yr',
+        'PFCs': 'Gg CO2 / yr',
+        'SF6': 'Gg CO2 / yr',
+    }
+}
+
+# definitions for conversion to interchange format
+time_format = "%Y"
+
+coords_cols = {
+    "category": "category",
+    "entity": "entity",
+    "unit": "unit",
+}
+
+add_coords_cols = {
+    "orig_cat_name": ["orig_cat_name", "category"],
+    "cat_name_translation": ["cat_name_translation", "category"]
+}
+
+coords_terminologies = {
+    "area": "ISO3",
+    "category": "IPCC1996_KOR_INV",
+    "scenario": "PRIMAP",
+}
+
+coords_defaults = {
+    "source": "KOR-GHG-Inventory",
+    "provenance": "measured",
+    "area": "KOR",
+    "scenario": "INV2022",
+}
+
+coords_value_mapping = {
+    "cat_name_translation": cat_name_translations,
+    "category": cat_codes,
+}
+
+# filtering after IF creation to be able to use the IPCC codes
+# rows removed from the interchange format data by filter_data
+filter_remove = {
+    "f1": {
+        # backslash is escaped explicitly: "\I" is an invalid escape sequence;
+        # the resulting string (backslash followed by IGNORE) is unchanged
+        "category (IPCC1996_KOR_INV)": "\\IGNORE",
+    },
+    "livestock": { # temp until double cat name problem is solved
+        "category (IPCC1996_KOR_INV)": [
+            '4.B.1', '4.B.10', '4.B.2', '4.B.3', '4.B.4',
+            '4.B.5', '4.B.6', '4.B.7', '4.B.8', '4.B.9',
+        ]
+    }
+}
+
+filter_keep = {}
+
+meta_data = {
+    "references": "http://www.gir.go.kr/home/file/readDownloadFile.do?fileId=5810&fileSeq=3",
+    "rights": "",
+    "contact": "mail@johannes-guetschow.de",
+    "title": "Republic of Korea: National Greenhouse Gas Inventory Report 2022",
+    "comment": "Read fom xlsx file by Johannes Gütschow",
+    "institution": "Republic of Korea, Ministry of Environment, Greenhouse Gas Inventory and Research Center",
+}
+
+
+
+cols_for_space_stripping = []
+
+compression = dict(zlib=True, complevel=9)
+
+# ###
+# start data reading
+# ###
+
+# change working directory to script directory for proper folder names
+script_path = os.path.abspath(sys.argv[0])
+script_dir_name = os.path.dirname(script_path)
+os.chdir(script_dir_name)
+
+df_all = None
+
+for sheet in sheets_to_read:
+    # read current sheet (one sheet per gas)
+    df_current = pd.read_excel(input_folder / inventory_file, sheet_name=sheet, skiprows=3, nrows=146, usecols=cols_to_read,
+                               engine="openpyxl")
+    # drop all rows where the index cols (category code and name) are both NaN
+    # as without one of them there is no category information
+    df_current.dropna(axis=0, how='all', subset=index_cols, inplace=True)
+    # set index. necessary for the stack operation in the conversion to long format
+    # df_current = df_current.set_index(index_cols)
+    # make sure all col headers are str
+    df_current.columns = df_current.columns.map(str)
+    # add columns
+    for col in sheet_metadata.keys():
+        df_current.insert(1, col, sheet_metadata[col][sheet])
+    # aggregate to one df
+    if df_all is None:
+        df_all = df_current
+    else:
+        df_all = pd.concat([df_all, df_current])
+
+df_all = df_all.reset_index(drop=True)
+# rename category col because filtering produces problems with korean col names
+df_all.rename(columns={"분야·부문/연도": "category"}, inplace=True)
+
+# create copies of category col for further processing
+df_all["orig_cat_name"] = df_all["category"]
+df_all["cat_name_translation"] = df_all["category"]
+
+
+
+# ###
+# convert to PRIMAP2 interchange format
+# ###
+data_if = pm2.pm2io.convert_wide_dataframe_if(
+    df_all,
+    coords_cols=coords_cols,
+    add_coords_cols=add_coords_cols,
+    coords_defaults=coords_defaults,
+    coords_terminologies=coords_terminologies,
+    coords_value_mapping=coords_value_mapping,
+    #coords_value_filling=coords_value_filling,
+    #filter_remove=filter_remove,
+    #filter_keep=filter_keep,
+    meta_data=meta_data,
+    convert_str=True,
+    copy_df=True, # we need the unchanged DF for the conversion step
+    )
+
+filter_data(data_if, filter_remove=filter_remove)
+
+#conversion to PRIMAP2 native format
+data_pm2 = pm2.pm2io.from_interchange_format(data_if)
+# convert back to IF to have units in the fixed format
+data_pm2 = data_pm2.reset_coords(["orig_cat_name", "cat_name_translation"], drop=True)
+data_if = data_pm2.pr.to_interchange_format()
+
+# ###
+# save data to IF and native format
+# ###
+if not output_folder.exists():
+    output_folder.mkdir()
+pm2.pm2io.write_interchange_format(output_folder / (output_filename + coords_terminologies["category"]), data_if)
+
+data_pm2 = pm2.pm2io.from_interchange_format(data_if)
+encoding = {var: compression for var in data_pm2.data_vars}
+data_pm2.pr.to_netcdf(output_folder / (output_filename + coords_terminologies["category"] + ".nc"), encoding=encoding)
+
+# ###
+# conversion to ipcc 2006 categories
+# ###
+
+
+data_if_2006 = pm2.pm2io.convert_wide_dataframe_if(
+    df_all,
+    coords_cols=coords_cols,
+    add_coords_cols=add_coords_cols,
+    coords_defaults=coords_defaults,
+    coords_terminologies=coords_terminologies_2006,
+    coords_value_mapping=coords_value_mapping,
+    meta_data=meta_data,
+    convert_str=True,
+    copy_df=True,  # don't mess up the dataframe when testing
+)
+
+cat_label = 'category (' + coords_terminologies_2006["category"] + ')'
+# agg before mapping
+
+# For every target category in aggregate_before_mapping the rows of its source
+# categories are summed per (source, scenario, provenance, area, entity, unit)
+# and the result is appended to data_if_2006 as rows of the new category.
+for cat_to_agg in aggregate_before_mapping:
+    mask = data_if_2006[cat_label].isin(aggregate_before_mapping[cat_to_agg]["sources"])
+    df_test = data_if_2006[mask]
+
+    if len(df_test) > 0:
+        print(f"Aggregating category {cat_to_agg}")
+        df_combine = df_test.copy(deep=True)
+
+        time_format = '%Y'
+        time_columns = [
+            col
+            for col in df_combine.columns.values
+            if matches_time_format(col, time_format)
+        ]
+
+        # non-numeric entries in the time columns become NaN
+        for col in time_columns:
+            df_combine[col] = pd.to_numeric(df_combine[col], errors="coerce")
+
+        # numeric_only=True restricts the sum to the numeric time columns, so the
+        # string columns (category, orig_cat_name, cat_name_translation) never
+        # reach the result and need no drop by a hard-coded column label.
+        df_combine = df_combine.groupby(
+            by=['source', 'scenario (PRIMAP)', 'provenance', 'area (ISO3)', 'entity',
+                'unit']).sum(numeric_only=True)
+
+        df_combine.insert(0, cat_label, cat_to_agg)
+        df_combine.insert(1, "orig_cat_name",
+                          aggregate_before_mapping[cat_to_agg]["name"])
+
+        # move the group keys back from the index into regular columns
+        df_combine = df_combine.reset_index()
+
+        # if the target category is also one of its own sources, remove the
+        # existing rows of that category before appending the aggregated rows
+        if cat_to_agg in aggregate_before_mapping[cat_to_agg]["sources"]:
+            filter_this_cat = {
+                "f": {cat_label: cat_to_agg}
+            }
+            filter_data(data_if_2006, filter_remove=filter_this_cat)
+
+        data_if_2006 = pd.concat([data_if_2006, df_combine])
+    else:
+        print(f"no data to aggregate category {cat_to_agg}")
+
+# filtering
+filter_data(data_if_2006, filter_remove=filter_remove_2006)
+
+# map 1 to 1 categories
+data_if_2006 = data_if_2006.replace({cat_label: cat_mapping})
+data_if_2006[cat_label].unique()
+
+# agg after mapping
+
+# For every target category in aggregate_after_mapping the rows of its source
+# categories are summed per (source, scenario, provenance, area, entity, unit)
+# and the result is appended to data_if_2006 as rows of the new category.
+for cat_to_agg in aggregate_after_mapping:
+    mask = data_if_2006[cat_label].isin(aggregate_after_mapping[cat_to_agg]["sources"])
+    df_test = data_if_2006[mask]
+
+    if len(df_test) > 0:
+        print(f"Aggregating category {cat_to_agg}")
+        df_combine = df_test.copy(deep=True)
+
+        time_format = '%Y'
+        time_columns = [
+            col
+            for col in df_combine.columns.values
+            if matches_time_format(col, time_format)
+        ]
+
+        # non-numeric entries in the time columns become NaN
+        for col in time_columns:
+            df_combine[col] = pd.to_numeric(df_combine[col], errors="coerce")
+
+        # numeric_only=True restricts the sum to the numeric time columns, so the
+        # string columns (category, orig_cat_name, cat_name_translation) never
+        # reach the result and need no drop by a hard-coded column label.
+        df_combine = df_combine.groupby(
+            by=['source', 'scenario (PRIMAP)', 'provenance', 'area (ISO3)', 'entity',
+                'unit']).sum(numeric_only=True)
+
+        df_combine.insert(0, cat_label, cat_to_agg)
+        df_combine.insert(1, "orig_cat_name",
+                          aggregate_after_mapping[cat_to_agg]["name"])
+
+        # move the group keys back from the index into regular columns
+        df_combine = df_combine.reset_index()
+
+        # if the target category is also one of its own sources, remove the
+        # existing rows of that category before appending the aggregated rows
+        if cat_to_agg in aggregate_after_mapping[cat_to_agg]["sources"]:
+            filter_this_cat = {
+                "f": {cat_label: cat_to_agg}
+            }
+            filter_data(data_if_2006, filter_remove=filter_this_cat)
+
+        data_if_2006 = pd.concat([data_if_2006, df_combine])
+    else:
+        print(f"no data to aggregate category {cat_to_agg}")
+
+
+#conversion to PRIMAP2 native format
+data_pm2_2006 = pm2.pm2io.from_interchange_format(data_if_2006)
+# convert back to IF to have units in the fixed format
+data_pm2_2006 = data_pm2_2006.reset_coords(["orig_cat_name", "cat_name_translation"],
+                                       drop=True)
+data_if_2006 = data_pm2_2006.pr.to_interchange_format()
+# save IPCC2006 data
+
+# filter_data is called without using a return value throughout this script,
+# i.e. it is relied on to modify data_if_2006 in place
+filter_data(data_if_2006, filter_remove=filter_remove_after_agg)
+pm2.pm2io.write_interchange_format(output_folder / (output_filename + coords_terminologies_2006["category"]), data_if_2006)
+
+# rebuild the native dataset from the filtered IF data (as done for the
+# IPCC1996 output above) so that the netCDF file holds the same data as the
+# csv file; otherwise the rows removed by filter_remove_after_agg would still
+# be written to the netCDF file
+data_pm2_2006 = pm2.pm2io.from_interchange_format(data_if_2006)
+encoding = {var: compression for var in data_pm2_2006.data_vars}
+data_pm2_2006.pr.to_netcdf(output_folder / (output_filename + coords_terminologies_2006["category"] + ".nc"), encoding=encoding)

+ 1 - 1
UNFCCC_GHG_data/UNFCCC_reader/read_UNFCCC_submission.py

@@ -3,7 +3,7 @@
 
 import datalad.api
 import argparse
-from get_submissions_info import get_code_file
+from UNFCCC_GHG_data.helper import get_code_file
 from get_submissions_info import get_possible_inputs
 from get_submissions_info import get_possible_outputs
 from UNFCCC_GHG_data.helper import root_path

+ 2 - 0
UNFCCC_GHG_data/helper/__init__.py

@@ -6,6 +6,7 @@ from .definitions import dataset_path, dataset_path_UNFCCC
 from .definitions import custom_country_mapping, custom_folders
 from .functions import get_country_code, get_country_name, convert_categories
 from .functions import create_folder_mapping, process_data_for_country
+from .functions import get_code_file
 
 __all__ = [
     "root_path",
@@ -25,4 +26,5 @@ __all__ = [
     "convert_categories",
     "create_folder_mapping",
     "process_data_for_country",
+    "get_code_file"
 ]

+ 1 - 0
downloaded_data/UNFCCC/Chile/BUR5/2022_GEI_CL.xlsx

@@ -0,0 +1 @@
+../../../non-UNFCCC/Chile/2022-Inventory/2022_GEI_CL.xlsx

+ 169 - 169
extracted_data/UNFCCC/folder_mapping.json

@@ -1,194 +1,194 @@
 {
-    "GHA": "Ghana",
-    "STP": "Sao_Tome_and_Principe",
-    "KOR": "Republic_of_Korea",
-    "MWI": "Malawi",
-    "SAU": "Saudi_Arabia",
-    "SDN": "Sudan",
-    "MDG": "Madagascar",
-    "BLR": "Belarus",
-    "VUT": "Vanuatu",
-    "UGA": "Uganda",
-    "TUV": "Tuvalu",
-    "GUY": "Guyana",
-    "SRB": "Serbia",
-    "BEN": "Benin",
-    "DNK": "Denmark",
-    "OMN": "Oman",
-    "MEX": "Mexico",
-    "LSO": "Lesotho",
-    "CAF": "Central_African_Republic",
+    "MDV": "Maldives",
+    "HND": "Honduras",
+    "SGP": "Singapore",
+    "CPV": "Cabo_Verde",
+    "MDA": "Moldova,_Republic_of",
+    "LIE": "Liechtenstein",
+    "HTI": "Haiti",
+    "LAO": "Lao_People's_Democratic_Republic",
+    "YEM": "Yemen",
+    "ROU": "Romania",
+    "JOR": "Jordan",
+    "MNE": "Montenegro",
+    "GRD": "Grenada",
+    "USA": "United_States",
     "KWT": "Kuwait",
+    "BEL": "Belgium",
+    "DMA": "Dominica",
+    "THA": "Thailand",
+    "AGO": "Angola",
+    "BEN": "Benin",
+    "URY": "Uruguay",
+    "FSM": "Micronesia,_Federated_States_of",
     "FRA": "France",
-    "LIE": "Liechtenstein",
+    "SYR": "Syrian_Arab_Republic",
+    "MDG": "Madagascar",
+    "LSO": "Lesotho",
+    "LBR": "Liberia",
+    "GMB": "Gambia",
+    "VCT": "Saint_Vincent_and_the_Grenadines",
+    "KIR": "Kiribati",
+    "VNM": "Viet_Nam",
+    "NIU": "Niue",
+    "RWA": "Rwanda",
+    "COM": "Comoros",
+    "MNG": "Mongolia",
+    "BLZ": "Belize",
+    "ESP": "Spain",
+    "ALB": "Albania",
+    "IRQ": "Iraq",
+    "TUV": "Tuvalu",
+    "MUS": "Mauritius",
+    "BTN": "Bhutan",
+    "KAZ": "Kazakhstan",
+    "ARE": "United_Arab_Emirates",
+    "SMR": "San_Marino",
     "CUB": "Cuba",
-    "BRA": "Brazil",
+    "PLW": "Palau",
     "CRI": "Costa_Rica",
-    "VNM": "Viet_Nam",
-    "QAT": "Qatar",
-    "NAM": "Namibia",
-    "MLT": "Malta",
-    "SVN": "Slovenia",
-    "BGR": "Bulgaria",
-    "CZE": "Czechia",
-    "PHL": "Philippines",
-    "MHL": "Marshall_Islands",
-    "URY": "Uruguay",
-    "NER": "Niger",
+    "DEU": "Germany",
+    "TGO": "Togo",
+    "BRB": "Barbados",
+    "NPL": "Nepal",
+    "CMR": "Cameroon",
+    "KNA": "Saint_Kitts_and_Nevis",
+    "CAF": "Central_African_Republic",
+    "GBR": "United_Kingdom",
+    "BWA": "Botswana",
+    "ISL": "Iceland",
+    "BHR": "Bahrain",
     "NZL": "New_Zealand",
-    "LAO": "Lao_People's_Democratic_Republic",
-    "THA": "Thailand",
+    "ECU": "Ecuador",
+    "SDN": "Sudan",
+    "CHN": "China",
+    "JPN": "Japan",
+    "VUT": "Vanuatu",
+    "CHL": "Chile",
+    "SSD": "South_Sudan",
+    "ITA": "Italy",
+    "EUA": "European_Union",
+    "LVA": "Latvia",
+    "PHL": "Philippines",
+    "GIN": "Guinea",
+    "MOZ": "Mozambique",
+    "BRA": "Brazil",
+    "TKM": "Turkmenistan",
     "SUR": "Suriname",
-    "TJK": "Tajikistan",
+    "BRN": "Brunei_Darussalam",
+    "ZWE": "Zimbabwe",
+    "GAB": "Gabon",
+    "FJI": "Fiji",
     "CHE": "Switzerland",
-    "ISL": "Iceland",
-    "DMA": "Dominica",
-    "SGP": "Singapore",
-    "VEN": "Venezuela,_Bolivarian_Republic_of",
-    "LKA": "Sri_Lanka",
-    "GMB": "Gambia",
-    "GRC": "Greece",
-    "BHR": "Bahrain",
-    "SMR": "San_Marino",
-    "PRY": "Paraguay",
-    "BOL": "Bolivia,_Plurinational_State_of",
-    "SLV": "El_Salvador",
-    "LTU": "Lithuania",
-    "GBR": "United_Kingdom",
-    "AFG": "Afghanistan",
-    "USA": "United_States",
+    "DOM": "Dominican_Republic",
+    "PAK": "Pakistan",
+    "DZA": "Algeria",
+    "PAN": "Panama",
+    "PSE": "Palestine,_State_of",
+    "ATG": "Antigua_and_Barbuda",
+    "NAM": "Namibia",
+    "MLT": "Malta",
+    "TLS": "Timor-Leste",
+    "DNK": "Denmark",
+    "KGZ": "Kyrgyzstan",
+    "GEO": "Georgia",
+    "KOR": "Republic_of_Korea",
+    "BHS": "Bahamas",
     "TUR": "Turkey",
-    "IND": "India",
+    "ARG": "Argentina",
+    "CYP": "Cyprus",
+    "MRT": "Mauritania",
     "NOR": "Norway",
-    "PAK": "Pakistan",
+    "COK": "Cook_Islands",
+    "BDI": "Burundi",
+    "MHL": "Marshall_Islands",
     "LBN": "Lebanon",
-    "AUT": "Austria",
-    "IRN": "Iran,_Islamic_Republic_of",
-    "BRB": "Barbados",
     "UKR": "Ukraine",
-    "ARG": "Argentina",
-    "ZAF": "South_Africa",
-    "BRN": "Brunei_Darussalam",
-    "BGD": "Bangladesh",
-    "PNG": "Papua_New_Guinea",
-    "KHM": "Cambodia",
+    "PER": "Peru",
     "FIN": "Finland",
-    "EGY": "Egypt",
-    "ATG": "Antigua_and_Barbuda",
-    "TGO": "Togo",
-    "ALB": "Albania",
-    "DOM": "Dominican_Republic",
-    "BWA": "Botswana",
-    "BEL": "Belgium",
-    "TLS": "Timor-Leste",
-    "MCO": "Monaco",
-    "RWA": "Rwanda",
-    "MDA": "Moldova,_Republic_of",
-    "SSD": "South_Sudan",
-    "AGO": "Angola",
-    "PLW": "Palau",
-    "HTI": "Haiti",
-    "KAZ": "Kazakhstan",
-    "YEM": "Yemen",
-    "KEN": "Kenya",
-    "MDV": "Maldives",
-    "JPN": "Japan",
-    "CYP": "Cyprus",
-    "BIH": "Bosnia_and_Herzegovina",
-    "ESP": "Spain",
-    "NLD": "Netherlands",
+    "COG": "Congo",
+    "ZMB": "Zambia",
     "PRK": "Korea,_Democratic_People's_Republic_of",
-    "GIN": "Guinea",
-    "CHN": "China",
-    "MMR": "Myanmar",
-    "AZE": "Azerbaijan",
-    "KIR": "Kiribati",
-    "BDI": "Burundi",
-    "BHS": "Bahamas",
-    "MKD": "North_Macedonia",
-    "MRT": "Mauritania",
-    "LVA": "Latvia",
-    "NGA": "Nigeria",
-    "KGZ": "Kyrgyzstan",
-    "DZA": "Algeria",
-    "TKM": "Turkmenistan",
-    "GNB": "Guinea-Bissau",
-    "ZWE": "Zimbabwe",
-    "CIV": "C\u00f4te_d'Ivoire",
-    "LBR": "Liberia",
-    "SLB": "Solomon_Islands",
-    "CPV": "Cabo_Verde",
-    "DJI": "Djibouti",
+    "LTU": "Lithuania",
+    "COD": "Congo,_The_Democratic_Republic_of_the",
+    "TTO": "Trinidad_and_Tobago",
+    "SWZ": "Eswatini",
+    "LCA": "Saint_Lucia",
     "MAR": "Morocco",
-    "COM": "Comoros",
-    "CAN": "Canada",
-    "DEU": "Germany",
+    "QAT": "Qatar",
     "COL": "Colombia",
-    "WSM": "Samoa",
-    "CMR": "Cameroon",
-    "SWE": "Sweden",
-    "CHL": "Chile",
-    "JOR": "Jordan",
-    "AUS": "Australia",
-    "SYR": "Syrian_Arab_Republic",
+    "IRL": "Ireland",
+    "ZAF": "South_Africa",
+    "IDN": "Indonesia",
+    "BIH": "Bosnia_and_Herzegovina",
+    "NER": "Niger",
+    "MYS": "Malaysia",
+    "MWI": "Malawi",
+    "GNB": "Guinea-Bissau",
+    "PNG": "Papua_New_Guinea",
+    "MKD": "North_Macedonia",
+    "OMN": "Oman",
+    "LKA": "Sri_Lanka",
+    "IRN": "Iran,_Islamic_Republic_of",
+    "AUT": "Austria",
+    "MMR": "Myanmar",
+    "BGR": "Bulgaria",
+    "SLV": "El_Salvador",
+    "SYC": "Seychelles",
+    "AZE": "Azerbaijan",
+    "TON": "Tonga",
+    "BGD": "Bangladesh",
+    "GUY": "Guyana",
     "POL": "Poland",
-    "MUS": "Mauritius",
-    "FSM": "Micronesia,_Federated_States_of",
-    "PER": "Peru",
+    "TUN": "Tunisia",
+    "STP": "Sao_Tome_and_Principe",
+    "ISR": "Israel",
+    "PRT": "Portugal",
+    "LUX": "Luxembourg",
+    "CZE": "Czechia",
+    "KEN": "Kenya",
     "BFA": "Burkina_Faso",
-    "EUA": "European_Union",
-    "LCA": "Saint_Lucia",
-    "NRU": "Nauru",
+    "GRC": "Greece",
+    "SLB": "Solomon_Islands",
     "GTM": "Guatemala",
-    "COD": "Congo,_The_Democratic_Republic_of_the",
-    "COG": "Congo",
-    "GAB": "Gabon",
-    "SYC": "Seychelles",
-    "NIC": "Nicaragua",
-    "PSE": "Palestine,_State_of",
-    "COK": "Cook_Islands",
-    "TCD": "Chad",
-    "SWZ": "Eswatini",
-    "ARE": "United_Arab_Emirates",
-    "MNG": "Mongolia",
-    "MYS": "Malaysia",
-    "VCT": "Saint_Vincent_and_the_Grenadines",
-    "JAM": "Jamaica",
-    "MNE": "Montenegro",
+    "NLD": "Netherlands",
+    "GHA": "Ghana",
+    "KHM": "Cambodia",
+    "NGA": "Nigeria",
+    "MCO": "Monaco",
+    "SWE": "Sweden",
+    "AFG": "Afghanistan",
+    "UZB": "Uzbekistan",
+    "BOL": "Bolivia,_Plurinational_State_of",
+    "RUS": "Russian_Federation",
+    "EST": "Estonia",
+    "WSM": "Samoa",
     "HUN": "Hungary",
-    "SEN": "Senegal",
+    "JAM": "Jamaica",
+    "EGY": "Egypt",
+    "PRY": "Paraguay",
+    "SVN": "Slovenia",
+    "BLR": "Belarus",
+    "TCD": "Chad",
+    "TJK": "Tajikistan",
     "SVK": "Slovakia",
-    "EST": "Estonia",
-    "ITA": "Italy",
-    "PAN": "Panama",
-    "IRQ": "Iraq",
-    "ISR": "Israel",
-    "KNA": "Saint_Kitts_and_Nevis",
-    "PRT": "Portugal",
-    "HRV": "Croatia",
-    "BTN": "Bhutan",
-    "RUS": "Russian_Federation",
-    "TTO": "Trinidad_and_Tobago",
-    "ROU": "Romania",
+    "SEN": "Senegal",
+    "IND": "India",
+    "MLI": "Mali",
+    "VEN": "Venezuela,_Bolivarian_Republic_of",
+    "DJI": "Djibouti",
     "TZA": "Tanzania,_United_Republic_of",
-    "TON": "Tonga",
-    "ERI": "Eritrea",
-    "NPL": "Nepal",
-    "GEO": "Georgia",
-    "ECU": "Ecuador",
-    "UZB": "Uzbekistan",
-    "FJI": "Fiji",
-    "TUN": "Tunisia",
     "ARM": "Armenia",
-    "MLI": "Mali",
-    "MOZ": "Mozambique",
-    "LUX": "Luxembourg",
-    "NIU": "Niue",
-    "IRL": "Ireland",
-    "GRD": "Grenada",
-    "HND": "Honduras",
+    "NRU": "Nauru",
     "ETH": "Ethiopia",
-    "BLZ": "Belize",
-    "IDN": "Indonesia",
-    "ZMB": "Zambia"
+    "HRV": "Croatia",
+    "CAN": "Canada",
+    "NIC": "Nicaragua",
+    "CIV": "C\u00f4te_d'Ivoire",
+    "ERI": "Eritrea",
+    "AUS": "Australia",
+    "MEX": "Mexico",
+    "UGA": "Uganda",
+    "SRB": "Serbia",
+    "SAU": "Saudi_Arabia"
 }

+ 1 - 0
extracted_data/non-UNFCCC/Republic_of_Korea/KOR_2022-Inventory_2022_IPCC1996_KOR_INV.csv

@@ -0,0 +1 @@
+../../../.git/annex/objects/m7/pj/MD5E-s199234--49808980dc1b803b43aa2f2906c512b9.csv/MD5E-s199234--49808980dc1b803b43aa2f2906c512b9.csv

+ 1 - 0
extracted_data/non-UNFCCC/Republic_of_Korea/KOR_2022-Inventory_2022_IPCC1996_KOR_INV.nc

@@ -0,0 +1 @@
+../../../.git/annex/objects/Vk/Z7/MD5E-s122919--39fe77ee76ae47418786a77d7a52e441.nc/MD5E-s122919--39fe77ee76ae47418786a77d7a52e441.nc

+ 23 - 0
extracted_data/non-UNFCCC/Republic_of_Korea/KOR_2022-Inventory_2022_IPCC1996_KOR_INV.yaml

@@ -0,0 +1,23 @@
+attrs:
+  references: http://www.gir.go.kr/home/file/readDownloadFile.do?fileId=5810&fileSeq=3
+  rights: ''
+  contact: mail@johannes-guetschow.de
+  title: 'Republic of Korea: National Greenhouse Gas Inventory Report 2022'
+  comment: Read fom xlsx file by Johannes Gütschow
+  institution: Republic of Korea, Ministry of Environment, Greenhouse Gas Inventory
+    and Research Center
+  cat: category (IPCC1996_KOR_INV)
+  area: area (ISO3)
+  scen: scenario (PRIMAP)
+time_format: '%Y'
+dimensions:
+  '*':
+  - time
+  - scenario (PRIMAP)
+  - provenance
+  - source
+  - category (IPCC1996_KOR_INV)
+  - area (ISO3)
+  - entity
+  - unit
+data_file: KOR_2022-Inventory_2022_IPCC1996_KOR_INV.csv

+ 1 - 0
extracted_data/non-UNFCCC/Republic_of_Korea/KOR_2022-Inventory_2022_IPCC2006_PRIMAP.csv

@@ -0,0 +1 @@
+../../../.git/annex/objects/xv/V5/MD5E-s162109--ac88f35e713446c5a070705023f24d1f.csv/MD5E-s162109--ac88f35e713446c5a070705023f24d1f.csv

+ 1 - 0
extracted_data/non-UNFCCC/Republic_of_Korea/KOR_2022-Inventory_2022_IPCC2006_PRIMAP.nc

@@ -0,0 +1 @@
+../../../.git/annex/objects/mf/0w/MD5E-s111427--caeafc1ab76ed41564dad45e245fdd96.nc/MD5E-s111427--caeafc1ab76ed41564dad45e245fdd96.nc

+ 23 - 0
extracted_data/non-UNFCCC/Republic_of_Korea/KOR_2022-Inventory_2022_IPCC2006_PRIMAP.yaml

@@ -0,0 +1,23 @@
+attrs:
+  references: http://www.gir.go.kr/home/file/readDownloadFile.do?fileId=5810&fileSeq=3
+  rights: ''
+  contact: mail@johannes-guetschow.de
+  title: 'Republic of Korea: National Greenhouse Gas Inventory Report 2022'
+  comment: Read fom xlsx file by Johannes Gütschow
+  institution: Republic of Korea, Ministry of Environment, Greenhouse Gas Inventory
+    and Research Center
+  cat: category (IPCC2006_PRIMAP)
+  area: area (ISO3)
+  scen: scenario (PRIMAP)
+time_format: '%Y'
+dimensions:
+  '*':
+  - time
+  - scenario (PRIMAP)
+  - provenance
+  - source
+  - area (ISO3)
+  - category (IPCC2006_PRIMAP)
+  - entity
+  - unit
+data_file: KOR_2022-Inventory_2022_IPCC2006_PRIMAP.csv