Browse Source

add code for ARE BUR1, some modifications for CHN BUR3/NC4 and functions

Johannes Gütschow 8 months ago
parent
commit
a264c50a2f

+ 2 - 0
src/unfccc_ghg_data/helper/__init__.py

@@ -34,6 +34,7 @@ from .functions import (
     get_country_name,
     get_country_name,
     make_wide_table,
     make_wide_table,
     process_data_for_country,
     process_data_for_country,
+    set_to_nan_in_ds,
 )
 )
 
 
 __all__ = [
 __all__ = [
@@ -63,4 +64,5 @@ __all__ = [
     "nAI_countries",
     "nAI_countries",
     "AI_countries",
     "AI_countries",
     "all_countries",
     "all_countries",
+    "set_to_nan_in_ds",
 ]
 ]

+ 30 - 0
src/unfccc_ghg_data/helper/functions.py

@@ -8,6 +8,7 @@ import copy
 import json
 import json
 import re
 import re
 import warnings
 import warnings
+from collections.abc import Hashable
 from copy import deepcopy
 from copy import deepcopy
 from datetime import date
 from datetime import date
 from pathlib import Path
 from pathlib import Path
@@ -1062,6 +1063,35 @@ def find_and_replace_values(
     return df
     return df
 
 
 
 
+def set_to_nan_in_ds(
+    ds_in: xr.Dataset,
+    entities: list[Hashable],
+    filter: dict[str, any],  # NOTE(review): builtin `any`; `typing.Any` likely meant
+) -> xr.Dataset:
+    """
+    Set the selected values of the given entities to NaN in a dataset.
+
+    Parameters
+    ----------
+    ds_in
+        input dataset; the result is derived from it via ``ds_in.where``
+    entities
+        list of entities (selected via ``ds_in[entities]``) to work on
+    filter
+        .pr.loc type selector which selects the elements that should be replaced
+        with nan
+
+    Returns
+    -------
+        xr.Dataset with the desired values set to nan
+    """
+    ds_mask = xr.zeros_like(ds_in[entities].pr.loc[filter]).combine_first(
+        xr.ones_like(ds_in)
+    )
+    # mask is 0 on the selection and 1 elsewhere; where() keeps the nonzero part
+    return ds_in.where(ds_mask)
+
+
 def assert_values(
 def assert_values(
     df: pd.DataFrame,
     df: pd.DataFrame,
     test_case: tuple[str | float | int],
     test_case: tuple[str | float | int],

+ 1 - 1
src/unfccc_ghg_data/unfccc_reader/China/__init__.py

@@ -1,6 +1,6 @@
 """Read China's BURs, NIRs, NCs
 """Read China's BURs, NIRs, NCs
 
 
-Scripts and configurations to read Argentina's submissions to the UNFCCC.
+Scripts and configurations to read China's submissions to the UNFCCC.
 Currently, the following submissions and datasets are available (all datasets
 Currently, the following submissions and datasets are available (all datasets
 including DI (red using the DI-reader) and legacy BUR/NIR (no code)):
 including DI (red using the DI-reader) and legacy BUR/NIR (no code)):
 
 

+ 4 - 4
src/unfccc_ghg_data/unfccc_reader/China/config_chn_bur3_nc4.py

@@ -184,7 +184,7 @@ category_conversion = {
             "3.A": "3.A.1",
             "3.A": "3.A.1",
             "3.B": "3.A.2",
             "3.B": "3.A.2",
             "3.C": "3.C.7",
             "3.C": "3.C.7",
-            "3.D": "M.3.AS",
+            "3.D": "M.3.C.45.AG",
             "3.E": "3.C.1.c",
             "3.E": "3.C.1.c",
             "3.F": "3.C.1.b",
             "3.F": "3.C.1.b",
             "4": "M.LULUCF",
             "4": "M.LULUCF",
@@ -234,7 +234,7 @@ category_conversion = {
                 },
                 },
             },
             },
             "M.3.C.AG": {
             "M.3.C.AG": {
-                "sources": ["M.3.C.1.AG", "M.3.AS", "3.C.7"],
+                "sources": ["M.3.C.1.AG", "M.3.C.45.AG", "3.C.7"],
                 "filter": {
                 "filter": {
                     "entity": ["CH4", "N2O"],
                     "entity": ["CH4", "N2O"],
                 },
                 },
@@ -315,7 +315,7 @@ category_conversion = {
             "3.A": "3.A.1",
             "3.A": "3.A.1",
             "3.B": "3.A.2",
             "3.B": "3.A.2",
             "3.C": "3.C.7",
             "3.C": "3.C.7",
-            "3.D": "M.3.AS",
+            "3.D": "M.3.C.45.AG",
             "3.E": "3.C.1.c",
             "3.E": "3.C.1.c",
             "4": "M.LULUCF",
             "4": "M.LULUCF",
             "4.A.1": "3.B.1.a",
             "4.A.1": "3.B.1.a",
@@ -363,7 +363,7 @@ category_conversion = {
                 },
                 },
             },
             },
             "M.3.C.AG": {
             "M.3.C.AG": {
-                "sources": ["M.3.C.1.AG", "M.3.AS", "3.C.7"],
+                "sources": ["M.3.C.1.AG", "M.3.C.45.AG", "3.C.7"],
                 "filter": {
                 "filter": {
                     "entity": ["CH4"],
                     "entity": ["CH4"],
                 },
                 },

+ 11 - 6
src/unfccc_ghg_data/unfccc_reader/China/read_CHN_BUR3_from_pdf.py

@@ -13,7 +13,6 @@ from copy import deepcopy
 
 
 import camelot
 import camelot
 import primap2 as pm2
 import primap2 as pm2
-import xarray as xr
 
 
 from unfccc_ghg_data.helper import (
 from unfccc_ghg_data.helper import (
     compression,
     compression,
@@ -22,6 +21,7 @@ from unfccc_ghg_data.helper import (
     fix_rows,
     fix_rows,
     gas_baskets,
     gas_baskets,
     process_data_for_country,
     process_data_for_country,
+    set_to_nan_in_ds,
 )
 )
 from unfccc_ghg_data.unfccc_reader.China.config_chn_bur3_nc4 import (
 from unfccc_ghg_data.unfccc_reader.China.config_chn_bur3_nc4 import (
     category_conversion,
     category_conversion,
@@ -182,11 +182,16 @@ if __name__ == "__main__":
                         entity for entity in entities if entity in entities_current
                         entity for entity in entities if entity in entities_current
                     ]
                     ]
 
 
-                ds_mask = xr.zeros_like(
-                    data_country[entities].pr.loc[filter]
-                ).combine_first(xr.ones_like(data_country))
-
-                data_country = data_country.where(ds_mask)
+                data_country = set_to_nan_in_ds(
+                    data_country,
+                    entities=entities,
+                    filter=filter,
+                )
+                # ds_mask = xr.zeros_like(
+                #     data_country[entities].pr.loc[filter]
+                # ).combine_first(xr.ones_like(data_country))
+                #
+                # data_country = data_country.where(ds_mask)
 
 
         data_proc_pm2_new = process_data_for_country(
         data_proc_pm2_new = process_data_for_country(
             data_country,
             data_country,

+ 30 - 0
src/unfccc_ghg_data/unfccc_reader/United_Arab_Emirates/__init__.py

@@ -0,0 +1,30 @@
+"""Read United Arab Emirates' BURs, NIRs, NCs
+
+Scripts and configurations to read United Arab Emirates' submissions to the UNFCCC.
+Currently, the following submissions and datasets are available (all datasets
+including DI (read using the DI-reader) and legacy BUR/NIR (no code)):
+
+.. exec_code::
+    :hide_code:
+
+    from unfccc_ghg_data.helper.functions import (get_country_datasets,
+                                                  get_country_submissions)
+    country = 'ARE'
+    # print available submissions
+    print("="*15 + " Available submissions " + "="*15)
+    get_country_submissions(country, True)
+    print("")
+
+    # print available datasets
+    print("="*15 + " Available datasets " + "="*15)
+    get_country_datasets(country, True)
+
+You can also obtain this information running
+
+.. code-block:: bash
+
+    poetry run doit country_info country=ARE
+
+See below for a listing of scripts for BUR/NIR reading including links.
+
+"""

+ 201 - 0
src/unfccc_ghg_data/unfccc_reader/United_Arab_Emirates/config_are_bur1.py

@@ -0,0 +1,201 @@
+"""Config for United Arab Emirates BUR1
+
+Full configuration including PRIMAP2 conversion config and metadata
+
+"""
+
+#### configuration for PM2 format
+gwp_to_use = "AR4GWP100"
+
+coords_cols = {
+    "category": "category",
+    "entity": "entity",
+    "unit": "unit",
+}
+
+coords_terminologies = {
+    "area": "ISO3",
+    "category": "IPCC1996_2006_ARE_Inv",
+    "scenario": "PRIMAP",
+}
+
+coords_defaults = {
+    "source": "ARE-GHG-Inventory",
+    "provenance": "measured",
+    "area": "ARE",
+    "scenario": "BUR1",
+}
+
+coords_value_mapping = {
+    "unit": "PRIMAP1",
+    "category": {
+        "Total": "0",
+        "Energy": "1",
+        "Fuel Combustion Activities": "1.A",
+        "Fugitive Emissions": "1.B",
+        "Venting": "1.B.2.c.1",
+        "Flaring": "1.B.2.c.2",
+        "Other Fugitives": "M.1.B.2.OF",
+        "IPPU": "2",
+        "Mineral Industry": "2.A",
+        "Cement": "2.A.1",
+        "Chemical Industry": "2.B",
+        "Ammonia": "2.B.1",
+        "Metal Industry": "2.C",
+        "Iron & Steel": "2.C.1",
+        "Aluminum": "2.C.3",
+        "Agriculture": "3",
+        "Enteric Fermentation": "3.A",
+        "Manure Management": "3.B",
+        "Managed Soils": "3.D",
+        "LUCF": "4",
+        "Waste": "5",  # waste is more or less in 2006 categories
+        "Solid Waste Disposal": "5.A",
+        "Landfill": "M.5.A.LF",
+        "Biological treatment": "5.B",
+        "Composting": "M.5.B.COMP",
+        "Incineration": "5.C.1",
+        "Wastewater": "5.D",
+        "Memo Items": "IGNORE",
+        "Aviation": "M.1.A",
+        "Marine bunker": "M.1.B",
+    },
+    "entity": {
+        "CO2": "CO2",
+        "CH4": "CH4",
+        "N2O": "N2O",
+        "CH4.1": f"CH4 ({gwp_to_use})",
+        "N2O.1": f"N2O ({gwp_to_use})",
+        "HFCs": f"HFCS ({gwp_to_use})",
+        "PFCs": f"PFCS ({gwp_to_use})",
+        "Total GHG": f"KYOTOGHG ({gwp_to_use})",
+    },
+}
+
+filter_remove = {
+    "rem_cat": {"category": ["Memo Items"]},
+}
+
+filter_keep = {}
+
+meta_data = {
+    "references": "https://unfccc.int/documents/635318",
+    "rights": "",
+    "contact": "mail@johannes-guestchow.de",
+    "title": "United Arab Emirates. National Communication (NC). NC 5. Biennial Update Report (BUR). BUR 1.",
+    "comment": "Read fom pdf by Johannes Gütschow",
+    "institution": "UNFCCC",
+}
+
+## processing config
+terminology_proc = "IPCC2006_PRIMAP"
+
+category_conversion = {
+    "mapping": {
+        "0": "0",
+        "1": "1",
+        "1.A": "1.A",
+        "1.B": "1.B",
+        "2": "2",
+        "2.A": "2.A",
+        "2.A.1": "2.A.1",
+        "2.B": "2.B",
+        "2.B.1": "2.B.1",
+        "2.C": "2.C",
+        "2.C.1": "2.C.1",
+        "2.C.3": "2.C.3",
+        "3": "M.AG",
+        "3.A": "3.A.1",
+        "3.B": "3.A.2",
+        "3.D": "M.3.C.45.AG",
+        "4": "M.LULUCF",
+        "5": "4",
+        "5.A": "4.A",
+        "5.B": "4.B",
+        "5.C.1": "4.C.1",
+        "5.D": "4.D",
+        "M.1.A": "M.BK.A",
+        "M.1.B": "M.BK.M",
+        "1.B.2.c.1": "M.1.B.2.VEN",
+        "1.B.2.c.2": "M.1.B.2.FL",
+        "M.1.B.2.OF": "M.1.B.2.OF",
+        # "M.5.A.LF": "",
+        # "M.5.B.COMP": "",
+    },
+    "aggregate": {
+        "1.B.2": {
+            "sources": ["M.1.B.2.VEN", "M.1.B.2.FL", "M.1.B.2.OF"],
+            "filter": {
+                "entity": ["CO2", "CH4", "N2O"],
+            },
+        },
+        "2": {
+            "sources": ["2.G"],
+            "filter": {
+                "entity": ["HFCS"],
+            },
+        },
+        "3.A": {
+            "sources": ["3.A.1", "3.A.2"],
+            "filter": {
+                "entity": ["CH4", "N2O"],
+            },
+        },
+        "3.C": {
+            "sources": ["M.3.C.45.AG"],
+            "filter": {
+                "entity": ["N2O"],
+            },
+        },
+        "M.AG.ELV": {
+            "sources": ["3.C"],
+            "filter": {
+                "entity": ["N2O"],
+            },
+        },
+        "M.AG": {  # consistency check
+            "sources": ["3.A", "M.AG.ELV"],
+            "filter": {
+                "entity": ["N2O", "CH4"],
+            },
+        },
+        "3": {
+            "sources": ["M.AG", "M.LULUCF"],
+            "filter": {
+                "entity": ["CO2", "CH4", "N2O"],
+            },
+        },
+        "4.C": {
+            "sources": ["4.C.1"],
+            "filter": {
+                "entity": ["CO2", "CH4", "N2O"],
+            },
+        },
+        "M.BK": {
+            "sources": ["M.BK.A", "M.BK.M"],
+            "filter": {
+                "entity": ["CO2", "CH4", "N2O"],
+            },
+        },
+        "M.0.EL": {
+            "sources": ["1", "2", "M.AG", "4"],
+            "filter": {
+                "entity": ["CO2", "CH4", "N2O", "HFCS", "PFCS"],
+            },
+        },
+        "0": {  # consistency check
+            "sources": ["1", "2", "3", "4"],
+            "filter": {
+                "entity": ["CO2", "CH4", "N2O", "HFCS", "PFCS"],
+            },
+        },
+    },
+}
+
+processing_info_country = {
+    "basket_copy": {
+        "GWPs_to_add": ["SARGWP100", "AR5GWP100", "AR6GWP100"],
+        "entities": ["PFCS", "HFCS"],
+        "source_GWP": gwp_to_use,
+    },
+}

+ 155 - 0
src/unfccc_ghg_data/unfccc_reader/United_Arab_Emirates/read_ARE_BUR1_from_csv.py

@@ -0,0 +1,155 @@
+"""
+Read data from United Arab Emirates' BUR1.
+
+Data are read from a csv file which contains data manually copied from the pdf,
+which was necessary as multiple tables are not machine readable.
+The file contains an inventory for 2021.
+
+"""
+
+import pandas as pd
+import primap2 as pm2
+
+from unfccc_ghg_data.helper import (
+    compression,
+    downloaded_data_path,
+    extracted_data_path,
+    gas_baskets,
+    process_data_for_country,
+    set_to_nan_in_ds,
+)
+from unfccc_ghg_data.unfccc_reader.United_Arab_Emirates.config_are_bur1 import (
+    category_conversion,
+    coords_cols,
+    coords_defaults,
+    coords_terminologies,
+    coords_value_mapping,
+    filter_remove,
+    gwp_to_use,
+    meta_data,
+    processing_info_country,
+    terminology_proc,
+)
+
+if __name__ == "__main__":
+    # ###
+    # configuration
+    # ###
+    input_folder = downloaded_data_path / "UNFCCC" / "United_Arab_Emirates" / "BUR1"
+    output_folder = extracted_data_path / "UNFCCC" / "United_Arab_Emirates"
+    if not output_folder.exists():
+        output_folder.mkdir()
+
+    output_filename = "ARE_BUR1_"
+    inventory_file = "all_data_manual.csv"
+
+    year = 2021
+    time_format = "%Y"
+
+    # ###
+    # read the tables from csv
+    # ###
+    data_pd = pd.read_csv(input_folder / inventory_file)
+
+    data_pd = pm2.pm2io.nir_add_unit_information(
+        data_pd,
+        unit_row=0,
+        entity_row="header",
+        regexp_entity=".*",
+        regexp_unit=".*",
+        default_unit="",
+    )
+    data_pd = data_pd.set_index(data_pd.columns[0])
+    table_long = pm2.pm2io.nir_convert_df_to_long(
+        data_pd,
+        year=year,
+        header_long=["category", "entity", "unit", "time", "data"],
+    )
+
+    # drop CH4, N2O with GWP
+    idx_gwp = table_long[table_long["entity"].isin(["CH4.1", "N2O.1"])].index
+    table_long = table_long.drop(index=idx_gwp)
+
+    data_if = pm2.pm2io.convert_long_dataframe_if(
+        table_long,
+        coords_cols=coords_cols,
+        coords_defaults=coords_defaults,
+        coords_terminologies=coords_terminologies,
+        coords_value_mapping=coords_value_mapping,
+        filter_remove=filter_remove,
+        meta_data=meta_data,
+        time_format=time_format,
+    )
+
+    data_pm2 = pm2.pm2io.from_interchange_format(data_if)
+
+    # ###
+    # save data to IF and native format
+    # ###
+    data_if = data_pm2.pr.to_interchange_format()
+    if not output_folder.exists():
+        output_folder.mkdir()
+    pm2.pm2io.write_interchange_format(
+        output_folder / (output_filename + coords_terminologies["category"]),
+        data_if,
+    )
+
+    encoding = {var: compression for var in data_pm2.data_vars}
+    data_pm2.pr.to_netcdf(
+        output_folder / (output_filename + coords_terminologies["category"] + ".nc"),
+        encoding=encoding,
+    )
+
+    ### processing
+    data_proc_pm2 = data_pm2.copy()
+
+    # move HFCs from energy to IPPU as their use in electrical
+    # equipment is reported there
+    da_HFCs = data_proc_pm2[f"HFCS ({gwp_to_use})"].pr.loc[{"category": "1"}]
+    ds_HFCs = data_proc_pm2[f"HFCS ({gwp_to_use})"].pr.set(
+        "category", "2.G", da_HFCs, existing="overwrite"
+    )
+    data_proc_pm2 = data_proc_pm2.pr.merge(ds_HFCs)
+    data_proc_pm2 = set_to_nan_in_ds(
+        data_proc_pm2,
+        entities=[f"HFCS ({gwp_to_use})"],
+        filter={"category": ["1", "2"]},
+    )
+    data_proc_pm2 = set_to_nan_in_ds(
+        data_proc_pm2,
+        entities=[f"KYOTOGHG ({gwp_to_use})"],
+        filter={"category": ["1", "2"]},
+    )
+
+    # actual processing
+    data_proc_pm2 = process_data_for_country(
+        data_proc_pm2,
+        entities_to_ignore=[],
+        gas_baskets=gas_baskets,
+        processing_info_country=processing_info_country,
+        cat_terminology_out=terminology_proc,
+        category_conversion=category_conversion,
+    )
+
+    # adapt source and metadata
+    current_source = data_proc_pm2.coords["source"].to_numpy()[0]
+    data_temp = data_proc_pm2.pr.loc[{"source": current_source}]
+    data_proc_pm2 = data_proc_pm2.pr.set("source", "BUR_NIR", data_temp)
+    data_proc_pm2 = data_proc_pm2.pr.loc[{"source": ["BUR_NIR"]}]
+
+    # ###
+    # save data to IF and native format
+    # ###
+    data_proc_if = data_proc_pm2.pr.to_interchange_format()
+    if not output_folder.exists():
+        output_folder.mkdir()
+    pm2.pm2io.write_interchange_format(
+        output_folder / (output_filename + terminology_proc),
+        data_proc_if,
+    )
+
+    encoding = {var: compression for var in data_proc_pm2.data_vars}
+    data_proc_pm2.pr.to_netcdf(
+        output_folder / (output_filename + terminology_proc + ".nc"),
+        encoding=encoding,
+    )