Browse Source

add code for ARE BUR1, some modifications for CHN BUR1/NC4 and functions

Johannes Gütschow 8 months ago
parent
commit
a264c50a2f

+ 2 - 0
src/unfccc_ghg_data/helper/__init__.py

@@ -34,6 +34,7 @@ from .functions import (
     get_country_name,
     make_wide_table,
     process_data_for_country,
+    set_to_nan_in_ds,
 )
 
 __all__ = [
@@ -63,4 +64,5 @@ __all__ = [
     "nAI_countries",
     "AI_countries",
     "all_countries",
+    "set_to_nan_in_ds",
 ]

+ 30 - 0
src/unfccc_ghg_data/helper/functions.py

@@ -8,6 +8,7 @@ import copy
 import json
 import re
 import warnings
+from collections.abc import Hashable
 from copy import deepcopy
 from datetime import date
 from pathlib import Path
@@ -1062,6 +1063,35 @@ def find_and_replace_values(
     return df
 
 
+def set_to_nan_in_ds(
+    ds_in: xr.Dataset,
+    entities: list[Hashable],
+    filter: dict[str, any],
+) -> xr.Dataset:
+    """
+    Set selected values of a dataset to NaN.
+
+    Parameters
+    ----------
+    ds_in
+        input dataset
+    entities
+        variables of ``ds_in`` in which values are replaced
+    filter
+        ``.pr.loc`` type selector for the elements that are replaced with NaN
+
+    Returns
+    -------
+        xr.Dataset with the selected values set to NaN
+    """
+    # mask is 0 for the selected elements and 1 (from ones_like) elsewhere
+    ds_mask = xr.zeros_like(ds_in[entities].pr.loc[filter]).combine_first(
+        xr.ones_like(ds_in)
+    )
+
+    return ds_in.where(ds_mask)
+
+
 def assert_values(
     df: pd.DataFrame,
     test_case: tuple[str | float | int],

+ 1 - 1
src/unfccc_ghg_data/unfccc_reader/China/__init__.py

@@ -1,6 +1,6 @@
 """Read China's BURs, NIRs, NCs
 
-Scripts and configurations to read Argentina's submissions to the UNFCCC.
+Scripts and configurations to read China's submissions to the UNFCCC.
 Currently, the following submissions and datasets are available (all datasets
 including DI (red using the DI-reader) and legacy BUR/NIR (no code)):
 

+ 4 - 4
src/unfccc_ghg_data/unfccc_reader/China/config_chn_bur3_nc4.py

@@ -184,7 +184,7 @@ category_conversion = {
             "3.A": "3.A.1",
             "3.B": "3.A.2",
             "3.C": "3.C.7",
-            "3.D": "M.3.AS",
+            "3.D": "M.3.C.45.AG",
             "3.E": "3.C.1.c",
             "3.F": "3.C.1.b",
             "4": "M.LULUCF",
@@ -234,7 +234,7 @@ category_conversion = {
                 },
             },
             "M.3.C.AG": {
-                "sources": ["M.3.C.1.AG", "M.3.AS", "3.C.7"],
+                "sources": ["M.3.C.1.AG", "M.3.C.45.AG", "3.C.7"],
                 "filter": {
                     "entity": ["CH4", "N2O"],
                 },
@@ -315,7 +315,7 @@ category_conversion = {
             "3.A": "3.A.1",
             "3.B": "3.A.2",
             "3.C": "3.C.7",
-            "3.D": "M.3.AS",
+            "3.D": "M.3.C.45.AG",
             "3.E": "3.C.1.c",
             "4": "M.LULUCF",
             "4.A.1": "3.B.1.a",
@@ -363,7 +363,7 @@ category_conversion = {
                 },
             },
             "M.3.C.AG": {
-                "sources": ["M.3.C.1.AG", "M.3.AS", "3.C.7"],
+                "sources": ["M.3.C.1.AG", "M.3.C.45.AG", "3.C.7"],
                 "filter": {
                     "entity": ["CH4"],
                 },

+ 11 - 6
src/unfccc_ghg_data/unfccc_reader/China/read_CHN_BUR3_from_pdf.py

@@ -13,7 +13,6 @@ from copy import deepcopy
 
 import camelot
 import primap2 as pm2
-import xarray as xr
 
 from unfccc_ghg_data.helper import (
     compression,
@@ -22,6 +21,7 @@ from unfccc_ghg_data.helper import (
     fix_rows,
     gas_baskets,
     process_data_for_country,
+    set_to_nan_in_ds,
 )
 from unfccc_ghg_data.unfccc_reader.China.config_chn_bur3_nc4 import (
     category_conversion,
@@ -182,11 +182,16 @@ if __name__ == "__main__":
                         entity for entity in entities if entity in entities_current
                     ]
 
-                ds_mask = xr.zeros_like(
-                    data_country[entities].pr.loc[filter]
-                ).combine_first(xr.ones_like(data_country))
-
-                data_country = data_country.where(ds_mask)
+                data_country = set_to_nan_in_ds(
+                    data_country,
+                    entities=entities,
+                    filter=filter,
+                )
+                # ds_mask = xr.zeros_like(
+                #     data_country[entities].pr.loc[filter]
+                # ).combine_first(xr.ones_like(data_country))
+                #
+                # data_country = data_country.where(ds_mask)
 
         data_proc_pm2_new = process_data_for_country(
             data_country,

+ 30 - 0
src/unfccc_ghg_data/unfccc_reader/United_Arab_Emirates/__init__.py

@@ -0,0 +1,30 @@
+"""Read United Arab Emirates' BURs, NIRs, NCs
+
+Scripts and configurations to read United Arab Emirates' submissions to the UNFCCC.
+Currently, the following submissions and datasets are available (all datasets
+including DI (read using the DI-reader) and legacy BUR/NIR (no code)):
+
+.. exec_code::
+    :hide_code:
+
+    from unfccc_ghg_data.helper.functions import (get_country_datasets,
+                                                  get_country_submissions)
+    country = 'ARE'
+    # print available submissions
+    print("="*15 + " Available submissions " + "="*15)
+    get_country_submissions(country, True)
+    print("")
+
+    #print available datasets
+    print("="*15 + " Available datasets " + "="*15)
+    get_country_datasets(country, True)
+
+You can also obtain this information running
+
+.. code-block:: bash
+
+    poetry run doit country_info country=ARE
+
+See below for a listing of scripts for BUR/NIR reading including links.
+
+"""

+ 201 - 0
src/unfccc_ghg_data/unfccc_reader/United_Arab_Emirates/config_are_bur1.py

@@ -0,0 +1,201 @@
+"""Config for United Arab Emirates BUR1
+
+Full configuration including PRIMAP2 conversion config and metadata
+
+"""
+
+#### configuration for PM2 format
+gwp_to_use = "AR4GWP100"
+
+coords_cols = {
+    "category": "category",
+    "entity": "entity",
+    "unit": "unit",
+}
+
+coords_terminologies = {
+    "area": "ISO3",
+    "category": "IPCC1996_2006_ARE_Inv",
+    "scenario": "PRIMAP",
+}
+
+coords_defaults = {
+    "source": "ARE-GHG-Inventory",
+    "provenance": "measured",
+    "area": "ARE",
+    "scenario": "BUR1",
+}
+
+coords_value_mapping = {
+    "unit": "PRIMAP1",
+    "category": {
+        "Total": "0",
+        "Energy": "1",
+        "Fuel Combustion Activities": "1.A",
+        "Fugitive Emissions": "1.B",
+        "Venting": "1.B.2.c.1",
+        "Flaring": "1.B.2.c.2",
+        "Other Fugitives": "M.1.B.2.OF",
+        "IPPU": "2",
+        "Mineral Industry": "2.A",
+        "Cement": "2.A.1",
+        "Chemical Industry": "2.B",
+        "Ammonia": "2.B.1",
+        "Metal Industry": "2.C",
+        "Iron & Steel": "2.C.1",
+        "Aluminum": "2.C.3",
+        "Agriculture": "3",
+        "Enteric Fermentation": "3.A",
+        "Manure Management": "3.B",
+        "Managed Soils": "3.D",
+        "LUCF": "4",
+        "Waste": "5",  # waste is more or less in 2006 categories
+        "Solid Waste Disposal": "5.A",
+        "Landfill": "M.5.A.LF",
+        "Biological treatment": "5.B",
+        "Composting": "M.5.B.COMP",
+        "Incineration": "5.C.1",
+        "Wastewater": "5.D",
+        "Memo Items": "IGNORE",
+        "Aviation": "M.1.A",
+        "Marine bunker": "M.1.B",
+    },
+    "entity": {
+        "CO2": "CO2",
+        "CH4": "CH4",
+        "N2O": "N2O",
+        "CH4.1": f"CH4 ({gwp_to_use})",
+        "N2O.1": f"N2O ({gwp_to_use})",
+        "HFCs": f"HFCS ({gwp_to_use})",
+        "PFCs": f"PFCS ({gwp_to_use})",
+        "Total GHG": f"KYOTOGHG ({gwp_to_use})",
+    },
+}
+
+filter_remove = {
+    "rem_cat": {"category": ["Memo Items"]},
+}
+
+filter_keep = {}
+
+meta_data = {
+    "references": "https://unfccc.int/documents/635318",
+    "rights": "",
+    "contact": "mail@johannes-guestchow.de",
+    "title": "United Arab Emirates. National Communication (NC). NC 5. Biennial Update Report (BUR). BUR 1.",
+    "comment": "Read fom pdf by Johannes Gütschow",
+    "institution": "UNFCCC",
+}
+
+## processing config
+terminology_proc = "IPCC2006_PRIMAP"
+
+category_conversion = {
+    "mapping": {
+        "0": "0",
+        "1": "1",
+        "1.A": "1.A",
+        "1.B": "1.B",
+        "2": "2",
+        "2.A": "2.A",
+        "2.A.1": "2.A.1",
+        "2.B": "2.B",
+        "2.B.1": "2.B.1",
+        "2.C": "2.C",
+        "2.C.1": "2.C.1",
+        "2.C.3": "2.C.3",
+        "3": "M.AG",
+        "3.A": "3.A.1",
+        "3.B": "3.A.2",
+        "3.D": "M.3.C.45.AG",
+        "4": "M.LULUCF",
+        "5": "4",
+        "5.A": "4.A",
+        "5.B": "4.B",
+        "5.C.1": "4.C.1",
+        "5.D": "4.D",
+        "M.1.A": "M.BK.A",
+        "M.1.B": "M.BK.M",
+        "1.B.2.c.1": "M.1.B.2.VEN",
+        "1.B.2.c.2": "M.1.B.2.FL",
+        "M.1.B.2.OF": "M.1.B.2.OF",
+        # "M.5.A.LF": "",
+        # "M.5.B.COMP": "",
+    },
+    "aggregate": {
+        "1.B.2": {
+            "sources": ["M.1.B.2.VEN", "M.1.B.2.FL", "M.1.B.2.OF"],
+            "filter": {
+                "entity": ["CO2", "CH4", "N2O"],
+            },
+        },
+        "2": {
+            "sources": ["2.G"],
+            "filter": {
+                "entity": ["HFCS"],
+            },
+        },
+        "3.A": {
+            "sources": ["3.A.1", "3.A.2"],
+            "filter": {
+                "entity": ["CH4", "N2O"],
+            },
+        },
+        "3.C": {
+            "sources": ["M.3.C.45.AG"],
+            "filter": {
+                "entity": ["N2O"],
+            },
+        },
+        "M.AG.ELV": {
+            "sources": ["3.C"],
+            "filter": {
+                "entity": ["N2O"],
+            },
+        },
+        "M.AG": {  # consistency check
+            "sources": ["3.A", "M.AG.ELV"],
+            "filter": {
+                "entity": ["N2O", "CH4"],
+            },
+        },
+        "3": {
+            "sources": ["M.AG", "M.LULUCF"],
+            "filter": {
+                "entity": ["CO2", "CH4", "N2O"],
+            },
+        },
+        "4.C": {
+            "sources": ["4.C.1"],
+            "filter": {
+                "entity": ["CO2", "CH4", "N2O"],
+            },
+        },
+        "M.BK": {
+            "sources": ["M.BK.A", "M.BK.M"],
+            "filter": {
+                "entity": ["CO2", "CH4", "N2O"],
+            },
+        },
+        "M.0.EL": {
+            "sources": ["1", "2", "M.AG", "4"],
+            "filter": {
+                "entity": ["CO2", "CH4", "N2O", "HFCS", "PFCS"],
+            },
+        },
+        "0": {  # consistency check
+            "sources": ["1", "2", "3", "4"],
+            "filter": {
+                "entity": ["CO2", "CH4", "N2O", "HFCS", "PFCS"],
+            },
+        },
+    },
+}
+
+processing_info_country = {
+    "basket_copy": {
+        "GWPs_to_add": ["SARGWP100", "AR5GWP100", "AR6GWP100"],
+        "entities": ["PFCS", "HFCS"],
+        "source_GWP": gwp_to_use,
+    },
+}

+ 155 - 0
src/unfccc_ghg_data/unfccc_reader/United_Arab_Emirates/read_ARE_BUR1_from_csv.py

@@ -0,0 +1,155 @@
+"""
+Read data from United Arab Emirates' BUR1.
+
+Data are read from a csv file which contains data manually copied from the pdf,
+which was necessary as multiple tables are not machine readable.
+The file contains an inventory for 2021.
+
+"""
+
+import pandas as pd
+import primap2 as pm2
+
+from unfccc_ghg_data.helper import (
+    compression,
+    downloaded_data_path,
+    extracted_data_path,
+    gas_baskets,
+    process_data_for_country,
+    set_to_nan_in_ds,
+)
+from unfccc_ghg_data.unfccc_reader.United_Arab_Emirates.config_are_bur1 import (
+    category_conversion,
+    coords_cols,
+    coords_defaults,
+    coords_terminologies,
+    coords_value_mapping,
+    filter_remove,
+    gwp_to_use,
+    meta_data,
+    processing_info_country,
+    terminology_proc,
+)
+
+if __name__ == "__main__":
+    # ###
+    # configuration
+    # ###
+    input_folder = downloaded_data_path / "UNFCCC" / "United_Arab_Emirates" / "BUR1"
+    output_folder = extracted_data_path / "UNFCCC" / "United_Arab_Emirates"
+    if not output_folder.exists():
+        output_folder.mkdir()
+
+    output_filename = "ARE_BUR1_"
+    inventory_file = "all_data_manual.csv"
+
+    year = 2021
+    time_format = "%Y"
+
+    # ###
+    # read the tables from csv
+    # ###
+    data_pd = pd.read_csv(input_folder / inventory_file)
+
+    data_pd = pm2.pm2io.nir_add_unit_information(
+        data_pd,
+        unit_row=0,
+        entity_row="header",
+        regexp_entity=".*",
+        regexp_unit=".*",
+        default_unit="",
+    )
+    data_pd = data_pd.set_index(data_pd.columns[0])
+    table_long = pm2.pm2io.nir_convert_df_to_long(
+        data_pd,
+        year=year,
+        header_long=["category", "entity", "unit", "time", "data"],
+    )
+
+    # drop CH4, N2O with GWP
+    idx_gwp = table_long[table_long["entity"].isin(["CH4.1", "N2O.1"])].index
+    table_long = table_long.drop(index=idx_gwp)
+
+    data_if = pm2.pm2io.convert_long_dataframe_if(
+        table_long,
+        coords_cols=coords_cols,
+        coords_defaults=coords_defaults,
+        coords_terminologies=coords_terminologies,
+        coords_value_mapping=coords_value_mapping,
+        filter_remove=filter_remove,
+        meta_data=meta_data,
+        time_format=time_format,
+    )
+
+    data_pm2 = pm2.pm2io.from_interchange_format(data_if)
+
+    # ###
+    # save data to IF and native format
+    # ###
+    data_if = data_pm2.pr.to_interchange_format()
+    if not output_folder.exists():
+        output_folder.mkdir()
+    pm2.pm2io.write_interchange_format(
+        output_folder / (output_filename + coords_terminologies["category"]),
+        data_if,
+    )
+
+    encoding = {var: compression for var in data_pm2.data_vars}
+    data_pm2.pr.to_netcdf(
+        output_folder / (output_filename + coords_terminologies["category"] + ".nc"),
+        encoding=encoding,
+    )
+
+    ### processing
+    data_proc_pm2 = data_pm2.copy()
+
+    # move HFCs from energy to IPPU as their use in electrical
+    # equipment is reported there
+    da_HFCs = data_proc_pm2[f"HFCS ({gwp_to_use})"].pr.loc[{"category": "1"}]
+    ds_HFCs = data_proc_pm2[f"HFCS ({gwp_to_use})"].pr.set(
+        "category", "2.G", da_HFCs, existing="overwrite"
+    )
+    data_proc_pm2 = data_proc_pm2.pr.merge(ds_HFCs)
+    data_proc_pm2 = set_to_nan_in_ds(
+        data_proc_pm2,
+        entities=[f"HFCS ({gwp_to_use})"],
+        filter={"category": ["1", "2"]},
+    )
+    data_proc_pm2 = set_to_nan_in_ds(
+        data_proc_pm2,
+        entities=[f"KYOTOGHG ({gwp_to_use})"],
+        filter={"category": ["1", "2"]},
+    )
+
+    # actual processing
+    data_proc_pm2 = process_data_for_country(
+        data_proc_pm2,
+        entities_to_ignore=[],
+        gas_baskets=gas_baskets,
+        processing_info_country=processing_info_country,
+        cat_terminology_out=terminology_proc,
+        category_conversion=category_conversion,
+    )
+
+    # adapt source and metadata
+    current_source = data_proc_pm2.coords["source"].to_numpy()[0]
+    data_temp = data_proc_pm2.pr.loc[{"source": current_source}]
+    data_proc_pm2 = data_proc_pm2.pr.set("source", "BUR_NIR", data_temp)
+    data_proc_pm2 = data_proc_pm2.pr.loc[{"source": ["BUR_NIR"]}]
+
+    # ###
+    # save data to IF and native format
+    # ###
+    data_proc_if = data_proc_pm2.pr.to_interchange_format()
+    if not output_folder.exists():
+        output_folder.mkdir()
+    pm2.pm2io.write_interchange_format(
+        output_folder / (output_filename + terminology_proc),
+        data_proc_if,
+    )
+
+    encoding = {var: compression for var in data_proc_pm2.data_vars}
+    data_proc_pm2.pr.to_netcdf(
+        output_folder / (output_filename + terminology_proc + ".nc"),
+        encoding=encoding,
+    )