Browse Source

all countries included in DI processing configuration

Johannes Gütschow 1 year ago
parent
commit
d122069339

+ 101 - 0
UNFCCC_GHG_data/UNFCCC_DI_reader/UNFCCC_DI_reader_config.py

@@ -1407,6 +1407,99 @@ di_processing_templates = {
     # TTO: 1990 only
     # TUN: 1994, 2000
     # TUV: 1994, 2014, many sectors missing / 0 (but maybe as there are no emissions)
+    # TZA: 1990, 1994
+    # UGA: 1994, 2000, subcategories a bit inconsistent
+    'URY': {
+        'DI2023-05-24': {
+            'downscale': {
+                'sectors': {
+                    '1': {
+                        'basket': '1',
+                        'basket_contents': ['1.A', '1.B'],
+                        'entities': ['CO2', 'CH4', 'N2O'],
+                        'dim': 'category (BURDI)',
+                    },
+                    '1.A': {
+                        'basket': '1.A',
+                        'basket_contents': ['1.A.1', '1.A.2', '1.A.3', '1.A.4',
+                                            '1.A.5'],
+                        'entities': ['CO2', 'CH4', 'N2O'],
+                        'dim': 'category (BURDI)',
+                    },
+                    '1.B': {
+                        'basket': '1.B',
+                        'basket_contents': ['1.B.1', '1.B.2'],
+                        'entities': ['CO2', 'CH4', 'N2O'],
+                        'dim': 'category (BURDI)',
+                    },
+                    '2_CO2CH4N2O': {
+                        'basket': '2',
+                        'basket_contents': ['2.A', '2.B', '2.C', '2.D', '2.G'],
+                        'entities': ['CO2', 'CH4', 'N2O'],
+                        'dim': 'category (BURDI)',
+                    },
+                    '2_FGASES': {
+                        'basket': '2',
+                        'basket_contents': ['2.C', '2.E', '2.F'],
+                        'entities': ['C2F6', 'CF4', 'HFC125', 'HFC134a', 'HFC143a',
+                                     'HFC152a', 'HFC227ea', 'HFC23', 'HFC32', 'SF6'],
+                        'dim': 'category (BURDI)',
+                    },
+                    '4': {
+                        'basket': '4',
+                        'basket_contents': ['4.A', '4.B', '4.C', '4.D', '4.E', '4.F',
+                                            '4.G'],
+                        'entities': ['CH4', 'N2O'],
+                        'dim': 'category (BURDI)',
+                    },
+                    '5': {
+                        'basket': '5',
+                        'basket_contents': ['5.A', '5.B', '5.C', '5.D', '5.E'],
+                        'entities': ['CO2', 'CH4', 'N2O'],
+                        'dim': 'category (BURDI)',
+                    },
+                    '6': {
+                        'basket': '6',
+                        'basket_contents': ['6.A', '6.B', '6.C', '6.D'],
+                        'entities': ['CO2', 'CH4', 'N2O'],
+                        'dim': 'category (BURDI)',
+                    },
+                },
+            },
+        },
+    },
+    # UZB: 1990-2012
+    # VCT: 1990, 1994, 1997, 2000, 2004. Sector coverage a bit inconsistent. 1.A.x
+    # missing for CH4 but present for CO2. IPPU is 0, subsectors missing.
+    # Downscaling doesn't work for all 0 / NaN timeseries
+    # VEN: 1999 only
+    # VNM: more data in BUR3
+    # VUT: more data in NC3
+    # WSM: more data in NC2
+    # YEM: 1995, 2000, 2010, 2012. subsectoral data a bit inconsistent, e.g. for 1.A.x
+    # ZAF: 1990, 1994
+    'ZMB': {
+        'DI2023-05-24': {  # 1994, 2000
+            'downscale': { # for 2000
+                'sectors': {
+                    '5': {
+                        'basket': '5',
+                        'basket_contents': ['5.B', '5.C'],
+                        'entities': ['CO2', 'CH4', 'N2O'],
+                        'dim': 'category (BURDI)',
+                    },
+                    '6': {
+                        'basket': '6',
+                        'basket_contents': ['6.A', '6.B'],
+                        'entities': ['CH4'],
+                        'dim': 'category (BURDI)',
+                    },
+                },
+            },
+        },
+    },
+    # ZWE: 1994, 2000, 2006 consistency of sectors and coverage does not look good,
+    # especially for subsectors
 }
 
 di_processing_info = {
@@ -1527,6 +1620,14 @@ di_processing_info = {
         'default': di_processing_templates['THA']['DI2023-05-24'],
         'DI2023-05-24': di_processing_templates['THA']['DI2023-05-24'],
     },
+    'URY': {
+        'default': di_processing_templates['URY']['DI2023-05-24'],
+        'DI2023-05-24': di_processing_templates['URY']['DI2023-05-24'],
+    },
+    'ZMB': {
+        'default': di_processing_templates['ZMB']['DI2023-05-24'],
+        'DI2023-05-24': di_processing_templates['ZMB']['DI2023-05-24'],
+    },
 }
 
 gas_baskets = {

+ 6 - 4
UNFCCC_GHG_data/UNFCCC_DI_reader/UNFCCC_DI_reader_proc.py

@@ -63,7 +63,7 @@ def process_and_save_UNFCCC_DI_for_country(
         data_country=data_to_process,
         entities_to_ignore=entities_to_ignore,
         gas_baskets=gas_baskets,
-        cat_conversion=cat_conversion,
+        #category_conversion=cat_conversion,
         sectors_out=None,
         processing_info_country=processing_info_country,
     )
@@ -79,7 +79,7 @@ def process_UNFCCC_DI_for_country(
         entities_to_ignore: List[str],
         gas_baskets: Dict[str, List[str]],
         filter_dims: Optional[Dict[str, List[str]]] = None,
-        cat_conversion: Dict[str, Dict] = None,
+        category_conversion: Dict[str, Dict] = None,
         sectors_out: List[str] = None,
         processing_info_country: Dict = None,
 ) -> xr.Dataset:
@@ -289,9 +289,11 @@ def process_UNFCCC_DI_for_country(
     if country_code in nAI_countries:
         # conversion from BURDI to IPCC2006_PRIMAP needed
         cat_terminology_out = 'IPCC2006_PRIMAP'
+        if category_conversion is None:
+            category_conversion = cat_conversion[f"{cat_terminology_in}_to_{cat_terminology_out}"]
         data_country = convert_categories(
             data_country,
-            cat_conversion[f"{cat_terminology_in}_to_{cat_terminology_out}"],
+            category_conversion,
             cat_terminology_out,
             debug=False,
             tolerance=0.01,
@@ -345,7 +347,7 @@ def process_UNFCCC_DI_for_country_group(
 ) -> xr.Dataset:
     """
     This function processes DI data for all countries in a group (annexI or non-AnnexI)
-    TODO: currently only non-annexI is implemented
+    
     The function processes all data in one go using datalad run. as the output data file
     names are unknown beforehand datalad run uses explicit=false
 

+ 4 - 2
UNFCCC_GHG_data/UNFCCC_DI_reader/util.py

@@ -4,9 +4,11 @@ from UNFCCC_GHG_data.helper import code_path
 
 #reader = unfccc_di_api.UNFCCCApiReader()
 #nAI_countries = list(reader.non_annex_one_reader.parties["code"])
-nAI_countries = pd.read_csv(code_path / 'UNFCCC_DI_reader' / 'DI_NAI_parties.conf')
+nAI_countries = list(pd.read_csv(code_path / 'UNFCCC_DI_reader' /
+                                 'DI_NAI_parties.conf')["code"])
 #AI_countries = list(reader.annex_one_reader.parties["code"])
-AI_countries = pd.read_csv(code_path / 'UNFCCC_DI_reader' / 'DI_AI_parties.conf')
+AI_countries = list(pd.read_csv(code_path / 'UNFCCC_DI_reader' /
+                                'DI_AI_parties.conf')["code"])
 
 DI_date_format = '%Y-%m-%d'
 regex_date = r"([0-9]{4}-[0-9]{2}-[0-9]{2})"

+ 15 - 0
UNFCCC_GHG_data/helper/functions.py

@@ -1,6 +1,7 @@
 import pycountry
 import json
 import xarray as xr
+import pandas as pd
 from copy import deepcopy
 from typing import Dict, List
 from pathlib import Path
@@ -20,6 +21,12 @@ def convert_categories(
     """
     convert data from one category terminology to another
     """
+    print(f"converting categories to {terminology_to}")
+
+    if 'orig_cat_name' in ds_input.coords:
+        cat_name_present = True
+    else:
+        cat_name_present = False
     ds_converted = ds_input.copy(deep=True)
     ds_converted.attrs = deepcopy(ds_input.attrs)
 
@@ -55,6 +62,8 @@ def convert_categories(
                 print(f"Category: {cat_to_agg}")
             source_cats = [cat for cat in aggregate_cats[cat_to_agg]['sources'] if
                            cat in cats_present_mapped]
+            if debug:
+                print(source_cats)
             data_agg = ds_converted.pr.loc[{'category': source_cats}].pr.sum(
                 dim='category', skipna=True, min_count=1)
             nan_vars = [var for var in data_agg.data_vars if
@@ -65,7 +74,13 @@ def convert_categories(
                 data_agg = data_agg.assign_coords(
                     coords={f'category ({terminology_to})':
                                 (f'category ({terminology_to})', [cat_to_agg])})
+                if cat_name_present:
+                    data_agg = data_agg.assign_coords(
+                        coords={'orig_cat_name':
+                                    (f'category ({terminology_to})',
+                                     [aggregate_cats[cat_to_agg]['name']])})
                 ds_converted = ds_converted.pr.merge(data_agg, tolerance=tolerance)
+                cats_present_mapped.append(cat_to_agg)
             else:
                 print(f"no data to aggregate category {cat_to_agg}")