Browse Source

all countries included in DI processing configuration

Johannes Gütschow 1 year ago
parent
commit
d122069339

+ 101 - 0
UNFCCC_GHG_data/UNFCCC_DI_reader/UNFCCC_DI_reader_config.py

@@ -1407,6 +1407,99 @@ di_processing_templates = {
     # TTO: 1990 only
     # TTO: 1990 only
     # TUN: 1994, 2000
     # TUN: 1994, 2000
    # TUV: 1994, 2014, many sectors missing / 0 (but maybe as there are no emissions)
    # TUV: 1994, 2014, many sectors missing / 0 (but maybe as there are no emissions)
+    # TZA: 1990, 1994
+    # UGA: 1994, 2000, subcategories a bit inconsistent
+    'URY': {
+        'DI2023-05-24': {
+            'downscale': {
+                'sectors': {
+                    '1': {
+                        'basket': '1',
+                        'basket_contents': ['1.A', '1.B'],
+                        'entities': ['CO2', 'CH4', 'N2O'],
+                        'dim': 'category (BURDI)',
+                    },
+                    '1.A': {
+                        'basket': '1.A',
+                        'basket_contents': ['1.A.1', '1.A.2', '1.A.3', '1.A.4',
+                                            '1.A.5'],
+                        'entities': ['CO2', 'CH4', 'N2O'],
+                        'dim': 'category (BURDI)',
+                    },
+                    '1.B': {
+                        'basket': '1.B',
+                        'basket_contents': ['1.B.1', '1.B.2'],
+                        'entities': ['CO2', 'CH4', 'N2O'],
+                        'dim': 'category (BURDI)',
+                    },
+                    '2_CO2CH4N2O': {
+                        'basket': '2',
+                        'basket_contents': ['2.A', '2.B', '2.C', '2.D', '2.G'],
+                        'entities': ['CO2', 'CH4', 'N2O'],
+                        'dim': 'category (BURDI)',
+                    },
+                    '2_FGASES': {
+                        'basket': '2',
+                        'basket_contents': ['2.C', '2.E', '2.F'],
+                        'entities': ['C2F6', 'CF4', 'HFC125', 'HFC134a', 'HFC143a',
+                                     'HFC152a', 'HFC227ea', 'HFC23', 'HFC32', 'SF6'],
+                        'dim': 'category (BURDI)',
+                    },
+                    '4': {
+                        'basket': '4',
+                        'basket_contents': ['4.A', '4.B', '4.C', '4.D', '4.E', '4.F',
+                                            '4.G'],
+                        'entities': ['CH4', 'N2O'],
+                        'dim': 'category (BURDI)',
+                    },
+                    '5': {
+                        'basket': '5',
+                        'basket_contents': ['5.A', '5.B', '5.C', '5.D', '5.E'],
+                        'entities': ['CO2', 'CH4', 'N2O'],
+                        'dim': 'category (BURDI)',
+                    },
+                    '6': {
+                        'basket': '6',
+                        'basket_contents': ['6.A', '6.B', '6.C', '6.D'],
+                        'entities': ['CO2', 'CH4', 'N2O'],
+                        'dim': 'category (BURDI)',
+                    },
+                },
+            },
+        },
+    },
+    # UZB: 1990-2012
+    # VCT: 1990, 1994, 1997, 2000, 2004. Sector coverage a bit inconsistent. 1.A.x
+    # missing for CH4 but present for CO2. IPPU is 0, subsectors missing downscaling
+    # doesn't work for all 0 / NaN timeseries
+    # VEN: 1999 only
+    # VNM: more data in BUR3
+    # VUT: more data in NC3
+    # WSM: more data in NC2
+    # YEM: 1995, 2000, 2010, 2012. subsectoral data a bit inconsistent, e.g. for 1.A.x
+    # ZAF: 1990, 1994
+    'ZMB': {
+        'DI2023-05-24': {  # 1994, 2000
+            'downscale': { # for 2000
+                'sectors': {
+                    '5': {
+                        'basket': '5',
+                        'basket_contents': ['5.B', '5.C'],
+                        'entities': ['CO2', 'CH4', 'N2O'],
+                        'dim': 'category (BURDI)',
+                    },
+                    '6': {
+                        'basket': '6',
+                        'basket_contents': ['6.A', '6.B'],
+                        'entities': ['CH4'],
+                        'dim': 'category (BURDI)',
+                    },
+                },
+            },
+        },
+    },
+    # ZWE: 1994, 2000, 2006 consistency of sectors and coverage does not look good,
+    # especially for subsectors
 }
 }
 
 
 di_processing_info = {
 di_processing_info = {
@@ -1527,6 +1620,14 @@ di_processing_info = {
         'default': di_processing_templates['THA']['DI2023-05-24'],
         'default': di_processing_templates['THA']['DI2023-05-24'],
         'DI2023-05-24': di_processing_templates['THA']['DI2023-05-24'],
         'DI2023-05-24': di_processing_templates['THA']['DI2023-05-24'],
     },
     },
+    'URY': {
+        'default': di_processing_templates['URY']['DI2023-05-24'],
+        'DI2023-05-24': di_processing_templates['URY']['DI2023-05-24'],
+    },
+    'ZMB': {
+        'default': di_processing_templates['ZMB']['DI2023-05-24'],
+        'DI2023-05-24': di_processing_templates['ZMB']['DI2023-05-24'],
+    },
 }
 }
 
 
 gas_baskets = {
 gas_baskets = {

+ 6 - 4
UNFCCC_GHG_data/UNFCCC_DI_reader/UNFCCC_DI_reader_proc.py

@@ -63,7 +63,7 @@ def process_and_save_UNFCCC_DI_for_country(
         data_country=data_to_process,
         data_country=data_to_process,
         entities_to_ignore=entities_to_ignore,
         entities_to_ignore=entities_to_ignore,
         gas_baskets=gas_baskets,
         gas_baskets=gas_baskets,
-        cat_conversion=cat_conversion,
+        #category_conversion=cat_conversion,
         sectors_out=None,
         sectors_out=None,
         processing_info_country=processing_info_country,
         processing_info_country=processing_info_country,
     )
     )
@@ -79,7 +79,7 @@ def process_UNFCCC_DI_for_country(
         entities_to_ignore: List[str],
         entities_to_ignore: List[str],
         gas_baskets: Dict[str, List[str]],
         gas_baskets: Dict[str, List[str]],
         filter_dims: Optional[Dict[str, List[str]]] = None,
         filter_dims: Optional[Dict[str, List[str]]] = None,
-        cat_conversion: Dict[str, Dict] = None,
+        category_conversion: Dict[str, Dict] = None,
         sectors_out: List[str] = None,
         sectors_out: List[str] = None,
         processing_info_country: Dict = None,
         processing_info_country: Dict = None,
 ) -> xr.Dataset:
 ) -> xr.Dataset:
@@ -289,9 +289,11 @@ def process_UNFCCC_DI_for_country(
     if country_code in nAI_countries:
     if country_code in nAI_countries:
         # conversion from BURDI to IPCC2006_PRIMAP needed
         # conversion from BURDI to IPCC2006_PRIMAP needed
         cat_terminology_out = 'IPCC2006_PRIMAP'
         cat_terminology_out = 'IPCC2006_PRIMAP'
+        if category_conversion is None:
+            category_conversion = cat_conversion[f"{cat_terminology_in}_to_{cat_terminology_out}"]
         data_country = convert_categories(
         data_country = convert_categories(
             data_country,
             data_country,
-            cat_conversion[f"{cat_terminology_in}_to_{cat_terminology_out}"],
+            category_conversion,
             cat_terminology_out,
             cat_terminology_out,
             debug=False,
             debug=False,
             tolerance=0.01,
             tolerance=0.01,
@@ -345,7 +347,7 @@ def process_UNFCCC_DI_for_country_group(
 ) -> xr.Dataset:
 ) -> xr.Dataset:
     """
     """
     This function processes DI data for all countries in a group (annexI or non-AnnexI)
     This function processes DI data for all countries in a group (annexI or non-AnnexI)
-    TODO: currently only non-annexI is implemented
+    
     The function processes all data in one go using datalad run. as the output data file
     The function processes all data in one go using datalad run. as the output data file
     names are unknown beforehand datalad run uses explicit=false
     names are unknown beforehand datalad run uses explicit=false
 
 

+ 4 - 2
UNFCCC_GHG_data/UNFCCC_DI_reader/util.py

@@ -4,9 +4,11 @@ from UNFCCC_GHG_data.helper import code_path
 
 
 #reader = unfccc_di_api.UNFCCCApiReader()
 #reader = unfccc_di_api.UNFCCCApiReader()
 #nAI_countries = list(reader.non_annex_one_reader.parties["code"])
 #nAI_countries = list(reader.non_annex_one_reader.parties["code"])
-nAI_countries = pd.read_csv(code_path / 'UNFCCC_DI_reader' / 'DI_NAI_parties.conf')
+nAI_countries = list(pd.read_csv(code_path / 'UNFCCC_DI_reader' /
+                                 'DI_NAI_parties.conf')["code"])
 #AI_countries = list(reader.annex_one_reader.parties["code"])
 #AI_countries = list(reader.annex_one_reader.parties["code"])
-AI_countries = pd.read_csv(code_path / 'UNFCCC_DI_reader' / 'DI_AI_parties.conf')
+AI_countries = list(pd.read_csv(code_path / 'UNFCCC_DI_reader' /
+                                'DI_AI_parties.conf')["code"])
 
 
 DI_date_format = '%Y-%m-%d'
 DI_date_format = '%Y-%m-%d'
 regex_date = r"([0-9]{4}-[0-9]{2}-[0-9]{2})"
 regex_date = r"([0-9]{4}-[0-9]{2}-[0-9]{2})"

+ 15 - 0
UNFCCC_GHG_data/helper/functions.py

@@ -1,6 +1,7 @@
 import pycountry
 import pycountry
 import json
 import json
 import xarray as xr
 import xarray as xr
+import pandas as pd
 from copy import deepcopy
 from copy import deepcopy
 from typing import Dict, List
 from typing import Dict, List
 from pathlib import Path
 from pathlib import Path
@@ -20,6 +21,12 @@ def convert_categories(
     """
     """
     convert data from one category terminology to another
     convert data from one category terminology to another
     """
     """
+    print(f"converting categories to {terminology_to}")
+
+    if 'orig_cat_name' in ds_input.coords:
+        cat_name_present = True
+    else:
+        cat_name_present = False
     ds_converted = ds_input.copy(deep=True)
     ds_converted = ds_input.copy(deep=True)
     ds_converted.attrs = deepcopy(ds_input.attrs)
     ds_converted.attrs = deepcopy(ds_input.attrs)
 
 
@@ -55,6 +62,8 @@ def convert_categories(
                 print(f"Category: {cat_to_agg}")
                 print(f"Category: {cat_to_agg}")
             source_cats = [cat for cat in aggregate_cats[cat_to_agg]['sources'] if
             source_cats = [cat for cat in aggregate_cats[cat_to_agg]['sources'] if
                            cat in cats_present_mapped]
                            cat in cats_present_mapped]
+            if debug:
+                print(source_cats)
             data_agg = ds_converted.pr.loc[{'category': source_cats}].pr.sum(
             data_agg = ds_converted.pr.loc[{'category': source_cats}].pr.sum(
                 dim='category', skipna=True, min_count=1)
                 dim='category', skipna=True, min_count=1)
             nan_vars = [var for var in data_agg.data_vars if
             nan_vars = [var for var in data_agg.data_vars if
@@ -65,7 +74,13 @@ def convert_categories(
                 data_agg = data_agg.assign_coords(
                 data_agg = data_agg.assign_coords(
                     coords={f'category ({terminology_to})':
                     coords={f'category ({terminology_to})':
                                 (f'category ({terminology_to})', [cat_to_agg])})
                                 (f'category ({terminology_to})', [cat_to_agg])})
+                if cat_name_present:
+                    data_agg = data_agg.assign_coords(
+                        coords={'orig_cat_name':
+                                    (f'category ({terminology_to})',
+                                     [aggregate_cats[cat_to_agg]['name']])})
                 ds_converted = ds_converted.pr.merge(data_agg, tolerance=tolerance)
                 ds_converted = ds_converted.pr.merge(data_agg, tolerance=tolerance)
+                cats_present_mapped.append(cat_to_agg)
             else:
             else:
                 print(f"no data to aggregate category {cat_to_agg}")
                 print(f"no data to aggregate category {cat_to_agg}")