1 year ago · f0e96c7335
--- a/UNFCCC_GHG_data/UNFCCC_DI_reader/UNFCCC_DI_reader_config.py
+++ b/UNFCCC_GHG_data/UNFCCC_DI_reader/UNFCCC_DI_reader_config.py
@@ -4,6 +4,26 @@ di_query_filters = [
 
															 # category, party are extra
														
 
															 # measure is preprocessed to find ids
														
 
															+# the activity data and emissions factors have a structure that is incompatible
														
 
															+# with PRIMAP2.
														
 
															+# To read it into a primap2 dataframe the information in classification / measure
														
 
															+# has to be put into "entity" which is currently always "No gas". I's possible,
														
 
															+# but takes some time, so I have omitted it here
														
 
															+filter_activity_factors = {
														
 
															+    "entity": {"gas": ["No gas"]},
														
 
															+    "unit": {"unit": [
														
 
															+        'no unit', 'kg/TJ', 't/TJ', '%', 'kg/t',
														
 
															+        'kg/kt', 't/t', 'kg/head/year', 'kg N2O/kg N handled', 'kg N2O/kg N',
														
 
															+        'kg N2O-N/kg N handled', 'g/m^2', 'kg N2O-N/kg N', 'kg N2O-N/ha', 'kg/t dm',
														
 
															+        't CO2-C/t', 't/unit', 't C/ha', 'kg CH4/ha', 'kg CO2/ha',
														
 
															+        'g/kg', 'kg/kg DC',
														
 
															+    ]
														
 
															+    },
														
 
															+}
														
 
															+
														
 
															+# regular expression to match category code in category label
														
 
															+cat_code_regexp = r'(?P<code>^(([0-9][A-Za-z0-9\.]{0,10}[0-9A-Za-z]))|([0-9]))[' \
														
 
															+                  r'\s\.].*'
														
 
															 # PRIMAP2 interchange format config
														
 
															 di_to_pm2if_template_nai = {
														
--- a/UNFCCC_GHG_data/UNFCCC_DI_reader/UNFCCC_DI_reader_core.py
+++ b/UNFCCC_GHG_data/UNFCCC_DI_reader/UNFCCC_DI_reader_core.py
@@ -24,6 +24,7 @@ from .UNFCCC_DI_reader_config import di_query_filters
 
															 from .UNFCCC_DI_reader_config import di_processing_info
														
 
															 from .UNFCCC_DI_reader_config import cat_conversion
														
 
															 from .UNFCCC_DI_reader_config import gas_baskets
														
 
															+from .UNFCCC_DI_reader_config import cat_code_regexp
														
 
															 from .util import NoDIDataError, nAI_countries, AI_countries
														
 
															 from .util import DI_date_format, regex_date
														
@@ -413,14 +414,14 @@ def read_UNFCCC_DI_for_country_df(
 
															     }
														
 
															     # find country group
														
 
															-    if country_code in list(reader.non_annex_one_reader.parties["code"]):
														
 
															+    if country_code in nAI_countries:
														
 
															         ai_country = False
														
 
															-    elif country_code in list(reader.annex_one_reader.parties["code"]):
														
 
															+    elif country_code in AI_countries:
														
 
															         ai_country = True
														
 
															         #di_data = reader.annex_one_reader.query(**query)
														
 
															     else:
														
 
															         raise ValueError(f"Country code {country_code} found neither in AnnexI nor "
														
 
															-                         f"non-AnnexI countrz lists.")
														
 
															+                         f"non-AnnexI country lists.")
														
 
															     if category_groups is None:
														
 
															         # no category defs given, so use default which is all categories,
														
@@ -558,27 +559,6 @@ def convert_DI_data_to_pm2_if(
 
															     print("Convert data to PRIMAP2 interchange format")
														
 
															-    # regular expression to match category code in category label
														
 
															-    cat_code_regexp = r'(?P<code>^(([0-9][A-Za-z0-9\.]{0,10}[0-9A-Za-z]))|([0-9]))[' \
														
 
															-                      r'\s\.].*'
														
 
															-
														
 
															-    # the activity data and emissions factors have a structure that is incompatible
														
 
															-    # with PRIMAP2.
														
 
															-    # To read it into a primap2 dataframe the information in classification / measure
														
 
															-    # has to be put into "entity" which is currently always "No gas". I's possible,
														
 
															-    # but takes some time, so I have omitted it here
														
 
															-    filter_activity_factors = {
														
 
															-        "entity": {"gas": ["No gas"]},
														
 
															-        "unit": {"unit": [
														
 
															-            'no unit', 'kg/TJ', 't/TJ', '%', 'kg/t',
														
 
															-            'kg/kt', 't/t', 'kg/head/year', 'kg N2O/kg N handled', 'kg N2O/kg N',
														
 
															-            'kg N2O-N/kg N handled', 'g/m^2', 'kg N2O-N/kg N', 'kg N2O-N/ha', 'kg/t dm',
														
 
															-            't CO2-C/t', 't/unit', 't C/ha', 'kg CH4/ha', 'kg CO2/ha',
														
 
															-            'g/kg', 'kg/kg DC',
														
 
															-        ]
														
 
															-        },
														
 
															-    }
														
 
															-
														
 
															     # create a copy of the data to avoid data altering the original data
														
 
															     # this will be done inside the *convert_to_long_dataframe* function
														
 
															     # in the future. Thus it can be removed here once the category column
														
@@ -587,12 +567,10 @@ def convert_DI_data_to_pm2_if(
 
															     # check which country group we have
														
 
															     reader = unfccc_di_api.UNFCCCApiReader()
														
 
															-    ai_parties = list(reader.annex_one_reader.parties["code"])
														
 
															-    nai_parties = list(reader.non_annex_one_reader.parties["code"])
														
 
															     parties_present_ai = [party for party in data_temp["party"].unique() if party
														
 
															-                          in ai_parties]
														
 
															+                          in AI_countries]
														
 
															     parties_present_nai = [party for party in data_temp["party"].unique() if party
														
 
															-                          in nai_parties]
														
 
															+                          in nAI_countries]
														
 
															     if len(parties_present_ai) > 0:
														
 
															         if len(parties_present_nai) > 0:
														
 
															             raise ValueError("AnnexI and non-AnnexI parties present in one dataset. "
														
@@ -857,7 +835,6 @@ def read_UNFCCC_DI_for_country_group(
 
															 ) -> xr.Dataset:
														
 
															     '''
														
 
															     This function reads DI data for all countries in a group (annexI or non-AnnexI)
														
 
															-    TODO: currently only non-annexI is implemented
														
 
															     The function reads all data in one go using datalad run. as the output data file
														
 
															     names are unknown beforehand datalad run uses explicit=false
														
 
															     '''
														
@@ -865,6 +842,66 @@ def read_UNFCCC_DI_for_country_group(
 
															     today = date.today()
														
 
															     date_str = today.strftime(DI_date_format)
														
 
															+    if annexI:
														
 
															+        countries = AI_countries
														
 
															+    else:
														
 
															+        countries = nAI_countries
														
 
															+
														
 
															+    # read the data
														
 
															+    data_all = None
														
 
															+    for country in countries[0:5]:
														
 
															+        print(f"reading DI data for country {country}")
														
 
															+
														
 
															+        try:
														
 
															+            data_country = read_UNFCCC_DI_for_country(
														
 
															+                country_code=country,
														
 
															+                category_groups=None,  # read all categories
														
 
															+                read_subsectors=False,  # not applicable as we read all categories
														
 
															+                date_str=date_str,
														
 
															+                pm2if_specifications=None,
														
 
															+                # automatically use the right specs for AI and NAI
														
 
															+                default_gwp=None,  # automatically uses right default GWP for AI and NAI
														
 
															+                debug=False)
														
 
															+
														
 
															+            if data_all is None:
														
 
															+                data_all = data_country
														
 
															+            else:
														
 
															+                data_all = data_all.pr.merge(data_country)
														
 
															+        except unfccc_di_api.NoDataError as err:
														
 
															+            print(f"No data for {country}.")
														
 
															+            print(err)
														
 
															+
														
 
															+    # TODO: add more info to metadata? (like list of covered countries)
														
 
															+    if annexI:
														
 
															+        data_all.attrs["comment"] = data_all.attrs["comment"] + " Data for AnnexI " \
														
 
															+                                                                "countries."
														
 
															+    else:
														
 
															+        data_all.attrs["comment"] = data_all.attrs["comment"] + " Data for non-AnnexI " \
														
 
															+                                                                "countries."
														
 
															+
														
 
															+    # save the data
														
 
															+    save_DI_dataset(data_all, raw=True, annexI=annexI)
														
 
															+
														
 
															+    return data_all
														
 
															+
														
 
															+
														
 
															+def process_UNFCCC_DI_for_country_group(
														
 
															+        annexI: bool=False,
														
 
															+) -> xr.Dataset:
														
 
															+    '''
														
 
															+    This function processes DI data for all countries in a group (annexI or non-AnnexI)
														
 
															+    TODO: currently only non-annexI is implemented
														
 
															+    The function processes all data in one go using datalad run. as the output data file
														
 
															+    names are unknown beforehand datalad run uses explicit=false
														
 
															+
														
 
															+    TODO: use the latest
														
 
															+
														
 
															+
														
 
															+    '''
														
 
															+
														
 
															+    today = date.today()
														
 
															+    date_str = today.strftime(DI_date_format)
														
 
															+
														
 
															     if annexI:
														
 
															         raise ValueError("Bulk reading for AnnexI countries not implemented yet")
														
 
															     else:
														
--- a/extracted_data/UNFCCC/folder_mapping.json
+++ b/extracted_data/UNFCCC/folder_mapping.json
@@ -17,6 +17,7 @@
 
															     "GRC": "Greece",
														
 
															     "LTU": "Lithuania",
														
 
															     "GBR": "United_Kingdom",
														
 
															+    "AFG": "Afghanistan",
														
 
															     "USA": "United_States",
														
 
															     "TUR": "Turkey",
														
 
															     "NOR": "Norway",
														
@@ -24,14 +25,17 @@
 
															     "UKR": "Ukraine",
														
 
															     "ARG": "Argentina",
														
 
															     "FIN": "Finland",
														
 
															+    "ALB": "Albania",
														
 
															     "BEL": "Belgium",
														
 
															     "MCO": "Monaco",
														
 
															+    "AGO": "Angola",
														
 
															     "KAZ": "Kazakhstan",
														
 
															     "JPN": "Japan",
														
 
															     "CYP": "Cyprus",
														
 
															     "ESP": "Spain",
														
 
															     "NLD": "Netherlands",
														
 
															     "LVA": "Latvia",
														
 
															+    "DZA": "Algeria",
														
 
															     "MAR": "Morocco",
														
 
															     "CAN": "Canada",
														
 
															     "DEU": "Germany",