Browse Source

First working version of Australia CRT specifications

Johannes Gütschow 1 year ago
parent
commit
8e495cf306

+ 14 - 5
UNFCCC_GHG_data/UNFCCC_CRF_reader/UNFCCC_CRF_reader_core.py

@@ -5,6 +5,7 @@ well as for test-reading to check for new categories etc.
 """
 
 import re
+import os
 import json
 import numpy as np
 import pandas as pd
@@ -246,6 +247,8 @@ def read_crf_table(
     if len(country_codes) == 1:
         try:
             crf_spec = getattr(crf, f"CRF{submission_year}_{country_codes[0]}")
+            print(f"Using country specific specification: " 
+                  f"CRF{submission_year}_{country_codes[0]}")
         except:
             # no country specific specification, check for general specification
             try:
@@ -355,7 +358,10 @@ def read_crf_table_from_file(
         last_row_nan = True
     else:
         last_row_nan = False
-
+    
+    # remove empty columns (for Australia tables start with an empty column)
+    df_raw = df_raw.dropna(how='all', axis=1)
+    
     #### prepare the header (2 row header, first entity, then unit)
     # We do this before removing columns and any other processing to
     # have consistent column names in the configuration and to avoid
@@ -411,7 +417,6 @@ def read_crf_table_from_file(
 
     df_current.iloc[0] = units
     df_current.columns = entities
-
     # remove all columns to ignore
     df_current = df_current.drop(columns=table_properties["cols_to_ignore"])
 
@@ -519,7 +524,6 @@ def read_crf_table_from_file(
     # set index
     df_current = df_current.set_index(index_cols)
     # process the unit information using the primap2 functions
-
     df_current = pm2.pm2io.nir_add_unit_information(df_current, **table_properties["unit_info"])
 
     # convert to long format
@@ -665,10 +669,11 @@ def get_info_from_crf_filename(
     dict with fields:
         party: the party that submitted the data (3 letter code)
         submission_year: year of submission
-        data_year: year in which the meissions took place
+        data_year: year in which the emissions took place
         date: date of the submission
         extra: rest of the file name
     """
+    filename = os.path.splitext(filename)[0]
     name_parts = filename.split("_")
     file_info = {}
     file_info["party"] = name_parts[0]
@@ -680,7 +685,11 @@ def get_info_from_crf_filename(
               "could not be converted to int.")
         file_info["data_year"] = name_parts[2]
     file_info["date"] = name_parts[3]
-    file_info["extra"] = name_parts[4]
+    # the last part (time code) is missing for Australia since 2023
+    if len(name_parts) > 4:
+        file_info["extra"] = name_parts[4]
+    else:
+        file_info["extra"] = ""
     return file_info
 
 

+ 60 - 11
UNFCCC_GHG_data/UNFCCC_CRF_reader/UNFCCC_CRF_reader_devel.py

@@ -23,6 +23,7 @@ def read_year_to_test_specs(
         submission_year: int,
         data_year: Optional[int]=None,
         totest: Optional[bool]=False,
+        country_code: Optional=None,
 ) -> xr.Dataset:
     """
     Read one xlsx file (so one data year) for each country for a submission year to
@@ -41,11 +42,33 @@ def read_year_to_test_specs(
     if totest:
         print("Reading only tables to test.")
     print("#"*80)
-    try:
-        crf_spec = getattr(crf, f"CRF{submission_year}")
-    except:
-        raise ValueError(f"No terminology exists for submission years {submission_year}, "
-                         f"{submission_year - 1}")
+
+
+    # get specification
+    # if we only have a single country check if we might have a country specific
+    # specification (currently only Australia, 2023)
+    if country_code is not None:
+        try:
+            crf_spec = getattr(crf, f"CRF{submission_year}_{country_code}")
+            print(
+                f"Using country specific specification: "
+                f"CRF{submission_year}_{country_code}"
+            )
+        except:
+            # no country specific specification, check for general specification
+            try:
+                crf_spec = getattr(crf, f"CRF{submission_year}")
+            except:
+                raise ValueError(
+                    f"No terminology exists for submission year " f"{submission_year}"
+                )
+    else:
+        try:
+            crf_spec = getattr(crf, f"CRF{submission_year}")
+        except:
+            raise ValueError(
+                f"No terminology exists for submission year " f"{submission_year}"
+            )
 
     if totest:
         tables = [table for table in crf_spec.keys()
@@ -57,7 +80,11 @@ def read_year_to_test_specs(
           f"CRF{submission_year} specification: {tables}")
     print("#" * 80)
 
-    for country_code in all_crf_countries:
+    if country_code is not None:
+        countries_to_read = [country_code]
+    else:
+        countries_to_read = all_crf_countries
+    for country_code in countries_to_read:
         # get country name
         country_name = get_country_name(country_code)
         print(f"Reading for {country_name}")
@@ -116,14 +143,32 @@ def read_year_to_test_specs(
     # process log messages.
     today = date.today()
     if len(unknown_categories) > 0:
-        log_location = log_path / f"CRF{submission_year}" \
-                       / f"{data_year}_unknown_categories_{today.strftime('%Y-%m-%d')}.csv"
+        if country_code is not None:
+            log_location = (
+                log_path
+                / f"CRF{submission_year}"
+                / f"{data_year}_unknown_categories_{country_code}"
+                  f"_{today.strftime('%Y-%m-%d')}.csv"
+            )
+        else:
+            log_location = (log_path / f"CRF{submission_year}"
+                            / f"{data_year}_unknown_categories_"
+                              f"{today.strftime('%Y-%m-%d')}.csv")
         print(f"Unknown rows found. Savin log to {log_location}")
         save_unknown_categories_info(unknown_categories, log_location)
 
     if len(last_row_info) > 0:
-        log_location = log_path / f"CRF{submission_year}" \
-                       / f"{data_year}_last_row_info_{today.strftime('%Y-%m-%d')}.csv"
+        if country_code is not None:
+            log_location = (
+               log_path
+               / f"CRF{submission_year}"
+               / f"{data_year}_last_row_info_{country_code}_"
+                 f"{today.strftime('%Y-%m-%d')}.csv"
+           )
+        else:
+            log_location = (log_path / f"CRF{submission_year}"
+                            / f"{data_year}_last_row_info_"
+                              f"{today.strftime('%Y-%m-%d')}.csv")
         print(f"Data found in the last row. Saving log to "
               f"{log_location}")
         save_last_row_info(last_row_info, log_location)
@@ -131,7 +176,11 @@ def read_year_to_test_specs(
     # save the data:
     compression = dict(zlib=True, complevel=9)
     output_folder = log_path / f"test_read_CRF{submission_year}"
-    output_filename = f"CRF{submission_year}_{today.strftime('%Y-%m-%d')}"
+    if country_code is not None:
+        output_filename = (f"CRF{submission_year}_{country_code}_"
+                           f"{today.strftime('%Y-%m-%d')}")
+    else:
+        output_filename = f"CRF{submission_year}_{today.strftime('%Y-%m-%d')}"
     if totest:
         output_filename = output_filename + "_totest"
 

+ 15 - 4
UNFCCC_GHG_data/UNFCCC_CRF_reader/UNFCCC_CRF_reader_prod.py

@@ -90,12 +90,23 @@ def read_crf_for_country(
     # get country name
     country_name = get_country_name(country_code)
 
-    # get specification and available tables
+
+    # get specification
+    # if we only have a single country check if we might have a country specific
+    # specification (currently only Australia, 2023)
     try:
-        crf_spec = getattr(crf, f"CRF{submission_year}")
-        #print(table_spec)
+        crf_spec = getattr(crf, f"CRF{submission_year}_{country_code}")
+        print(f"Using country specific specification: "
+              f"CRF{submission_year}_{country_code}")
     except:
-        raise ValueError(f"No terminology exists for submission year {submission_year}")
+        # no country specific specification, check for general specification
+        try:
+            crf_spec = getattr(crf, f"CRF{submission_year}")
+        except:
+            raise ValueError(
+                f"No terminology exists for submission year " f"{submission_year}"
+            )
+
 
     tables = [table for table in crf_spec.keys()
               if crf_spec[table]["status"] == "tested"]

File diff suppressed because it is too large
+ 251 - 232
UNFCCC_GHG_data/UNFCCC_CRF_reader/crf_specifications/CRF2023_AUS_specification.py


+ 2 - 1
UNFCCC_GHG_data/UNFCCC_CRF_reader/crf_specifications/__init__.py

@@ -5,5 +5,6 @@ Define the CRF specifications here for easy access
 from .CRF2021_specification import CRF2021
 from .CRF2022_specification import CRF2022
 from .CRF2023_specification import CRF2023
+from .CRF2023_AUS_specification import CRF2023_AUS
 
-__all__ = ["CRF2021", "CRF2022", "CRF2023"]
+__all__ = ["CRF2021", "CRF2022", "CRF2023", "CRF2023_AUS"]

+ 6 - 2
UNFCCC_GHG_data/UNFCCC_CRF_reader/crf_specifications/util.py

@@ -16,6 +16,7 @@ unit_info = {
         "regexp_unit": r"\((.*)\)",
         "manual_repl_unit": {
             "CO2 equivalent (kt)": "kt CO2eq",
+            "CO2 equivalents (kt) (2)": "kt CO2eq", # for AUS Table2(II)
         },
         "default_unit": "t",
     },
@@ -26,8 +27,11 @@ unit_info = {
         "regexp_unit": r"\((.*)\)",
         "manual_repl_unit": {
             "CO2 equivalent (kt)": "kt CO2eq",
-            "CO2 equivalents (kt)": "kt CO2eq", # for AUS Table1
+            "CO2 equivalents (kt) (2) ": "kt CO2eq", # for AUS Table1
             "CO2 equivalent (kt) (3)": "kt CO2eq", # for AUS, Table2(I)
+            "CO2 equivalents (kt) (3)": "kt CO2eq", # for AUS, Table2(I)
+            "CO2 equivalents (kt) (2)": "kt CO2eq", # for AUS Table3
+            "CO2 equivalents (kt) (4)": "kt CO2eq", # for AUS Table4
         },
         "default_unit": "kt",
     },
@@ -38,7 +42,7 @@ unit_info = {
         "regexp_unit": r"\((.*)\)",
         "manual_repl_unit": {
             "(kt CO2 equivalent)": "kt CO2eq",
-            "CO2 equivalent (kt) (3)": "kt CO2eq", # for AUS
+            "CO2 equivalents (kt) (3)": "kt CO2eq", # for AUS
         },
         "default_unit": "kt",
     },

+ 4 - 0
UNFCCC_GHG_data/UNFCCC_CRF_reader/test_read_UNFCCC_CRF_for_year.py

@@ -9,12 +9,15 @@ import argparse
 parser = argparse.ArgumentParser()
 parser.add_argument('--submission_year', help='Submission round to read', type=int)
 parser.add_argument('--data_year', help='Data year to read', type=int, default=2010)
+parser.add_argument('--country', help='Country to read', type=str, default=None)
 parser.add_argument('--totest', help='read tables to test', action='store_true')
 args = parser.parse_args()
 
 
 submission_year = args.submission_year
 data_year = args.data_year
+country = args.country
+#print(f"totest: {args.totest}")
 if args.totest:
     totest = True
 else:
@@ -24,6 +27,7 @@ read_year_to_test_specs(
     submission_year=submission_year,
     data_year=data_year,
     totest=totest,
+    country_code=country,
 )
 
 

+ 2 - 1
dodo.py

@@ -202,7 +202,7 @@ read_config_crf = {
     "re_read": get_var('re_read', False),
     "countries": get_var('countries', None),
     "data_year": get_var('data_year', None),
-    "totest": get_var('data_year', None),
+    "totest": get_var('totest', None),
 }
 
 def task_read_unfccc_crf_submission():
@@ -255,6 +255,7 @@ def task_test_read_unfccc_crf_for_year():
                f"UNFCCC_GHG_data/UNFCCC_CRF_reader"
                f"/test_read_UNFCCC_CRF_for_year.py "
                f"--submission_year={read_config_crf['submission_year']} "
+               f"--country={read_config_crf['country']} "
                ]
     if read_config_crf["totest"] == "True":
         actions[0] = actions[0] + " --totest"

Some files were not shown because too many files changed in this diff