1 year ago · 8e495cf306
--- a/UNFCCC_GHG_data/UNFCCC_CRF_reader/UNFCCC_CRF_reader_core.py
+++ b/UNFCCC_GHG_data/UNFCCC_CRF_reader/UNFCCC_CRF_reader_core.py
@@ -5,6 +5,7 @@ well as for test-reading to check for new categories etc.
 
				 """
			
 
				 
			
 
				 import re
			
 
				+import os
			
 
				 import json
			
 
				 import numpy as np
			
 
				 import pandas as pd
			
@@ -246,6 +247,8 @@ def read_crf_table(
 
				     if len(country_codes) == 1:
			
 
				         try:
			
 
				             crf_spec = getattr(crf, f"CRF{submission_year}_{country_codes[0]}")
			
 
				+            print(f"Using country specific specification: " 
			
 
				+                  f"CRF{submission_year}_{country_codes[0]}")
			
 
				         except:
			
 
				             # no country specific specification, check for general specification
			
 
				             try:
			
@@ -355,7 +358,10 @@ def read_crf_table_from_file(
 
				         last_row_nan = True
			
 
				     else:
			
 
				         last_row_nan = False
			
 
				-
			
 
				+    
			
 
				+    # remove empty columns (Australia's tables start with an empty column)
			
 
				+    df_raw = df_raw.dropna(how='all', axis=1)
			
 
				+    
			
 
				     #### prepare the header (2 row header, first entity, then unit)
			
 
				     # We do this before removing columns and any other processing to
			
 
				     # have consistent column names in the configuration and to avoid
			
@@ -411,7 +417,6 @@ def read_crf_table_from_file(
 
				 
			
 
				     df_current.iloc[0] = units
			
 
				     df_current.columns = entities
			
 
				-
			
 
				     # remove all columns to ignore
			
 
				     df_current = df_current.drop(columns=table_properties["cols_to_ignore"])
			
 
				 
			
@@ -519,7 +524,6 @@ def read_crf_table_from_file(
 
				     # set index
			
 
				     df_current = df_current.set_index(index_cols)
			
 
				     # process the unit information using the primap2 functions
			
 
				-
			
 
				     df_current = pm2.pm2io.nir_add_unit_information(df_current, **table_properties["unit_info"])
			
 
				 
			
 
				     # convert to long format
			
@@ -665,10 +669,11 @@ def get_info_from_crf_filename(
 
				     dict with fields:
			
 
				         party: the party that submitted the data (3 letter code)
			
 
				         submission_year: year of submission
			
 
				-        data_year: year in which the meissions took place
			
 
				+        data_year: year in which the emissions took place
			
 
				         date: date of the submission
			
 
				         extra: rest of the file name
			
 
				     """
			
 
				+    filename = os.path.splitext(filename)[0]
			
 
				     name_parts = filename.split("_")
			
 
				     file_info = {}
			
 
				     file_info["party"] = name_parts[0]
			
@@ -680,7 +685,11 @@ def get_info_from_crf_filename(
 
				               "could not be converted to int.")
			
 
				         file_info["data_year"] = name_parts[2]
			
 
				     file_info["date"] = name_parts[3]
			
 
				-    file_info["extra"] = name_parts[4]
			
 
				+    # the last part (time code) is missing for Australia since 2023
			
 
				+    if len(name_parts) > 4:
			
 
				+        file_info["extra"] = name_parts[4]
			
 
				+    else:
			
 
				+        file_info["extra"] = ""
			
 
				     return file_info
			
 
				 
			
 
				 
			
--- a/UNFCCC_GHG_data/UNFCCC_CRF_reader/UNFCCC_CRF_reader_devel.py
+++ b/UNFCCC_GHG_data/UNFCCC_CRF_reader/UNFCCC_CRF_reader_devel.py
@@ -23,6 +23,7 @@ def read_year_to_test_specs(
 
				         submission_year: int,
			
 
				         data_year: Optional[int]=None,
			
 
				         totest: Optional[bool]=False,
			
 
				+        country_code: Optional=None,
			
 
				 ) -> xr.Dataset:
			
 
				     """
			
 
				     Read one xlsx file (so one data year) for each country for a submission year to
			
@@ -41,11 +42,33 @@ def read_year_to_test_specs(
 
				     if totest:
			
 
				         print("Reading only tables to test.")
			
 
				     print("#"*80)
			
 
				-    try:
			
 
				-        crf_spec = getattr(crf, f"CRF{submission_year}")
			
 
				-    except:
			
 
				-        raise ValueError(f"No terminology exists for submission years {submission_year}, "
			
 
				-                         f"{submission_year - 1}")
			
 
				+
			
 
				+
			
 
				+    # get specification
			
 
				+    # if we only have a single country check if we might have a country specific
			
 
				+    # specification (currently only Australia, 2023)
			
 
				+    if country_code is not None:
			
 
				+        try:
			
 
				+            crf_spec = getattr(crf, f"CRF{submission_year}_{country_code}")
			
 
				+            print(
			
 
				+                f"Using country specific specification: "
			
 
				+                f"CRF{submission_year}_{country_code}"
			
 
				+            )
			
 
				+        except:
			
 
				+            # no country specific specification, check for general specification
			
 
				+            try:
			
 
				+                crf_spec = getattr(crf, f"CRF{submission_year}")
			
 
				+            except:
			
 
				+                raise ValueError(
			
 
				+                    f"No terminology exists for submission year " f"{submission_year}"
			
 
				+                )
			
 
				+    else:
			
 
				+        try:
			
 
				+            crf_spec = getattr(crf, f"CRF{submission_year}")
			
 
				+        except:
			
 
				+            raise ValueError(
			
 
				+                f"No terminology exists for submission year " f"{submission_year}"
			
 
				+            )
			
 
				 
			
 
				     if totest:
			
 
				         tables = [table for table in crf_spec.keys()
			
@@ -57,7 +80,11 @@ def read_year_to_test_specs(
 
				           f"CRF{submission_year} specification: {tables}")
			
 
				     print("#" * 80)
			
 
				 
			
 
				-    for country_code in all_crf_countries:
			
 
				+    if country_code is not None:
			
 
				+        countries_to_read = [country_code]
			
 
				+    else:
			
 
				+        countries_to_read = all_crf_countries
			
 
				+    for country_code in countries_to_read:
			
 
				         # get country name
			
 
				         country_name = get_country_name(country_code)
			
 
				         print(f"Reading for {country_name}")
			
@@ -116,14 +143,32 @@ def read_year_to_test_specs(
 
				     # process log messages.
			
 
				     today = date.today()
			
 
				     if len(unknown_categories) > 0:
			
 
				-        log_location = log_path / f"CRF{submission_year}" \
			
 
				-                       / f"{data_year}_unknown_categories_{today.strftime('%Y-%m-%d')}.csv"
			
 
				+        if country_code is not None:
			
 
				+            log_location = (
			
 
				+                log_path
			
 
				+                / f"CRF{submission_year}"
			
 
				+                / f"{data_year}_unknown_categories_{country_code}"
			
 
				+                  f"_{today.strftime('%Y-%m-%d')}.csv"
			
 
				+            )
			
 
				+        else:
			
 
				+            log_location = (log_path / f"CRF{submission_year}"
			
 
				+                            / f"{data_year}_unknown_categories_"
			
 
				+                              f"{today.strftime('%Y-%m-%d')}.csv")
			
 
				         print(f"Unknown rows found. Savin log to {log_location}")
			
 
				         save_unknown_categories_info(unknown_categories, log_location)
			
 
				 
			
 
				     if len(last_row_info) > 0:
			
 
				-        log_location = log_path / f"CRF{submission_year}" \
			
 
				-                       / f"{data_year}_last_row_info_{today.strftime('%Y-%m-%d')}.csv"
			
 
				+        if country_code is not None:
			
 
				+            log_location = (
			
 
				+               log_path
			
 
				+               / f"CRF{submission_year}"
			
 
				+               / f"{data_year}_last_row_info_{country_code}_"
			
 
				+                 f"{today.strftime('%Y-%m-%d')}.csv"
			
 
				+           )
			
 
				+        else:
			
 
				+            log_location = (log_path / f"CRF{submission_year}"
			
 
				+                            / f"{data_year}_last_row_info_"
			
 
				+                              f"{today.strftime('%Y-%m-%d')}.csv")
			
 
				         print(f"Data found in the last row. Saving log to "
			
 
				               f"{log_location}")
			
 
				         save_last_row_info(last_row_info, log_location)
			
@@ -131,7 +176,11 @@ def read_year_to_test_specs(
 
				     # save the data:
			
 
				     compression = dict(zlib=True, complevel=9)
			
 
				     output_folder = log_path / f"test_read_CRF{submission_year}"
			
 
				-    output_filename = f"CRF{submission_year}_{today.strftime('%Y-%m-%d')}"
			
 
				+    if country_code is not None:
			
 
				+        output_filename = (f"CRF{submission_year}_{country_code}_"
			
 
				+                           f"{today.strftime('%Y-%m-%d')}")
			
 
				+    else:
			
 
				+        output_filename = f"CRF{submission_year}_{today.strftime('%Y-%m-%d')}"
			
 
				     if totest:
			
 
				         output_filename = output_filename + "_totest"
			
 
				 
			
--- a/UNFCCC_GHG_data/UNFCCC_CRF_reader/UNFCCC_CRF_reader_prod.py
+++ b/UNFCCC_GHG_data/UNFCCC_CRF_reader/UNFCCC_CRF_reader_prod.py
@@ -90,12 +90,23 @@ def read_crf_for_country(
 
				     # get country name
			
 
				     country_name = get_country_name(country_code)
			
 
				 
			
 
				-    # get specification and available tables
			
 
				+
			
 
				+    # get specification
			
 
				+    # use the country specific specification if one exists (currently only
			
 
				+    # Australia, 2023); otherwise fall back to the general specification
			
 
				     try:
			
 
				-        crf_spec = getattr(crf, f"CRF{submission_year}")
			
 
				-        #print(table_spec)
			
 
				+        crf_spec = getattr(crf, f"CRF{submission_year}_{country_code}")
			
 
				+        print(f"Using country specific specification: "
			
 
				+              f"CRF{submission_year}_{country_code}")
			
 
				     except:
			
 
				-        raise ValueError(f"No terminology exists for submission year {submission_year}")
			
 
				+        # no country specific specification, check for general specification
			
 
				+        try:
			
 
				+            crf_spec = getattr(crf, f"CRF{submission_year}")
			
 
				+        except:
			
 
				+            raise ValueError(
			
 
				+                f"No terminology exists for submission year " f"{submission_year}"
			
 
				+            )
			
 
				+
			
 
				 
			
 
				     tables = [table for table in crf_spec.keys()
			
 
				               if crf_spec[table]["status"] == "tested"]
			
--- a/UNFCCC_GHG_data/UNFCCC_CRF_reader/crf_specifications/CRF2023_AUS_specification.py
+++ b/UNFCCC_GHG_data/UNFCCC_CRF_reader/crf_specifications/CRF2023_AUS_specification.py
--- a/UNFCCC_GHG_data/UNFCCC_CRF_reader/crf_specifications/__init__.py
+++ b/UNFCCC_GHG_data/UNFCCC_CRF_reader/crf_specifications/__init__.py
@@ -5,5 +5,6 @@ Define the CRF specifications here for easy access
 
				 from .CRF2021_specification import CRF2021
			
 
				 from .CRF2022_specification import CRF2022
			
 
				 from .CRF2023_specification import CRF2023
			
 
				+from .CRF2023_AUS_specification import CRF2023_AUS
			
 
				 
			
 
				-__all__ = ["CRF2021", "CRF2022", "CRF2023"]
			
 
				+__all__ = ["CRF2021", "CRF2022", "CRF2023", "CRF2023_AUS"]
			
--- a/UNFCCC_GHG_data/UNFCCC_CRF_reader/crf_specifications/util.py
+++ b/UNFCCC_GHG_data/UNFCCC_CRF_reader/crf_specifications/util.py
@@ -16,6 +16,7 @@ unit_info = {
 
				         "regexp_unit": r"\((.*)\)",
			
 
				         "manual_repl_unit": {
			
 
				             "CO2 equivalent (kt)": "kt CO2eq",
			
 
				+            "CO2 equivalents (kt) (2)": "kt CO2eq", # for AUS Table2(II)
			
 
				         },
			
 
				         "default_unit": "t",
			
 
				     },
			
@@ -26,8 +27,11 @@ unit_info = {
 
				         "regexp_unit": r"\((.*)\)",
			
 
				         "manual_repl_unit": {
			
 
				             "CO2 equivalent (kt)": "kt CO2eq",
			
 
				-            "CO2 equivalents (kt)": "kt CO2eq", # for AUS Table1
			
 
				+            "CO2 equivalents (kt) (2) ": "kt CO2eq", # for AUS Table1
			
 
				             "CO2 equivalent (kt) (3)": "kt CO2eq", # for AUS, Table2(I)
			
 
				+            "CO2 equivalents (kt) (3)": "kt CO2eq", # for AUS, Table2(I)
			
 
				+            "CO2 equivalents (kt) (2)": "kt CO2eq", # for AUS Table3
			
 
				+            "CO2 equivalents (kt) (4)": "kt CO2eq", # for AUS Table4
			
 
				         },
			
 
				         "default_unit": "kt",
			
 
				     },
			
@@ -38,7 +42,7 @@ unit_info = {
 
				         "regexp_unit": r"\((.*)\)",
			
 
				         "manual_repl_unit": {
			
 
				             "(kt CO2 equivalent)": "kt CO2eq",
			
 
				-            "CO2 equivalent (kt) (3)": "kt CO2eq", # for AUS
			
 
				+            "CO2 equivalents (kt) (3)": "kt CO2eq", # for AUS
			
 
				         },
			
 
				         "default_unit": "kt",
			
 
				     },
			
--- a/UNFCCC_GHG_data/UNFCCC_CRF_reader/test_read_UNFCCC_CRF_for_year.py
+++ b/UNFCCC_GHG_data/UNFCCC_CRF_reader/test_read_UNFCCC_CRF_for_year.py
@@ -9,12 +9,15 @@ import argparse
 
				 parser = argparse.ArgumentParser()
			
 
				 parser.add_argument('--submission_year', help='Submission round to read', type=int)
			
 
				 parser.add_argument('--data_year', help='Data year to read', type=int, default=2010)
			
 
				+parser.add_argument('--country', help='Country to read', type=str, default=None)
			
 
				 parser.add_argument('--totest', help='read tables to test', action='store_true')
			
 
				 args = parser.parse_args()
			
 
				 
			
 
				 
			
 
				 submission_year = args.submission_year
			
 
				 data_year = args.data_year
			
 
				+country = args.country
			
 
				+#print(f"totest: {args.totest}")
			
 
				 if args.totest:
			
 
				     totest = True
			
 
				 else:
			
@@ -24,6 +27,7 @@ read_year_to_test_specs(
 
				     submission_year=submission_year,
			
 
				     data_year=data_year,
			
 
				     totest=totest,
			
 
				+    country_code=country,
			
 
				 )
			
 
				 
			
 
				 
			
--- a/dodo.py
+++ b/dodo.py
@@ -202,7 +202,7 @@ read_config_crf = {
 
				     "re_read": get_var('re_read', False),
			
 
				     "countries": get_var('countries', None),
			
 
				     "data_year": get_var('data_year', None),
			
 
				-    "totest": get_var('data_year', None),
			
 
				+    "totest": get_var('totest', None),
			
 
				 }
			
 
				 
			
 
				 def task_read_unfccc_crf_submission():
			
@@ -255,6 +255,7 @@ def task_test_read_unfccc_crf_for_year():
 
				                f"UNFCCC_GHG_data/UNFCCC_CRF_reader"
			
 
				                f"/test_read_UNFCCC_CRF_for_year.py "
			
 
				                f"--submission_year={read_config_crf['submission_year']} "
			
 
				+               f"--country={read_config_crf['country']} "
			
 
				                ]
			
 
				     if read_config_crf["totest"] == "True":
			
 
				         actions[0] = actions[0] + " --totest"