ソースを参照

More work on BTR/CRT reading code and specifications

Johannes Gütschow 4 ヶ月 前
コミット
3cc0201ff9

+ 1 - 1
dodo.py

@@ -492,7 +492,7 @@ def task_test_read_unfccc_crf_for_year():
             data_year=data_year,
             data_year=data_year,
             totest=totest,
             totest=totest,
             country_code=read_config_crf["country"],
             country_code=read_config_crf["country"],
-            type=read_config_crf["type"],
+            submission_type=read_config_crf["type"],
         )
         )
 
 
     return {
     return {

+ 1 - 1
src/unfccc_ghg_data/unfccc_crf_reader/crf_raw_for_year.py

@@ -59,7 +59,7 @@ if __name__ == "__main__":
                 country_info["name"],
                 country_info["name"],
                 submission_year=submission_year,
                 submission_year=submission_year,
                 submission_date=country_info["date"],
                 submission_date=country_info["date"],
-                type=type,
+                submission_type=type,
                 verbose=False,
                 verbose=False,
             )
             )
             if not data_read:
             if not data_read:

+ 42 - 6
src/unfccc_ghg_data/unfccc_crf_reader/crf_specifications/crt1_specification.py

@@ -34,8 +34,8 @@ Missing tables are:
 
 
 
 
 TODO:
 TODO:
-* Add missing tables
-* Add activity data
+ * Add missing tables
+ * Add activity data
 
 
 """
 """
 
 
@@ -56,7 +56,11 @@ CRT1 = {
             "col_for_categories": "GREENHOUSE GAS SOURCE AND SINK CATEGORIES",
             "col_for_categories": "GREENHOUSE GAS SOURCE AND SINK CATEGORIES",
             "categories": ["category"],
             "categories": ["category"],
             "cols_to_ignore": [],
             "cols_to_ignore": [],
-            "stop_cats": ["", np.nan],
+            "stop_cats": [
+                "",
+                np.nan,
+                '(1) "Total GHG emissions" does not include NOX, ' "CO, NMVOC and SOX.",
+            ],
             "unit_info": unit_info["industry"],
             "unit_info": unit_info["industry"],
         },
         },
         "sector_mapping": [
         "sector_mapping": [
@@ -141,7 +145,14 @@ CRT1 = {
                 "IMPLIED EMISSION FACTORS N2O",
                 "IMPLIED EMISSION FACTORS N2O",
                 "AMOUNT CAPTURED (4) CO2",
                 "AMOUNT CAPTURED (4) CO2",
             ],
             ],
-            "stop_cats": ["", np.nan],
+            "stop_cats": [
+                "",
+                np.nan,
+                "Note: Minimum level of aggregation is needed to protect "
+                "confidential business and military information, where it "
+                "would identify particular entity's/entities' "
+                "confidential data.",
+            ],
             "unit_info": unit_info["default"],
             "unit_info": unit_info["default"],
         },
         },
         "sector_mapping": [
         "sector_mapping": [
@@ -257,7 +268,7 @@ CRT1 = {
         "status": "tested",
         "status": "tested",
         "table": {
         "table": {
             "firstrow": 7,
             "firstrow": 7,
-            "lastrow": 119,
+            "lastrow": 131,
             "header": ["group", "entity", "unit"],
             "header": ["group", "entity", "unit"],
             "col_for_categories": "GREENHOUSE GAS SOURCE AND SINK CATEGORIES",
             "col_for_categories": "GREENHOUSE GAS SOURCE AND SINK CATEGORIES",
             "categories": ["category", "class"],
             "categories": ["category", "class"],
@@ -269,7 +280,14 @@ CRT1 = {
                 "IMPLIED EMISSION FACTORS N2O",
                 "IMPLIED EMISSION FACTORS N2O",
                 "AMOUNT CAPTURED (4) CO2",
                 "AMOUNT CAPTURED (4) CO2",
             ],
             ],
-            "stop_cats": ["", np.nan],
+            "stop_cats": [
+                "",
+                np.nan,
+                "Note: Minimum level of aggregation is needed to protect "
+                "confidential business and military information, where it "
+                "would identify particular entity's/entities' confidential "
+                "data.",
+            ],
             "unit_info": unit_info["default"],
             "unit_info": unit_info["default"],
         },
         },
         "sector_mapping": [
         "sector_mapping": [
@@ -408,6 +426,12 @@ CRT1 = {
             ["Liquefied petroleum gases (LPG)", ["1.A.2.g.vii", "LPG"], 3],
             ["Liquefied petroleum gases (LPG)", ["1.A.2.g.vii", "LPG"], 3],
             ["Other liquid fuels (please specify)", ["1.A.2.g.vii", "OtherLiquid"], 3],
             ["Other liquid fuels (please specify)", ["1.A.2.g.vii", "OtherLiquid"], 3],
             ["NA", ["\\IGNORE", "\\IGNORE"], 3],
             ["NA", ["\\IGNORE", "\\IGNORE"], 3],
+            # GUY
+            [
+                "Other liquid fuels [IPCC Software 1.A.3.e.ii]",
+                ["1.A.2.g.vii", "OLOther"],
+                4,
+            ],
             ["Gaseous fuels (6)", ["1.A.2.g.vii", "Gaseous"], 3],
             ["Gaseous fuels (6)", ["1.A.2.g.vii", "Gaseous"], 3],
             ["Other fossil fuels (7)", ["1.A.2.g.vii", "OtherFF"], 3],
             ["Other fossil fuels (7)", ["1.A.2.g.vii", "OtherFF"], 3],
             ["Biomass (3)", ["1.A.2.g.vii", "Biomass"], 3],
             ["Biomass (3)", ["1.A.2.g.vii", "Biomass"], 3],
@@ -420,6 +444,18 @@ CRT1 = {
             ["Other fossil fuels (7)", ["1.A.2.g.viii.3", "OtherFF"], 4],
             ["Other fossil fuels (7)", ["1.A.2.g.viii.3", "OtherFF"], 4],
             ["Peat (8)", ["1.A.2.g.viii.3", "Peat"], 4],
             ["Peat (8)", ["1.A.2.g.viii.3", "Peat"], 4],
             ["Biomass (3)", ["1.A.2.g.viii.3", "Biomass"], 4],
             ["Biomass (3)", ["1.A.2.g.viii.3", "Biomass"], 4],
+            # GUY
+            [
+                "Non-specified Industry [IPCC Software 1.A.2.m]",
+                ["1.A.2.g.viii.1", "Total"],
+                3,
+            ],
+            ["Liquid fuels", ["1.A.2.g.viii.1", "Liquid"], 4],
+            ["Solid fuels", ["1.A.2.g.viii.1", "Solid"], 4],
+            ["Gaseous fuels (6)", ["1.A.2.g.viii.1", "Gaseous"], 4],
+            ["Other fossil fuels (7)", ["1.A.2.g.viii.1", "OtherFF"], 4],
+            ["Peat (8)", ["1.A.2.g.viii.1", "Peat"], 4],
+            ["Biomass (3)", ["1.A.2.g.viii.1", "Biomass"], 4],
         ],
         ],
         "entity_mapping": {
         "entity_mapping": {
             "EMISSIONS CH4": "CH4",
             "EMISSIONS CH4": "CH4",

+ 35 - 29
src/unfccc_ghg_data/unfccc_crf_reader/unfccc_crf_reader_core.py

@@ -39,7 +39,7 @@ def convert_crf_table_to_pm2if(  # noqa: PLR0913
     filter_remove_input: dict[str, dict[str, str | list]] | None = None,
     filter_remove_input: dict[str, dict[str, str | list]] | None = None,
     filter_keep_input: dict[str, dict[str, str | list]] | None = None,
     filter_keep_input: dict[str, dict[str, str | list]] | None = None,
     meta_data_input: dict[str, str] | None = None,
     meta_data_input: dict[str, str] | None = None,
-    type: str = "CRF",
+    submission_type: str = "CRF",
 ) -> pd.DataFrame:
 ) -> pd.DataFrame:
     """
     """
     Convert a given pandas long format crf table to PRIMAP2 interchange format
     Convert a given pandas long format crf table to PRIMAP2 interchange format
@@ -63,7 +63,7 @@ def convert_crf_table_to_pm2if(  # noqa: PLR0913
     meta_data_input: Optional[Dict[str,str]]
     meta_data_input: Optional[Dict[str,str]]
         Meta data information. If values filled by this function automatically
         Meta data information. If values filled by this function automatically
         are given as input the automatic values are overwritten.
         are given as input the automatic values are overwritten.
-    type: str default = "CRF"
+    submission_type: str default = "CRF"
         read CRF or CRT data
         read CRF or CRT data
 
 
     Returns
     Returns
@@ -73,7 +73,7 @@ def convert_crf_table_to_pm2if(  # noqa: PLR0913
         Metadata is stored as attrs in the DataFrame
         Metadata is stored as attrs in the DataFrame
     """
     """
     # check type
     # check type
-    if type not in ["CRF", "CRT"]:
+    if submission_type not in ["CRF", "CRT"]:
         raise ValueError("Type must be CRF or CRT")  # noqa: TRY003
         raise ValueError("Type must be CRF or CRT")  # noqa: TRY003
 
 
     coords_cols = {
     coords_cols = {
@@ -86,7 +86,7 @@ def convert_crf_table_to_pm2if(  # noqa: PLR0913
     }
     }
 
 
     # set scenario and terminologies
     # set scenario and terminologies
-    if type == "CRF":
+    if submission_type == "CRF":
         category_terminology = f"CRF2013_{submission_year}"
         category_terminology = f"CRF2013_{submission_year}"
         class_terminology = "CRF2013"
         class_terminology = "CRF2013"
         scenario = f"CRF{submission_year}"
         scenario = f"CRF{submission_year}"
@@ -182,7 +182,7 @@ def read_crf_table(  # noqa: PLR0913, PLR0912, PLR0915
     data_year: int | list[int] | None = None,
     data_year: int | list[int] | None = None,
     date: str | None = None,
     date: str | None = None,
     folder: str | None = None,
     folder: str | None = None,
-    type: str = "CRF",
+    submission_type: str = "CRF",
     debug: bool = False,
     debug: bool = False,
 ) -> tuple[pd.DataFrame, list[list], list[list]]:
 ) -> tuple[pd.DataFrame, list[list], list[list]]:
     """
     """
@@ -212,7 +212,7 @@ def read_crf_table(  # noqa: PLR0913, PLR0912, PLR0915
     folder: str (optional)
     folder: str (optional)
         Folder that contains the xls files. If not given folders are determined by the
         Folder that contains the xls files. If not given folders are determined by the
         submissions_year and country_code variables
         submissions_year and country_code variables
-    type: str default = "CRF"
+    submission_type: str default = "CRF"
         read CRF or CRT data
         read CRF or CRT data
     debug: bool (optional)
     debug: bool (optional)
         if true print some debug information like column headers
         if true print some debug information like column headers
@@ -228,7 +228,7 @@ def read_crf_table(  # noqa: PLR0913, PLR0912, PLR0915
 
 
     """
     """
     # check type
     # check type
-    if type not in ["CRF", "CRT"]:
+    if submission_type not in ["CRF", "CRT"]:
         raise ValueError("Type must be CRF or CRT")  # noqa: TRY003
         raise ValueError("Type must be CRF or CRT")  # noqa: TRY003
 
 
     if isinstance(country_codes, str):
     if isinstance(country_codes, str):
@@ -241,7 +241,7 @@ def read_crf_table(  # noqa: PLR0913, PLR0912, PLR0915
         data_year=data_year,
         data_year=data_year,
         date=date,
         date=date,
         folder=folder,
         folder=folder,
-        type=type,
+        submission_type=submission_type,
     )
     )
     # nasty fix for cases where exporting ran overnight and not all files have
     # nasty fix for cases where exporting ran overnight and not all files have
     # the same date
     # the same date
@@ -270,7 +270,7 @@ def read_crf_table(  # noqa: PLR0913, PLR0912, PLR0915
                 data_year=data_year,
                 data_year=data_year,
                 date=prv_date,
                 date=prv_date,
                 folder=folder,
                 folder=folder,
-                type=type,
+                submission_type=submission_type,
             )
             )
             if len(more_input_files) > 0:
             if len(more_input_files) > 0:
                 print(f"Found {len(more_input_files)} additional input files.")
                 print(f"Found {len(more_input_files)} additional input files.")
@@ -278,7 +278,7 @@ def read_crf_table(  # noqa: PLR0913, PLR0912, PLR0915
             else:
             else:
                 print("Found no additional input files")
                 print("Found no additional input files")
 
 
-    if input_files == []:
+    if not input_files:
         raise NoCRFFilesError(  # noqa: TRY003
         raise NoCRFFilesError(  # noqa: TRY003
             f"No files found for {country_codes}, "
             f"No files found for {country_codes}, "
             f"submission_year={submission_year}, "
             f"submission_year={submission_year}, "
@@ -292,22 +292,24 @@ def read_crf_table(  # noqa: PLR0913, PLR0912, PLR0915
     # specification (currently only Australia, 2023)
     # specification (currently only Australia, 2023)
     if len(country_codes) == 1:
     if len(country_codes) == 1:
         try:
         try:
-            crf_spec = getattr(crf, f"{type}{submission_year}_{country_codes[0]}")
+            crf_spec = getattr(
+                crf, f"{submission_type}{submission_year}_{country_codes[0]}"
+            )
             print(
             print(
                 f"Using country specific specification: "
                 f"Using country specific specification: "
-                f"{type}{submission_year}_{country_codes[0]}"
+                f"{submission_type}{submission_year}_{country_codes[0]}"
             )
             )
         except:  # noqa: E722
         except:  # noqa: E722
             # no country specific specification, check for general specification
             # no country specific specification, check for general specification
             try:
             try:
-                crf_spec = getattr(crf, f"{type}{submission_year}")
+                crf_spec = getattr(crf, f"{submission_type}{submission_year}")
             except:  # noqa: E722
             except:  # noqa: E722
                 raise ValueError(  # noqa: TRY003, TRY200
                 raise ValueError(  # noqa: TRY003, TRY200
                     f"No terminology exists for submission year " f"{submission_year}"
                     f"No terminology exists for submission year " f"{submission_year}"
                 )
                 )
     else:
     else:
         try:
         try:
-            crf_spec = getattr(crf, f"{type}{submission_year}")
+            crf_spec = getattr(crf, f"{submission_type}{submission_year}")
         except:  # noqa: E722
         except:  # noqa: E722
             raise ValueError(  # noqa: TRY003, TRY200
             raise ValueError(  # noqa: TRY003, TRY200
                 f"No terminology exists for submission year " f"{submission_year}"
                 f"No terminology exists for submission year " f"{submission_year}"
@@ -701,7 +703,7 @@ def get_crf_files(  # noqa: PLR0912, PLR0913
     data_year: Optional[Union[int, list[int]]] = None,
     data_year: Optional[Union[int, list[int]]] = None,
     date: Optional[str] = None,
     date: Optional[str] = None,
     folder: Optional[str] = None,
     folder: Optional[str] = None,
-    type: str = "CRF",
+    submission_type: str = "CRF",
 ) -> list[Path]:
 ) -> list[Path]:
     """
     """
     Find all files according to given parameters
     Find all files according to given parameters
@@ -725,17 +727,17 @@ def get_crf_files(  # noqa: PLR0912, PLR0913
         Folder that contains the xls files. If not given folders are determined by the
         Folder that contains the xls files. If not given folders are determined by the
         submissions_year and country_code variables
         submissions_year and country_code variables
 
 
-    type: str default = "CRF"
+    submission_type: str default = "CRF"
         read CRF or CRT data
         read CRF or CRT data
 
 
     Returns
     Returns
     -------
     -------
         List[Path]: list of Path objects for the files
         List[Path]: list of Path objects for the files
     """
     """
-    if type == "CRT":
+    if submission_type == "CRT":
         type_folder = "BTR"
         type_folder = "BTR"
     else:
     else:
-        type_folder = type
+        type_folder = submission_type
 
 
     if isinstance(country_codes, str):
     if isinstance(country_codes, str):
         country_codes = [country_codes]
         country_codes = [country_codes]
@@ -776,7 +778,7 @@ def get_crf_files(  # noqa: PLR0912, PLR0913
         country_folders = [folder]
         country_folders = [folder]
 
 
     file_filter_template = {}
     file_filter_template = {}
-    if type == "CRF":
+    if submission_type == "CRF":
         file_filter_template["submission_year"] = submission_year
         file_filter_template["submission_year"] = submission_year
     # don't filter for submission year in BTR as it's  the actual year and
     # don't filter for submission year in BTR as it's  the actual year and
     # not the submissions round (and we don't know yet if it will be the same
     # not the submissions round (and we don't know yet if it will be the same
@@ -854,7 +856,7 @@ def get_info_from_crf_filename(  # noqa: PLR0912
         try:
         try:
             file_info["data_year"] = int(name_parts[2])
             file_info["data_year"] = int(name_parts[2])
         except:  # noqa: E722
         except:  # noqa: E722
-            print(f"Data year string {name_parts[2]} " "could not be converted to int.")
+            print(f"Data year string {name_parts[2]} could not be converted to int.")
             file_info["data_year"] = name_parts[2]
             file_info["data_year"] = name_parts[2]
         file_info["date"] = name_parts[3]
         file_info["date"] = name_parts[3]
         # the last part (time code) is missing for CRT tables in CRF file format
         # the last part (time code) is missing for CRT tables in CRF file format
@@ -1082,7 +1084,7 @@ def create_category_tree(
 
 
         elif current_cat_level < last_cat_info["level"]:
         elif current_cat_level < last_cat_info["level"]:
             # the new level is smaller (closer to the trunk)
             # the new level is smaller (closer to the trunk)
-            # than the last one. Thus we remove all parents
+            # than the last one. Thus, we remove all parents
             # from this level on
             # from this level on
             parent_info = parent_info[0 : current_cat_level + 1]
             parent_info = parent_info[0 : current_cat_level + 1]
             category_tree.create_node(
             category_tree.create_node(
@@ -1154,18 +1156,18 @@ def filter_category(
 def get_latest_date_for_country(
 def get_latest_date_for_country(
     country_code: str,
     country_code: str,
     submission_year: int,
     submission_year: int,
-    type: str = "CRF",
+    submission_type: str = "CRF",
 ) -> str:
 ) -> str:
     """
     """
     Find the latest submission date for a country
     Find the latest submission date for a country
 
 
     Parameters
     Parameters
     ----------
     ----------
-    country: str
+    country_code: str
         3-letter country code
         3-letter country code
     submission_year: int
     submission_year: int
         Year of the submission to find the latest date for
         Year of the submission to find the latest date for
-    type: str, default CRF
+    submission_type: str, default CRF
         Check for CRF or CRT tables
         Check for CRF or CRT tables
 
 
     Returns
     Returns
@@ -1175,19 +1177,20 @@ def get_latest_date_for_country(
     with open(downloaded_data_path_UNFCCC / "folder_mapping.json") as mapping_file:
     with open(downloaded_data_path_UNFCCC / "folder_mapping.json") as mapping_file:
         folder_mapping = json.load(mapping_file)
         folder_mapping = json.load(mapping_file)
 
 
-    if type == "CRT":
+    if submission_type == "CRT":
         type_folder = "BTR"
         type_folder = "BTR"
         if country_code == "AUS" and submission_year == 1:
         if country_code == "AUS" and submission_year == 1:
             date_format = "%d%m%Y"
             date_format = "%d%m%Y"
         else:
         else:
             date_format = "%Y%m%d"
             date_format = "%Y%m%d"
     else:
     else:
-        type_folder = type
+        type_folder = submission_type
         date_format = "%d%m%Y"
         date_format = "%d%m%Y"
     if country_code in folder_mapping:
     if country_code in folder_mapping:
-        file_filter = {}
-        file_filter["party"] = country_code
-        if type == "CRF":
+        file_filter = {
+            "party": country_code,
+        }
+        if submission_type == "CRF":
             file_filter["submission_year"] = submission_year
             file_filter["submission_year"] = submission_year
         # don't filter for submission year in BTR as it's  the actual year and
         # don't filter for submission year in BTR as it's  the actual year and
         # not the submissions round (and we don't know yet if it will be the same
         # not the submissions round (and we don't know yet if it will be the same
@@ -1309,6 +1312,9 @@ def find_latest_date(
     ----------
     ----------
     dates: List[str]
     dates: List[str]
         List of dates
         List of dates
+    date_format: str, default "%d%m%Y"
+        Format for the date. Unfortunately, CRF uses %d%m%Y while CRT uses %Y%m%d with
+        some exceptions for early submissions which use the CRF file naming scheme
 
 
     Returns
     Returns
     -------
     -------

+ 45 - 42
src/unfccc_ghg_data/unfccc_crf_reader/unfccc_crf_reader_devel.py

@@ -29,7 +29,7 @@ from .util import all_crf_countries
 def read_year_to_test_specs(  # noqa: PLR0912, PLR0915
 def read_year_to_test_specs(  # noqa: PLR0912, PLR0915
     submission_year: int,
     submission_year: int,
     data_year: int | None = None,
     data_year: int | None = None,
-    type: str = "CRF",
+    submission_type: str = "CRF",
     totest: bool | None = False,
     totest: bool | None = False,
     country_code: str | None = None,
     country_code: str | None = None,
 ) -> xr.Dataset:
 ) -> xr.Dataset:
@@ -45,7 +45,7 @@ def read_year_to_test_specs(  # noqa: PLR0912, PLR0915
         submission year to read
         submission year to read
     data_year
     data_year
         year to read
         year to read
-    type: str = CRF
+    submission_type: str = CRF
         read CRF or CRT data
         read CRF or CRT data
     totest
     totest
         if true only read tables with "totest" status
         if true only read tables with "totest" status
@@ -57,9 +57,9 @@ def read_year_to_test_specs(  # noqa: PLR0912, PLR0915
     xr.Dataset with data for given parameters
     xr.Dataset with data for given parameters
     """
     """
     # long name for type
     # long name for type
-    if type == "CRF":
+    if submission_type == "CRF":
         type_name = "common reporting format"
         type_name = "common reporting format"
-    elif type == "CRT":
+    elif submission_type == "CRT":
         type_name = "common reporting tables"
         type_name = "common reporting tables"
     else:
     else:
         raise ValueError("Type must be CRF or CRT")  # noqa: TRY003
         raise ValueError("Type must be CRF or CRT")  # noqa: TRY003
@@ -75,7 +75,8 @@ def read_year_to_test_specs(  # noqa: PLR0912, PLR0915
     last_row_info = []
     last_row_info = []
     ds_all = None
     ds_all = None
     print(
     print(
-        f"{type} test reading for {type}{submission_year}. Using data year {data_year}"
+        f"{submission_type} test reading for {submission_type}{submission_year}. "
+        f"Using data year {data_year}"
     )
     )
     if totest:
     if totest:
         print("Reading only tables to test.")
         print("Reading only tables to test.")
@@ -84,9 +85,9 @@ def read_year_to_test_specs(  # noqa: PLR0912, PLR0915
     if country_code is not None:
     if country_code is not None:
         countries_to_read = [country_code]
         countries_to_read = [country_code]
     else:  # noqa: PLR5501
     else:  # noqa: PLR5501
-        if type == "CRF":
+        if submission_type == "CRF":
             countries_to_read = all_crf_countries
             countries_to_read = all_crf_countries
-        elif type == "CRT":
+        elif submission_type == "CRT":
             countries_to_read = all_countries
             countries_to_read = all_countries
         else:
         else:
             raise ValueError("Type must be CRF or CRT")  # noqa: TRY003
             raise ValueError("Type must be CRF or CRT")  # noqa: TRY003
@@ -100,16 +101,16 @@ def read_year_to_test_specs(  # noqa: PLR0912, PLR0915
         if current_country_code is not None:
         if current_country_code is not None:
             try:
             try:
                 crf_spec = getattr(
                 crf_spec = getattr(
-                    crf, f"{type}{submission_year}_{current_country_code}"
+                    crf, f"{submission_type}{submission_year}_{current_country_code}"
                 )
                 )
                 print(
                 print(
                     f"Using country specific specification: "
                     f"Using country specific specification: "
-                    f"{type}{submission_year}_{current_country_code}"
+                    f"{submission_type}{submission_year}_{current_country_code}"
                 )
                 )
             except Exception:
             except Exception:
                 # no country specific specification, check for general specification
                 # no country specific specification, check for general specification
                 try:
                 try:
-                    crf_spec = getattr(crf, f"{type}{submission_year}")
+                    crf_spec = getattr(crf, f"{submission_type}{submission_year}")
                 except Exception as ex:
                 except Exception as ex:
                     raise ValueError(  # noqa: TRY003
                     raise ValueError(  # noqa: TRY003
                         f"No terminology exists for submission year "
                         f"No terminology exists for submission year "
@@ -117,10 +118,10 @@ def read_year_to_test_specs(  # noqa: PLR0912, PLR0915
                     ) from ex
                     ) from ex
         else:
         else:
             try:
             try:
-                crf_spec = getattr(crf, f"{type}{submission_year}")
+                crf_spec = getattr(crf, f"{submission_type}{submission_year}")
             except Exception as ex:
             except Exception as ex:
                 raise ValueError(  # noqa: TRY003
                 raise ValueError(  # noqa: TRY003
-                    f"No terminology exists for {type}{submission_year}"
+                    f"No terminology exists for {submission_type}{submission_year}"
                 ) from ex
                 ) from ex
 
 
         if totest:
         if totest:
@@ -137,17 +138,18 @@ def read_year_to_test_specs(  # noqa: PLR0912, PLR0915
             ]
             ]
         print(
         print(
             f"The following tables are available in the "
             f"The following tables are available in the "
-            f"{type}{submission_year} specification: {tables}"
+            f"{submission_type}{submission_year} specification: {tables}"
         )
         )
         print("#" * 80)
         print("#" * 80)
 
 
         try:
         try:
             submission_date = get_latest_date_for_country(
             submission_date = get_latest_date_for_country(
-                current_country_code, submission_year, type=type
+                current_country_code, submission_year, submission_type=submission_type
             )
             )
         except Exception:
         except Exception:
             message = (
             message = (
-                f"No submissions for country {country_name}, {type}{submission_year}"
+                f"No submissions for country {country_name}, "
+                f"{submission_type}{submission_year}"
             )
             )
             print(message)
             print(message)
             exceptions.append(f"No_sub: {country_name}: {message}")
             exceptions.append(f"No_sub: {country_name}: {message}")
@@ -169,7 +171,7 @@ def read_year_to_test_specs(  # noqa: PLR0912, PLR0915
                         date=submission_date,
                         date=submission_date,
                         data_year=[data_year],
                         data_year=[data_year],
                         debug=True,
                         debug=True,
-                        type=type,
+                        submission_type=submission_type,
                     )
                     )
 
 
                     # collect messages on unknown rows etc
                     # collect messages on unknown rows etc
@@ -195,12 +197,12 @@ def read_year_to_test_specs(  # noqa: PLR0912, PLR0915
                         submission_year,
                         submission_year,
                         meta_data_input={
                         meta_data_input={
                             "title": f"Data submitted in {submission_year} to the "
                             "title": f"Data submitted in {submission_year} to the "
-                            f"UNFCCC in the {type_name} ({type}) "
+                            f"UNFCCC in the {type_name} ({submission_type}) "
                             f"by {country_name}. "
                             f"by {country_name}. "
                             f"Submission date: {submission_date}"
                             f"Submission date: {submission_date}"
                         },
                         },
                         entity_mapping=entity_mapping,
                         entity_mapping=entity_mapping,
-                        type=type,
+                        submission_type=submission_type,
                     )
                     )
 
 
                     # now convert to native PRIMAP2 format
                     # now convert to native PRIMAP2 format
@@ -240,27 +242,28 @@ def read_year_to_test_specs(  # noqa: PLR0912, PLR0915
                     else:
                     else:
                         ds_all = ds_all.combine_first(ds_table_pm2)
                         ds_all = ds_all.combine_first(ds_table_pm2)
                 except Exception as e:
                 except Exception as e:
-                    message = f"Error occured when converting table {table} for"
-                    f" {country_name} to PRIMAP2 IF. Exception: {e}"
+                    message = (
+                        f"Error occured when converting table {table} for"
+                        f" {country_name} to PRIMAP2 IF. Exception: {e}"
+                    )
                     print(message)
                     print(message)
                     exceptions.append(f"Error: {country_name}: {message}")
                     exceptions.append(f"Error: {country_name}: {message}")
                     pass
                     pass
 
 
     # process log messages.
     # process log messages.
     today = date.today()
     today = date.today()
+    output_folder = log_path / f"test_read_{submission_type}{submission_year}"
+    if not output_folder.exists():
+        output_folder.mkdir()
     if len(unknown_categories) > 0:
     if len(unknown_categories) > 0:
         if country_code is not None:
         if country_code is not None:
             log_location = (
             log_location = (
-                log_path
-                / f"{type}{submission_year}"
-                / f"{data_year}_unknown_categories_{country_code}"
+                output_folder / f"{data_year}_unknown_categories_{country_code}"
                 f"_{today.strftime('%Y-%m-%d')}.csv"
                 f"_{today.strftime('%Y-%m-%d')}.csv"
             )
             )
         else:
         else:
             log_location = (
             log_location = (
-                log_path
-                / f"{type}{submission_year}"
-                / f"{data_year}_unknown_categories_"
+                output_folder / f"{data_year}_unknown_categories_"
                 f"{today.strftime('%Y-%m-%d')}.csv"
                 f"{today.strftime('%Y-%m-%d')}.csv"
             )
             )
         print(f"Unknown rows found. Savin log to {log_location}")
         print(f"Unknown rows found. Savin log to {log_location}")
@@ -269,35 +272,41 @@ def read_year_to_test_specs(  # noqa: PLR0912, PLR0915
     if len(last_row_info) > 0:
     if len(last_row_info) > 0:
         if country_code is not None:
         if country_code is not None:
             log_location = (
             log_location = (
-                log_path
-                / f"{type}{submission_year}"
-                / f"{data_year}_last_row_info_{country_code}_"
+                output_folder / f"{data_year}_last_row_info_{country_code}_"
                 f"{today.strftime('%Y-%m-%d')}.csv"
                 f"{today.strftime('%Y-%m-%d')}.csv"
             )
             )
         else:
         else:
             log_location = (
             log_location = (
-                log_path / f"{type}{submission_year}" / f"{data_year}_last_row_info_"
+                output_folder / f"{data_year}_last_row_info_"
                 f"{today.strftime('%Y-%m-%d')}.csv"
                 f"{today.strftime('%Y-%m-%d')}.csv"
             )
             )
         print(f"Data found in the last row. Saving log to " f"{log_location}")
         print(f"Data found in the last row. Saving log to " f"{log_location}")
         save_last_row_info(last_row_info, log_location)
         save_last_row_info(last_row_info, log_location)
 
 
+    # write exceptions
+    f_ex = open(
+        output_folder / f"{data_year}_exceptions_{today.strftime('%Y-%m-%d')}.txt", "w"
+    )
+    for ex in exceptions:
+        f_ex.write(f"{ex}\n")
+    f_ex.close()
+
     # save the data:
     # save the data:
     print(f"Save dataset to log folder: {log_path}")
     print(f"Save dataset to log folder: {log_path}")
     compression = dict(zlib=True, complevel=9)
     compression = dict(zlib=True, complevel=9)
-    output_folder = log_path / f"test_read_{type}{submission_year}"
+
     if country_code is not None:
     if country_code is not None:
         output_filename = (
         output_filename = (
-            f"{type}{submission_year}_{country_code}_" f"{today.strftime('%Y-%m-%d')}"
+            f"{submission_type}{submission_year}_{country_code}_"
+            f"{today.strftime('%Y-%m-%d')}"
         )
         )
     else:
     else:
-        output_filename = f"{type}{submission_year}_{today.strftime('%Y-%m-%d')}"
+        output_filename = (
+            f"{submission_type}{submission_year}_{today.strftime('%Y-%m-%d')}"
+        )
     if totest:
     if totest:
         output_filename = output_filename + "_totest"
         output_filename = output_filename + "_totest"
 
 
-    if not output_folder.exists():
-        output_folder.mkdir()
-
     # write data in interchange format
     # write data in interchange format
     pm2.pm2io.write_interchange_format(
     pm2.pm2io.write_interchange_format(
         output_folder / output_filename, ds_all.pr.to_interchange_format()
         output_folder / output_filename, ds_all.pr.to_interchange_format()
@@ -307,12 +316,6 @@ def read_year_to_test_specs(  # noqa: PLR0912, PLR0915
     encoding = {var: compression for var in ds_all.data_vars}
     encoding = {var: compression for var in ds_all.data_vars}
     ds_all.pr.to_netcdf(output_folder / (output_filename + ".nc"), encoding=encoding)
     ds_all.pr.to_netcdf(output_folder / (output_filename + ".nc"), encoding=encoding)
 
 
-    # write exceptions
-    f_ex = open(output_folder / f"exceptions_{output_filename}.txt", "w")
-    for ex in exceptions:
-        f_ex.write(f"{ex}\n")
-    f_ex.close()
-
     return ds_all
     return ds_all
 
 
 
 

+ 17 - 11
src/unfccc_ghg_data/unfccc_crf_reader/unfccc_crf_reader_prod.py

@@ -140,7 +140,7 @@ def read_crf_for_country(  # noqa: PLR0912, PLR0915
         country_name,
         country_name,
         submission_year=submission_year,
         submission_year=submission_year,
         submission_date=submission_date,
         submission_date=submission_date,
-        type=type,
+        submission_type=type,
         verbose=True,
         verbose=True,
     )
     )
 
 
@@ -151,7 +151,11 @@ def read_crf_for_country(  # noqa: PLR0912, PLR0915
         for table in tables:
         for table in tables:
             # read table for all years
             # read table for all years
             ds_table, new_unknown_categories, new_last_row_info = read_crf_table(
             ds_table, new_unknown_categories, new_last_row_info = read_crf_table(
-                country_code, table, submission_year, date=submission_date, type=type
+                country_code,
+                table,
+                submission_year,
+                date=submission_date,
+                submission_type=type,
             )  # , data_year=[1990])
             )  # , data_year=[1990])
 
 
             # collect messages on unknown rows etc
             # collect messages on unknown rows etc
@@ -177,7 +181,7 @@ def read_crf_for_country(  # noqa: PLR0912, PLR0915
                     f"Submission date: {submission_date}"
                     f"Submission date: {submission_date}"
                 },
                 },
                 entity_mapping=entity_mapping,
                 entity_mapping=entity_mapping,
-                type=type,
+                submission_type=type,
             )
             )
 
 
             # now convert to native PRIMAP2 format
             # now convert to native PRIMAP2 format
@@ -499,7 +503,7 @@ def read_new_crf_for_year_datalad(  # noqa: PLR0912
                     country_info["name"],
                     country_info["name"],
                     submission_year=submission_year,
                     submission_year=submission_year,
                     submission_date=country_info["date"],
                     submission_date=country_info["date"],
-                    type=type,
+                    submission_type=type,
                     verbose=False,
                     verbose=False,
                 )
                 )
                 if not data_read:
                 if not data_read:
@@ -603,7 +607,7 @@ def get_input_and_output_files_for_country(
         country_codes=country_code,
         country_codes=country_code,
         submission_year=submission_year,
         submission_year=submission_year,
         date=submission_date,
         date=submission_date,
-        type=type,
+        submission_type=type,
     )
     )
     if not input_files:
     if not input_files:
         raise NoCRFFilesError(  # noqa: TRY003
         raise NoCRFFilesError(  # noqa: TRY003
@@ -648,7 +652,7 @@ def submission_has_been_read(  # noqa: PLR0913
     country_name: str,
     country_name: str,
     submission_year: int,
     submission_year: int,
     submission_date: str,
     submission_date: str,
-    type: str = "CRF",
+    submission_type: str = "CRF",
     verbose: Optional[bool] = True,
     verbose: Optional[bool] = True,
 ) -> bool:
 ) -> bool:
     """
     """
@@ -664,7 +668,7 @@ def submission_has_been_read(  # noqa: PLR0913
         year of submissions for CRF or submission round for CRT
         year of submissions for CRF or submission round for CRT
     submission_date
     submission_date
         date of submission (as in the filename)
         date of submission (as in the filename)
-    type: str: default "CRF"
+    submission_type: str: default "CRF"
         CRF or CRT
         CRF or CRT
     verbose: bool (optional, default True)
     verbose: bool (optional, default True)
         if True print additional output
         if True print additional output
@@ -674,7 +678,9 @@ def submission_has_been_read(  # noqa: PLR0913
     True if data has been read, False otherwise
     True if data has been read, False otherwise
     """
     """
     output_folder = extracted_data_path_UNFCCC / country_name.replace(" ", "_")
     output_folder = extracted_data_path_UNFCCC / country_name.replace(" ", "_")
-    output_filename = f"{country_code}_{type}{submission_year}_{submission_date}"
+    output_filename = (
+        f"{country_code}_{submission_type}{submission_year}_{submission_date}"
+    )
 
 
     #    check if the submission_year is correctly used for CRT
     #    check if the submission_year is correctly used for CRT
     if output_folder.exists():
     if output_folder.exists():
@@ -685,14 +691,14 @@ def submission_has_been_read(  # noqa: PLR0913
             if verbose:
             if verbose:
                 print(
                 print(
                     f"Data already available for {country_code}, "
                     f"Data already available for {country_code}, "
-                    f"{type}{submission_year}, version {submission_date}."
+                    f"{submission_type}{submission_year}, version {submission_date}."
                 )
                 )
         elif existing_suffixes:
         elif existing_suffixes:
             has_been_read = False
             has_been_read = False
             if verbose:
             if verbose:
                 print(
                 print(
                     f"Partial data available for {country_code}, "
                     f"Partial data available for {country_code}, "
-                    f"{type}{submission_year}, version {submission_date}. "
+                    f"{submission_type}{submission_year}, version {submission_date}. "
                     "Please check if all files have been written after "
                     "Please check if all files have been written after "
                     f"reading. Existing suffixes: {existing_suffixes}"
                     f"reading. Existing suffixes: {existing_suffixes}"
                 )
                 )
@@ -701,7 +707,7 @@ def submission_has_been_read(  # noqa: PLR0913
             if verbose:
             if verbose:
                 print(
                 print(
                     f"No read data available for {country_code}, "
                     f"No read data available for {country_code}, "
-                    f"{type}{submission_year}, version {submission_date}. "
+                    f"{submission_type}{submission_year}, version {submission_date}. "
                 )
                 )
     else:
     else:
         has_been_read = False
         has_been_read = False