Browse Source

More work on BTR/CRT reading code and specifications

Johannes Gütschow 3 months ago
parent
commit
3cc0201ff9

+ 1 - 1
dodo.py

@@ -492,7 +492,7 @@ def task_test_read_unfccc_crf_for_year():
             data_year=data_year,
             totest=totest,
             country_code=read_config_crf["country"],
-            type=read_config_crf["type"],
+            submission_type=read_config_crf["type"],
         )
 
     return {

+ 1 - 1
src/unfccc_ghg_data/unfccc_crf_reader/crf_raw_for_year.py

@@ -59,7 +59,7 @@ if __name__ == "__main__":
                 country_info["name"],
                 submission_year=submission_year,
                 submission_date=country_info["date"],
-                type=type,
+                submission_type=type,
                 verbose=False,
             )
             if not data_read:

+ 42 - 6
src/unfccc_ghg_data/unfccc_crf_reader/crf_specifications/crt1_specification.py

@@ -34,8 +34,8 @@ Missing tables are:
 
 
 TODO:
-* Add missing tables
-* Add activity data
+ * Add missing tables
+ * Add activity data
 
 """
 
@@ -56,7 +56,11 @@ CRT1 = {
             "col_for_categories": "GREENHOUSE GAS SOURCE AND SINK CATEGORIES",
             "categories": ["category"],
             "cols_to_ignore": [],
-            "stop_cats": ["", np.nan],
+            "stop_cats": [
+                "",
+                np.nan,
+                '(1) "Total GHG emissions" does not include NOX, ' "CO, NMVOC and SOX.",
+            ],
             "unit_info": unit_info["industry"],
         },
         "sector_mapping": [
@@ -141,7 +145,14 @@ CRT1 = {
                 "IMPLIED EMISSION FACTORS N2O",
                 "AMOUNT CAPTURED (4) CO2",
             ],
-            "stop_cats": ["", np.nan],
+            "stop_cats": [
+                "",
+                np.nan,
+                "Note: Minimum level of aggregation is needed to protect "
+                "confidential business and military information, where it "
+                "would identify particular entity's/entities' "
+                "confidential data.",
+            ],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [
@@ -257,7 +268,7 @@ CRT1 = {
         "status": "tested",
         "table": {
             "firstrow": 7,
-            "lastrow": 119,
+            "lastrow": 131,
             "header": ["group", "entity", "unit"],
             "col_for_categories": "GREENHOUSE GAS SOURCE AND SINK CATEGORIES",
             "categories": ["category", "class"],
@@ -269,7 +280,14 @@ CRT1 = {
                 "IMPLIED EMISSION FACTORS N2O",
                 "AMOUNT CAPTURED (4) CO2",
             ],
-            "stop_cats": ["", np.nan],
+            "stop_cats": [
+                "",
+                np.nan,
+                "Note: Minimum level of aggregation is needed to protect "
+                "confidential business and military information, where it "
+                "would identify particular entity's/entities' confidential "
+                "data.",
+            ],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [
@@ -408,6 +426,12 @@ CRT1 = {
             ["Liquefied petroleum gases (LPG)", ["1.A.2.g.vii", "LPG"], 3],
             ["Other liquid fuels (please specify)", ["1.A.2.g.vii", "OtherLiquid"], 3],
             ["NA", ["\\IGNORE", "\\IGNORE"], 3],
+            # GUY
+            [
+                "Other liquid fuels [IPCC Software 1.A.3.e.ii]",
+                ["1.A.2.g.vii", "OLOther"],
+                4,
+            ],
             ["Gaseous fuels (6)", ["1.A.2.g.vii", "Gaseous"], 3],
             ["Other fossil fuels (7)", ["1.A.2.g.vii", "OtherFF"], 3],
             ["Biomass (3)", ["1.A.2.g.vii", "Biomass"], 3],
@@ -420,6 +444,18 @@ CRT1 = {
             ["Other fossil fuels (7)", ["1.A.2.g.viii.3", "OtherFF"], 4],
             ["Peat (8)", ["1.A.2.g.viii.3", "Peat"], 4],
             ["Biomass (3)", ["1.A.2.g.viii.3", "Biomass"], 4],
+            # GUY
+            [
+                "Non-specified Industry [IPCC Software 1.A.2.m]",
+                ["1.A.2.g.viii.1", "Total"],
+                3,
+            ],
+            ["Liquid fuels", ["1.A.2.g.viii.1", "Liquid"], 4],
+            ["Solid fuels", ["1.A.2.g.viii.1", "Solid"], 4],
+            ["Gaseous fuels (6)", ["1.A.2.g.viii.1", "Gaseous"], 4],
+            ["Other fossil fuels (7)", ["1.A.2.g.viii.1", "OtherFF"], 4],
+            ["Peat (8)", ["1.A.2.g.viii.1", "Peat"], 4],
+            ["Biomass (3)", ["1.A.2.g.viii.1", "Biomass"], 4],
         ],
         "entity_mapping": {
             "EMISSIONS CH4": "CH4",

+ 35 - 29
src/unfccc_ghg_data/unfccc_crf_reader/unfccc_crf_reader_core.py

@@ -39,7 +39,7 @@ def convert_crf_table_to_pm2if(  # noqa: PLR0913
     filter_remove_input: dict[str, dict[str, str | list]] | None = None,
     filter_keep_input: dict[str, dict[str, str | list]] | None = None,
     meta_data_input: dict[str, str] | None = None,
-    type: str = "CRF",
+    submission_type: str = "CRF",
 ) -> pd.DataFrame:
     """
     Convert a given pandas long format crf table to PRIMAP2 interchange format
@@ -63,7 +63,7 @@ def convert_crf_table_to_pm2if(  # noqa: PLR0913
     meta_data_input: Optional[Dict[str,str]]
         Meta data information. If values filled by this function automatically
         are given as input the automatic values are overwritten.
-    type: str default = "CRF"
+    submission_type: str default = "CRF"
         read CRF or CRT data
 
     Returns
@@ -73,7 +73,7 @@ def convert_crf_table_to_pm2if(  # noqa: PLR0913
         Metadata is stored as attrs in the DataFrame
     """
     # check type
-    if type not in ["CRF", "CRT"]:
+    if submission_type not in ["CRF", "CRT"]:
         raise ValueError("Type must be CRF or CRT")  # noqa: TRY003
 
     coords_cols = {
@@ -86,7 +86,7 @@ def convert_crf_table_to_pm2if(  # noqa: PLR0913
     }
 
     # set scenario and terminologies
-    if type == "CRF":
+    if submission_type == "CRF":
         category_terminology = f"CRF2013_{submission_year}"
         class_terminology = "CRF2013"
         scenario = f"CRF{submission_year}"
@@ -182,7 +182,7 @@ def read_crf_table(  # noqa: PLR0913, PLR0912, PLR0915
     data_year: int | list[int] | None = None,
     date: str | None = None,
     folder: str | None = None,
-    type: str = "CRF",
+    submission_type: str = "CRF",
     debug: bool = False,
 ) -> tuple[pd.DataFrame, list[list], list[list]]:
     """
@@ -212,7 +212,7 @@ def read_crf_table(  # noqa: PLR0913, PLR0912, PLR0915
     folder: str (optional)
         Folder that contains the xls files. If not given folders are determined by the
         submissions_year and country_code variables
-    type: str default = "CRF"
+    submission_type: str default = "CRF"
         read CRF or CRT data
     debug: bool (optional)
         if true print some debug information like column headers
@@ -228,7 +228,7 @@ def read_crf_table(  # noqa: PLR0913, PLR0912, PLR0915
 
     """
     # check type
-    if type not in ["CRF", "CRT"]:
+    if submission_type not in ["CRF", "CRT"]:
         raise ValueError("Type must be CRF or CRT")  # noqa: TRY003
 
     if isinstance(country_codes, str):
@@ -241,7 +241,7 @@ def read_crf_table(  # noqa: PLR0913, PLR0912, PLR0915
         data_year=data_year,
         date=date,
         folder=folder,
-        type=type,
+        submission_type=submission_type,
     )
     # nasty fix for cases where exporting ran overnight and not all files have
     # the same date
@@ -270,7 +270,7 @@ def read_crf_table(  # noqa: PLR0913, PLR0912, PLR0915
                 data_year=data_year,
                 date=prv_date,
                 folder=folder,
-                type=type,
+                submission_type=submission_type,
             )
             if len(more_input_files) > 0:
                 print(f"Found {len(more_input_files)} additional input files.")
@@ -278,7 +278,7 @@ def read_crf_table(  # noqa: PLR0913, PLR0912, PLR0915
             else:
                 print("Found no additional input files")
 
-    if input_files == []:
+    if not input_files:
         raise NoCRFFilesError(  # noqa: TRY003
             f"No files found for {country_codes}, "
             f"submission_year={submission_year}, "
@@ -292,22 +292,24 @@ def read_crf_table(  # noqa: PLR0913, PLR0912, PLR0915
     # specification (currently only Australia, 2023)
     if len(country_codes) == 1:
         try:
-            crf_spec = getattr(crf, f"{type}{submission_year}_{country_codes[0]}")
+            crf_spec = getattr(
+                crf, f"{submission_type}{submission_year}_{country_codes[0]}"
+            )
             print(
                 f"Using country specific specification: "
-                f"{type}{submission_year}_{country_codes[0]}"
+                f"{submission_type}{submission_year}_{country_codes[0]}"
             )
         except:  # noqa: E722
             # no country specific specification, check for general specification
             try:
-                crf_spec = getattr(crf, f"{type}{submission_year}")
+                crf_spec = getattr(crf, f"{submission_type}{submission_year}")
             except:  # noqa: E722
                 raise ValueError(  # noqa: TRY003, TRY200
                     f"No terminology exists for submission year " f"{submission_year}"
                 )
     else:
         try:
-            crf_spec = getattr(crf, f"{type}{submission_year}")
+            crf_spec = getattr(crf, f"{submission_type}{submission_year}")
         except:  # noqa: E722
             raise ValueError(  # noqa: TRY003, TRY200
                 f"No terminology exists for submission year " f"{submission_year}"
@@ -701,7 +703,7 @@ def get_crf_files(  # noqa: PLR0912, PLR0913
     data_year: Optional[Union[int, list[int]]] = None,
     date: Optional[str] = None,
     folder: Optional[str] = None,
-    type: str = "CRF",
+    submission_type: str = "CRF",
 ) -> list[Path]:
     """
     Find all files according to given parameters
@@ -725,17 +727,17 @@ def get_crf_files(  # noqa: PLR0912, PLR0913
         Folder that contains the xls files. If not given folders are determined by the
         submissions_year and country_code variables
 
-    type: str default = "CRF"
+    submission_type: str default = "CRF"
         read CRF or CRT data
 
     Returns
     -------
         List[Path]: list of Path objects for the files
     """
-    if type == "CRT":
+    if submission_type == "CRT":
         type_folder = "BTR"
     else:
-        type_folder = type
+        type_folder = submission_type
 
     if isinstance(country_codes, str):
         country_codes = [country_codes]
@@ -776,7 +778,7 @@ def get_crf_files(  # noqa: PLR0912, PLR0913
         country_folders = [folder]
 
     file_filter_template = {}
-    if type == "CRF":
+    if submission_type == "CRF":
         file_filter_template["submission_year"] = submission_year
     # don't filter for submission year in BTR as it's  the actual year and
     # not the submissions round (and we don't know yet if it will be the same
@@ -854,7 +856,7 @@ def get_info_from_crf_filename(  # noqa: PLR0912
         try:
             file_info["data_year"] = int(name_parts[2])
         except:  # noqa: E722
-            print(f"Data year string {name_parts[2]} " "could not be converted to int.")
+            print(f"Data year string {name_parts[2]} could not be converted to int.")
             file_info["data_year"] = name_parts[2]
         file_info["date"] = name_parts[3]
         # the last part (time code) is missing for CRT tables in CRF file format
@@ -1082,7 +1084,7 @@ def create_category_tree(
 
         elif current_cat_level < last_cat_info["level"]:
             # the new level is smaller (closer to the trunk)
-            # than the last one. Thus we remove all parents
+            # than the last one. Thus, we remove all parents
             # from this level on
             parent_info = parent_info[0 : current_cat_level + 1]
             category_tree.create_node(
@@ -1154,18 +1156,18 @@ def filter_category(
 def get_latest_date_for_country(
     country_code: str,
     submission_year: int,
-    type: str = "CRF",
+    submission_type: str = "CRF",
 ) -> str:
     """
     Find the latest submission date for a country
 
     Parameters
     ----------
-    country: str
+    country_code: str
         3-letter country code
     submission_year: int
         Year of the submission to find the latest date for
-    type: str, default CRF
+    submission_type: str, default CRF
         Check for CRF or CRT tables
 
     Returns
@@ -1175,19 +1177,20 @@ def get_latest_date_for_country(
     with open(downloaded_data_path_UNFCCC / "folder_mapping.json") as mapping_file:
         folder_mapping = json.load(mapping_file)
 
-    if type == "CRT":
+    if submission_type == "CRT":
         type_folder = "BTR"
         if country_code == "AUS" and submission_year == 1:
             date_format = "%d%m%Y"
         else:
             date_format = "%Y%m%d"
     else:
-        type_folder = type
+        type_folder = submission_type
         date_format = "%d%m%Y"
     if country_code in folder_mapping:
-        file_filter = {}
-        file_filter["party"] = country_code
-        if type == "CRF":
+        file_filter = {
+            "party": country_code,
+        }
+        if submission_type == "CRF":
             file_filter["submission_year"] = submission_year
         # don't filter for submission year in BTR as it's  the actual year and
         # not the submissions round (and we don't know yet if it will be the same
@@ -1309,6 +1312,9 @@ def find_latest_date(
     ----------
     dates: List[str]
         List of dates
+    date_format: str, default "%d%m%Y"
+        Format for the date. Unfortunately CRF uses %d%m%Y while CRT uses %Y%m%d with
+        some exceptions for early submissions which use the CRF file naming scheme
 
     Returns
     -------

+ 45 - 42
src/unfccc_ghg_data/unfccc_crf_reader/unfccc_crf_reader_devel.py

@@ -29,7 +29,7 @@ from .util import all_crf_countries
 def read_year_to_test_specs(  # noqa: PLR0912, PLR0915
     submission_year: int,
     data_year: int | None = None,
-    type: str = "CRF",
+    submission_type: str = "CRF",
     totest: bool | None = False,
     country_code: str | None = None,
 ) -> xr.Dataset:
@@ -45,7 +45,7 @@ def read_year_to_test_specs(  # noqa: PLR0912, PLR0915
         submission year to read
     data_year
         year to read
-    type: str = CRF
+    submission_type: str = CRF
         read CRF or CRT data
     totest
         if true only read tables with "totest" status
@@ -57,9 +57,9 @@ def read_year_to_test_specs(  # noqa: PLR0912, PLR0915
     xr.Dataset with data for given parameters
     """
     # long name for type
-    if type == "CRF":
+    if submission_type == "CRF":
         type_name = "common reporting format"
-    elif type == "CRT":
+    elif submission_type == "CRT":
         type_name = "common reporting tables"
     else:
         raise ValueError("Type must be CRF or CRT")  # noqa: TRY003
@@ -75,7 +75,8 @@ def read_year_to_test_specs(  # noqa: PLR0912, PLR0915
     last_row_info = []
     ds_all = None
     print(
-        f"{type} test reading for {type}{submission_year}. Using data year {data_year}"
+        f"{submission_type} test reading for {submission_type}{submission_year}. "
+        f"Using data year {data_year}"
     )
     if totest:
         print("Reading only tables to test.")
@@ -84,9 +85,9 @@ def read_year_to_test_specs(  # noqa: PLR0912, PLR0915
     if country_code is not None:
         countries_to_read = [country_code]
     else:  # noqa: PLR5501
-        if type == "CRF":
+        if submission_type == "CRF":
             countries_to_read = all_crf_countries
-        elif type == "CRT":
+        elif submission_type == "CRT":
             countries_to_read = all_countries
         else:
             raise ValueError("Type must be CRF or CRT")  # noqa: TRY003
@@ -100,16 +101,16 @@ def read_year_to_test_specs(  # noqa: PLR0912, PLR0915
         if current_country_code is not None:
             try:
                 crf_spec = getattr(
-                    crf, f"{type}{submission_year}_{current_country_code}"
+                    crf, f"{submission_type}{submission_year}_{current_country_code}"
                 )
                 print(
                     f"Using country specific specification: "
-                    f"{type}{submission_year}_{current_country_code}"
+                    f"{submission_type}{submission_year}_{current_country_code}"
                 )
             except Exception:
                 # no country specific specification, check for general specification
                 try:
-                    crf_spec = getattr(crf, f"{type}{submission_year}")
+                    crf_spec = getattr(crf, f"{submission_type}{submission_year}")
                 except Exception as ex:
                     raise ValueError(  # noqa: TRY003
                         f"No terminology exists for submission year "
@@ -117,10 +118,10 @@ def read_year_to_test_specs(  # noqa: PLR0912, PLR0915
                     ) from ex
         else:
             try:
-                crf_spec = getattr(crf, f"{type}{submission_year}")
+                crf_spec = getattr(crf, f"{submission_type}{submission_year}")
             except Exception as ex:
                 raise ValueError(  # noqa: TRY003
-                    f"No terminology exists for {type}{submission_year}"
+                    f"No terminology exists for {submission_type}{submission_year}"
                 ) from ex
 
         if totest:
@@ -137,17 +138,18 @@ def read_year_to_test_specs(  # noqa: PLR0912, PLR0915
             ]
         print(
             f"The following tables are available in the "
-            f"{type}{submission_year} specification: {tables}"
+            f"{submission_type}{submission_year} specification: {tables}"
         )
         print("#" * 80)
 
         try:
             submission_date = get_latest_date_for_country(
-                current_country_code, submission_year, type=type
+                current_country_code, submission_year, submission_type=submission_type
             )
         except Exception:
             message = (
-                f"No submissions for country {country_name}, {type}{submission_year}"
+                f"No submissions for country {country_name}, "
+                f"{submission_type}{submission_year}"
             )
             print(message)
             exceptions.append(f"No_sub: {country_name}: {message}")
@@ -169,7 +171,7 @@ def read_year_to_test_specs(  # noqa: PLR0912, PLR0915
                         date=submission_date,
                         data_year=[data_year],
                         debug=True,
-                        type=type,
+                        submission_type=submission_type,
                     )
 
                     # collect messages on unknown rows etc
@@ -195,12 +197,12 @@ def read_year_to_test_specs(  # noqa: PLR0912, PLR0915
                         submission_year,
                         meta_data_input={
                             "title": f"Data submitted in {submission_year} to the "
-                            f"UNFCCC in the {type_name} ({type}) "
+                            f"UNFCCC in the {type_name} ({submission_type}) "
                             f"by {country_name}. "
                             f"Submission date: {submission_date}"
                         },
                         entity_mapping=entity_mapping,
-                        type=type,
+                        submission_type=submission_type,
                     )
 
                     # now convert to native PRIMAP2 format
@@ -240,27 +242,28 @@ def read_year_to_test_specs(  # noqa: PLR0912, PLR0915
                     else:
                         ds_all = ds_all.combine_first(ds_table_pm2)
                 except Exception as e:
-                    message = f"Error occured when converting table {table} for"
-                    f" {country_name} to PRIMAP2 IF. Exception: {e}"
+                    message = (
+                        f"Error occured when converting table {table} for"
+                        f" {country_name} to PRIMAP2 IF. Exception: {e}"
+                    )
                     print(message)
                     exceptions.append(f"Error: {country_name}: {message}")
                     pass
 
     # process log messages.
     today = date.today()
+    output_folder = log_path / f"test_read_{submission_type}{submission_year}"
+    if not output_folder.exists():
+        output_folder.mkdir()
     if len(unknown_categories) > 0:
         if country_code is not None:
             log_location = (
-                log_path
-                / f"{type}{submission_year}"
-                / f"{data_year}_unknown_categories_{country_code}"
+                output_folder / f"{data_year}_unknown_categories_{country_code}"
                 f"_{today.strftime('%Y-%m-%d')}.csv"
             )
         else:
             log_location = (
-                log_path
-                / f"{type}{submission_year}"
-                / f"{data_year}_unknown_categories_"
+                output_folder / f"{data_year}_unknown_categories_"
                 f"{today.strftime('%Y-%m-%d')}.csv"
             )
         print(f"Unknown rows found. Savin log to {log_location}")
@@ -269,35 +272,41 @@ def read_year_to_test_specs(  # noqa: PLR0912, PLR0915
     if len(last_row_info) > 0:
         if country_code is not None:
             log_location = (
-                log_path
-                / f"{type}{submission_year}"
-                / f"{data_year}_last_row_info_{country_code}_"
+                output_folder / f"{data_year}_last_row_info_{country_code}_"
                 f"{today.strftime('%Y-%m-%d')}.csv"
             )
         else:
             log_location = (
-                log_path / f"{type}{submission_year}" / f"{data_year}_last_row_info_"
+                output_folder / f"{data_year}_last_row_info_"
                 f"{today.strftime('%Y-%m-%d')}.csv"
             )
         print(f"Data found in the last row. Saving log to " f"{log_location}")
         save_last_row_info(last_row_info, log_location)
 
+    # write exceptions
+    f_ex = open(
+        output_folder / f"{data_year}_exceptions_{today.strftime('%Y-%m-%d')}.txt", "w"
+    )
+    for ex in exceptions:
+        f_ex.write(f"{ex}\n")
+    f_ex.close()
+
     # save the data:
     print(f"Save dataset to log folder: {log_path}")
     compression = dict(zlib=True, complevel=9)
-    output_folder = log_path / f"test_read_{type}{submission_year}"
+
     if country_code is not None:
         output_filename = (
-            f"{type}{submission_year}_{country_code}_" f"{today.strftime('%Y-%m-%d')}"
+            f"{submission_type}{submission_year}_{country_code}_"
+            f"{today.strftime('%Y-%m-%d')}"
         )
     else:
-        output_filename = f"{type}{submission_year}_{today.strftime('%Y-%m-%d')}"
+        output_filename = (
+            f"{submission_type}{submission_year}_{today.strftime('%Y-%m-%d')}"
+        )
     if totest:
         output_filename = output_filename + "_totest"
 
-    if not output_folder.exists():
-        output_folder.mkdir()
-
     # write data in interchange format
     pm2.pm2io.write_interchange_format(
         output_folder / output_filename, ds_all.pr.to_interchange_format()
@@ -307,12 +316,6 @@ def read_year_to_test_specs(  # noqa: PLR0912, PLR0915
     encoding = {var: compression for var in ds_all.data_vars}
     ds_all.pr.to_netcdf(output_folder / (output_filename + ".nc"), encoding=encoding)
 
-    # write exceptions
-    f_ex = open(output_folder / f"exceptions_{output_filename}.txt", "w")
-    for ex in exceptions:
-        f_ex.write(f"{ex}\n")
-    f_ex.close()
-
     return ds_all
 
 

+ 17 - 11
src/unfccc_ghg_data/unfccc_crf_reader/unfccc_crf_reader_prod.py

@@ -140,7 +140,7 @@ def read_crf_for_country(  # noqa: PLR0912, PLR0915
         country_name,
         submission_year=submission_year,
         submission_date=submission_date,
-        type=type,
+        submission_type=type,
         verbose=True,
     )
 
@@ -151,7 +151,11 @@ def read_crf_for_country(  # noqa: PLR0912, PLR0915
         for table in tables:
             # read table for all years
             ds_table, new_unknown_categories, new_last_row_info = read_crf_table(
-                country_code, table, submission_year, date=submission_date, type=type
+                country_code,
+                table,
+                submission_year,
+                date=submission_date,
+                submission_type=type,
             )  # , data_year=[1990])
 
             # collect messages on unknown rows etc
@@ -177,7 +181,7 @@ def read_crf_for_country(  # noqa: PLR0912, PLR0915
                     f"Submission date: {submission_date}"
                 },
                 entity_mapping=entity_mapping,
-                type=type,
+                submission_type=type,
             )
 
             # now convert to native PRIMAP2 format
@@ -499,7 +503,7 @@ def read_new_crf_for_year_datalad(  # noqa: PLR0912
                     country_info["name"],
                     submission_year=submission_year,
                     submission_date=country_info["date"],
-                    type=type,
+                    submission_type=type,
                     verbose=False,
                 )
                 if not data_read:
@@ -603,7 +607,7 @@ def get_input_and_output_files_for_country(
         country_codes=country_code,
         submission_year=submission_year,
         date=submission_date,
-        type=type,
+        submission_type=type,
     )
     if not input_files:
         raise NoCRFFilesError(  # noqa: TRY003
@@ -648,7 +652,7 @@ def submission_has_been_read(  # noqa: PLR0913
     country_name: str,
     submission_year: int,
     submission_date: str,
-    type: str = "CRF",
+    submission_type: str = "CRF",
     verbose: Optional[bool] = True,
 ) -> bool:
     """
@@ -664,7 +668,7 @@ def submission_has_been_read(  # noqa: PLR0913
         year of submissions for CRF or submission round for CRT
     submission_date
         date of submission (as in the filename)
-    type: str: default "CRF"
+    submission_type: str: default "CRF"
         CRF or CRT
     verbose: bool (optional, default True)
         if True print additional output
@@ -674,7 +678,9 @@ def submission_has_been_read(  # noqa: PLR0913
     True if data has been read, False otherwise
     """
     output_folder = extracted_data_path_UNFCCC / country_name.replace(" ", "_")
-    output_filename = f"{country_code}_{type}{submission_year}_{submission_date}"
+    output_filename = (
+        f"{country_code}_{submission_type}{submission_year}_{submission_date}"
+    )
 
     #    check if the submission_year is correctly used for CRT
     if output_folder.exists():
@@ -685,14 +691,14 @@ def submission_has_been_read(  # noqa: PLR0913
             if verbose:
                 print(
                     f"Data already available for {country_code}, "
-                    f"{type}{submission_year}, version {submission_date}."
+                    f"{submission_type}{submission_year}, version {submission_date}."
                 )
         elif existing_suffixes:
             has_been_read = False
             if verbose:
                 print(
                     f"Partial data available for {country_code}, "
-                    f"{type}{submission_year}, version {submission_date}. "
+                    f"{submission_type}{submission_year}, version {submission_date}. "
                     "Please check if all files have been written after "
                     f"reading. Existing suffixes: {existing_suffixes}"
                 )
@@ -701,7 +707,7 @@ def submission_has_been_read(  # noqa: PLR0913
             if verbose:
                 print(
                     f"No read data available for {country_code}, "
-                    f"{type}{submission_year}, version {submission_date}. "
+                    f"{submission_type}{submission_year}, version {submission_date}. "
                 )
     else:
         has_been_read = False