Browse Source

Handle BTR/CRT submissions by version not by date.
Update jinja2 to 3.1.5 because of a vulnerability in lower versions.

Johannes Gütschow 3 months ago
parent
commit
a3a1f2d611

+ 6 - 1
dodo.py

@@ -405,6 +405,7 @@ def task_read_unfccc_submission():
 read_config_crf = {
     "country": get_var("country", None),
     "submission_year": get_var("submission_year", None),
+    "submission_version": get_var("version", None),
     "submission_date": get_var("submission_date", None),
     "re_read": get_var("re_read", False),
     "countries": get_var("countries", None),
@@ -422,10 +423,14 @@ def task_read_unfccc_crf_submission():
             re_read = True
         else:
             re_read = False
+        if read_config_crf["type"] == "CRF":
+            date_or_version = read_config_crf["submission_date"]
+        else:
+            date_or_version = read_config_crf["submission_version"]
         read_crf_for_country_datalad(
             read_config_crf["country"],
             submission_year=int(read_config_crf["submission_year"]),
-            submission_date=read_config_crf["submission_date"],
+            date_or_version=date_or_version,
             re_read=re_read,
             type=read_config_crf["type"],
         )

+ 4 - 4
poetry.lock

@@ -1806,13 +1806,13 @@ trio = ["async_generator", "trio"]
 
 [[package]]
 name = "jinja2"
-version = "3.1.4"
+version = "3.1.5"
 description = "A very fast and expressive template engine."
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "jinja2-3.1.4-py3-none-any.whl", hash = "sha256:bc5dd2abb727a5319567b7a813e6a2e7318c39f4f487cfe6c89c6f9c7d25197d"},
-    {file = "jinja2-3.1.4.tar.gz", hash = "sha256:4a3aee7acbbe7303aede8e9648d13b8bf88a429282aa6122a993f0ac800cb369"},
+    {file = "jinja2-3.1.5-py3-none-any.whl", hash = "sha256:aba0f4dc9ed8013c424088f68a5c226f7d6097ed89b246d7749c2ec4175c6adb"},
+    {file = "jinja2-3.1.5.tar.gz", hash = "sha256:8fefff8dc3034e27bb80d67c671eb8a9bc424c0ef4c0826edbff304cceff43bb"},
 ]
 
 [package.dependencies]
@@ -5488,4 +5488,4 @@ plots = ["matplotlib"]
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.10, <3.11"
-content-hash = "44c78f6efd614c74eaf64117bed8474edcc0c1ec99c897dffb9204904f1486b0"
+content-hash = "9668c2630766d6cd925bff6d0ba61660315bdf9e8eca079ca481e080206cd4c7"

+ 1 - 0
pyproject.toml

@@ -27,6 +27,7 @@ sphinx-exec-code = "^0.10"
 ghostscript = "^0.7"
 sphinx-markdown-tables = "^0.0.17"
 xarray = ">=2024.6.0"
+jinja2 = "3.1.5"
 
 [tool.poetry.extras]
 plots = ["matplotlib"]

+ 17 - 10
src/unfccc_ghg_data/unfccc_crf_reader/crf_raw_for_year.py

@@ -11,7 +11,6 @@ TODO: sort importing and move to datasets folder
 TODO: add datalad get to obtain the input files
 """
 
-
 import argparse
 from datetime import date
 from pathlib import Path
@@ -32,11 +31,11 @@ if __name__ == "__main__":
     parser.add_argument("--type", help="CRF or CRT tables", default="CRF")
     args = parser.parse_args()
     submission_year = args.submission_year
-    type = args.type
+    submission_type = args.type
 
-    if type == "CRF":
+    if submission_type == "CRF":
         countries = all_crf_countries
-    elif type == "CRT":
+    elif submission_type == "CRT":
         countries = all_countries
     else:
         raise ValueError("Type must be CRF or CRT")  # noqa: TRY003
@@ -49,17 +48,23 @@ if __name__ == "__main__":
         # determine folder
         try:
             country_info = get_input_and_output_files_for_country(
-                country, submission_year=submission_year, type=type, verbose=False
+                country,
+                submission_year=submission_year,
+                submission_type=submission_type,
+                verbose=False,
             )
 
+            if submission_type == "CRF":
+                date_or_version = country_info["date"]
+            else:
+                date_or_version = country_info["version"]
             # check if the latest submission has been read already
-
             data_read = submission_has_been_read(
                 country_info["code"],
                 country_info["name"],
                 submission_year=submission_year,
-                submission_date=country_info["date"],
-                submission_type=type,
+                date_or_version=date_or_version,
+                submission_type=submission_type,
                 verbose=False,
             )
             if not data_read:
@@ -96,8 +101,10 @@ if __name__ == "__main__":
     today = date.today()
 
     compression = dict(zlib=True, complevel=9)
-    output_folder = dataset_path_UNFCCC / f"{type}{submission_year}"
-    output_filename = f"{type}{submission_year}_raw_{today.strftime('%Y-%m-%d')}"
+    output_folder = dataset_path_UNFCCC / f"{submission_type}{submission_year}"
+    output_filename = (
+        f"{submission_type}{submission_year}_raw_{today.strftime('%Y-%m-%d')}"
+    )
 
     if not output_folder.exists():
         output_folder.mkdir()

+ 65 - 1
src/unfccc_ghg_data/unfccc_crf_reader/crf_specifications/crt1_specification.py

@@ -39,7 +39,6 @@ TODO:
 
 """
 
-
 from .util import unit_info
 
 gwp_to_use = "AR5GWP100"
@@ -433,6 +432,15 @@ CRT1 = {
             ],
             # SGP
             ["Residual fuel oil", ["1.A.2.g.vii", "ResFuelOil"], 4],
+            # CAN
+            [
+                "Biodiesel (5 percent fossil portion)",
+                ["1.A.2.g.vii", "OLBiodieselFC"],
+                4,
+            ],
+            ["Lubricating Oil (Two-Stroke Engines)", ["1.A.2.g.vii", "Lubricants"], 4],
+            # FIN
+            ["Gasoil", ["1.A.2.g.vii", "Gasoil"], 4],
             ["Gaseous fuels (6)", ["1.A.2.g.vii", "Gaseous"], 3],
             ["Other fossil fuels (7)", ["1.A.2.g.vii", "OtherFF"], 3],
             ["Biomass (3)", ["1.A.2.g.vii", "Biomass"], 3],
@@ -498,6 +506,14 @@ CRT1 = {
             ["Other fossil fuels (7)", ["1.A.2.g.viii.1", "OtherFF"], 4],
             ["Peat (8)", ["1.A.2.g.viii.1", "Peat"], 4],
             ["Biomass (3)", ["1.A.2.g.viii.1", "Biomass"], 4],
+            # LTU
+            ["Non-specified industry", ["1.A.2.g.viii.1", "Total"], 3],
+            ["Liquid fuels", ["1.A.2.g.viii.1", "Liquid"], 4],
+            ["Solid fuels", ["1.A.2.g.viii.1", "Solid"], 4],
+            ["Gaseous fuels (6)", ["1.A.2.g.viii.1", "Gaseous"], 4],
+            ["Other fossil fuels (7)", ["1.A.2.g.viii.1", "OtherFF"], 4],
+            ["Peat (8)", ["1.A.2.g.viii.1", "Peat"], 4],
+            ["Biomass (3)", ["1.A.2.g.viii.1", "Biomass"], 4],
             # MLT
             ["All Industry", ["1.A.2.g.viii.2", "Total"], 3],
             ["Liquid fuels", ["1.A.2.g.viii.2", "Liquid"], 4],
@@ -522,6 +538,22 @@ CRT1 = {
             ["Other fossil fuels (7)", ["1.A.2.g.viii.3", "OtherFF"], 4],
             ["Peat (8)", ["1.A.2.g.viii.3", "Peat"], 4],
             ["Biomass (3)", ["1.A.2.g.viii.3", "Biomass"], 4],
+            # BEL
+            ["Other non-specified", ["1.A.2.g.viii.4", "Total"], 3],
+            ["Liquid fuels", ["1.A.2.g.viii.4", "Liquid"], 4],
+            ["Solid fuels", ["1.A.2.g.viii.4", "Solid"], 4],
+            ["Gaseous fuels (6)", ["1.A.2.g.viii.4", "Gaseous"], 4],
+            ["Other fossil fuels (7)", ["1.A.2.g.viii.4", "OtherFF"], 4],
+            ["Peat (8)", ["1.A.2.g.viii.4", "Peat"], 4],
+            ["Biomass (3)", ["1.A.2.g.viii.4", "Biomass"], 4],
+            # CZE
+            ["Other non_specified", ["1.A.2.g.viii.4", "Total"], 3],
+            ["Liquid fuels", ["1.A.2.g.viii.4", "Liquid"], 4],
+            ["Solid fuels", ["1.A.2.g.viii.4", "Solid"], 4],
+            ["Gaseous fuels (6)", ["1.A.2.g.viii.4", "Gaseous"], 4],
+            ["Other fossil fuels (7)", ["1.A.2.g.viii.4", "OtherFF"], 4],
+            ["Peat (8)", ["1.A.2.g.viii.4", "Peat"], 4],
+            ["Biomass (3)", ["1.A.2.g.viii.4", "Biomass"], 4],
             # NLD
             ["Other Industrial Sectors", ["1.A.2.g.viii.4", "Total"], 3],
             ["Liquid fuels", ["1.A.2.g.viii.4", "Liquid"], 4],
@@ -530,6 +562,18 @@ CRT1 = {
             ["Other fossil fuels (7)", ["1.A.2.g.viii.4", "OtherFF"], 4],
             ["Peat (8)", ["1.A.2.g.viii.4", "Peat"], 4],
             ["Biomass (3)", ["1.A.2.g.viii.4", "Biomass"], 4],
+            # CHN
+            [
+                "Manufacturing industries which separate data are not available",
+                ["1.A.2.g.viii.4", "Total"],
+                3,
+            ],
+            ["Liquid fuels", ["1.A.2.g.viii.4", "Liquid"], 4],
+            ["Solid fuels", ["1.A.2.g.viii.4", "Solid"], 4],
+            ["Gaseous fuels (6)", ["1.A.2.g.viii.4", "Gaseous"], 4],
+            ["Other fossil fuels (7)", ["1.A.2.g.viii.4", "OtherFF"], 4],
+            ["Peat (8)", ["1.A.2.g.viii.4", "Peat"], 4],
+            ["Biomass (3)", ["1.A.2.g.viii.4", "Biomass"], 4],
             # RUS
             ["Other industries", ["1.A.2.g.viii.4", "Total"], 3],
             ["Liquid fuels", ["1.A.2.g.viii.4", "Liquid"], 4],
@@ -554,6 +598,14 @@ CRT1 = {
             ["Other fossil fuels (7)", ["1.A.2.g.viii.10", "OtherFF"], 4],
             ["Peat (8)", ["1.A.2.g.viii.10", "Peat"], 4],
             ["Biomass (3)", ["1.A.2.g.viii.10", "Biomass"], 4],
+            # HRV
+            ["other", ["1.A.2.g.viii.10", "Total"], 3],
+            ["Liquid fuels", ["1.A.2.g.viii.10", "Liquid"], 4],
+            ["Solid fuels", ["1.A.2.g.viii.10", "Solid"], 4],
+            ["Gaseous fuels (6)", ["1.A.2.g.viii.10", "Gaseous"], 4],
+            ["Other fossil fuels (7)", ["1.A.2.g.viii.10", "OtherFF"], 4],
+            ["Peat (8)", ["1.A.2.g.viii.10", "Peat"], 4],
+            ["Biomass (3)", ["1.A.2.g.viii.10", "Biomass"], 4],
             # SGP
             ["Others", ["1.A.2.g.viii.10", "Total"], 3],
             ["Liquid fuels", ["1.A.2.g.viii.10", "Liquid"], 4],
@@ -593,6 +645,18 @@ CRT1 = {
             ["Other fossil fuels (7)", ["1.A.2.g.viii.13", "OtherFF"], 4],
             ["Peat (8)", ["1.A.2.g.viii.13", "Peat"], 4],
             ["Biomass (3)", ["1.A.2.g.viii.13", "Biomass"], 4],
+            # USA
+            [
+                "Construction, agriculture, and other non-transport vehicles",
+                ["1.A.2.g.viii.11", "Total"],
+                3,
+            ],
+            ["Liquid fuels", ["1.A.2.g.viii.11", "Liquid"], 4],
+            ["Solid fuels", ["1.A.2.g.viii.11", "Solid"], 4],
+            ["Gaseous fuels (6)", ["1.A.2.g.viii.11", "Gaseous"], 4],
+            ["Other fossil fuels (7)", ["1.A.2.g.viii.11", "OtherFF"], 4],
+            ["Peat (8)", ["1.A.2.g.viii.11", "Peat"], 4],
+            ["Biomass (3)", ["1.A.2.g.viii.11", "Biomass"], 4],
         ],
         "entity_mapping": {
             "EMISSIONS CH4": "CH4",

+ 1 - 4
src/unfccc_ghg_data/unfccc_crf_reader/read_new_unfccc_crf_for_year.py

@@ -15,9 +15,6 @@ if __name__ == "__main__":
     parser = argparse.ArgumentParser()
     # parser.add_argument('--countries', help='List of country codes', default=None)
     parser.add_argument("--submission_year", help="Submission round to read", type=int)
-    parser.add_argument(
-        "--submission_date", help="Date of submission to read", default=None
-    )
     parser.add_argument(
         "--re_read", help="Read data also if already read before", action="store_true"
     )
@@ -36,5 +33,5 @@ if __name__ == "__main__":
         submission_year=int(submission_year),
         #    countries=countries,
         re_read=re_read,
-        type=type,
+        submission_type=type,
     )

+ 7 - 7
src/unfccc_ghg_data/unfccc_crf_reader/read_unfccc_crf_submission.py

@@ -16,7 +16,7 @@ if __name__ == "__main__":
     parser.add_argument("--country", help="Country name or code")
     parser.add_argument("--submission_year", help="Submission round to read", type=int)
     parser.add_argument(
-        "--submission_date", help="Date of submission to read", default=None
+        "--date_or_version", help="Date or version of submission to read", default=None
     )
     parser.add_argument(
         "--re_read", help="Read data also if already read before", action="store_true"
@@ -27,16 +27,16 @@ if __name__ == "__main__":
 
     country = args.country
     submission_year = args.submission_year
-    submission_date = args.submission_date
+    date_or_version = args.date_or_version
     re_read = args.re_read
-    type = args.type
-    if submission_date == "None":
-        submission_date = None
+    submission_type = args.type
+    if date_or_version == "None":
+        date_or_version = None
 
     read_crf_for_country(
         country,
         submission_year=submission_year,
-        submission_date=submission_date,
+        date_or_version=date_or_version,
         re_read=re_read,
-        type=type,
+        submission_type=submission_type,
     )

+ 346 - 140
src/unfccc_ghg_data/unfccc_crf_reader/unfccc_crf_reader_core.py

@@ -25,13 +25,13 @@ from treelib import Tree
 from unfccc_ghg_data.helper import downloaded_data_path_UNFCCC, root_path
 
 from . import crf_specifications as crf
-from .util import NoCRFFilesError
+from .util import BTR_urls, NoCRFFilesError
 
 pd.set_option("future.no_silent_downcasting", True)
 
 
 ### reading functions
-def convert_crf_table_to_pm2if(  # noqa: PLR0913
+def convert_crf_table_to_pm2if(  # noqa: PLR0912, PLR0913
     df_table: pd.DataFrame,
     submission_year: int,
     entity_mapping: dict[str, str] | None = None,
@@ -104,7 +104,7 @@ def convert_crf_table_to_pm2if(  # noqa: PLR0913
     add_coords_cols = {
         #    "orig_cat_name": ["orig_cat_name", "category"],
     }
-    # TODO: fix this for CRT
+
     coords_terminologies = {
         "area": "ISO3",
         "category": category_terminology,
@@ -146,8 +146,6 @@ def convert_crf_table_to_pm2if(  # noqa: PLR0913
             filter_keep[key] = filter_keep_input[key]
 
     meta_data = {
-        "references": f"https://unfccc.int/ghg-inventories-annex-i-parties/"
-        f"{submission_year}",
         "rights": "",
         "contact": "mail@johannes-guetschow.de",
         "title": title,
@@ -155,6 +153,22 @@ def convert_crf_table_to_pm2if(  # noqa: PLR0913
         "institution": "United Nations Framework Convention on Climate Change "
         "(www.unfccc.int)",
     }
+    if submission_type == "CRF":
+        meta_data[
+            "references"
+        ] = f"https://unfccc.int/ghg-inventories-annex-i-parties/{submission_year}"
+    elif submission_year in BTR_urls.keys():
+        meta_data["references"] = BTR_urls[submission_year]
+    elif meta_data_input is not None:
+        if "references" not in meta_data_input.keys():
+            raise ValueError(  # noqa: TRY003
+                f"Submission round {submission_year} unknown, please add metadata."
+            )
+    else:
+        raise ValueError(  # noqa: TRY003
+            f"Submission round {submission_year} unknown, please add metadata."
+        )
+
     if meta_data_input is not None:
         for key in meta_data_input.keys():
             meta_data[key] = meta_data_input[key]
@@ -180,7 +194,7 @@ def read_crf_table(  # noqa: PLR0913, PLR0912, PLR0915
     table: str,
     submission_year: int,
     data_year: int | list[int] | None = None,
-    date: str | None = None,
+    date_or_version: str | None = None,
     folder: str | None = None,
     submission_type: str = "CRF",
     debug: bool = False,
@@ -207,13 +221,14 @@ def read_crf_table(  # noqa: PLR0913, PLR0912, PLR0915
     data_year: int or List of int (optional)
         if int a single data year will be read. if a list of ints is given these
         years will be read. If no nothing is given all data years will be read
-    date: str (optional, default is "latest")
-        readonly submission from the given date
+    date_or_version: str (optional, default is None)
+        read only the submission from the given date (CRF) or version (CRT/BTR)
+        use "latest" to read the latest submissions
     folder: str (optional)
         Folder that contains the xls files. If not given folders are determined by the
         submissions_year and country_code variables
     submission_type: str default = "CRF"
-        read CRF or CRF data
+        read CRF or CRT/BTR data
     debug: bool (optional)
         if true print some debug information like column headers
 
@@ -239,51 +254,53 @@ def read_crf_table(  # noqa: PLR0913, PLR0912, PLR0915
         country_codes=country_codes,
         submission_year=submission_year,
         data_year=data_year,
-        date=date,
+        date_or_version=date_or_version,
         folder=folder,
         submission_type=submission_type,
     )
     # nasty fix for cases where exporting ran overnight and not all files have
-    # the same date
-    if (date is not None) and (len(country_codes) == 1):
-        if isinstance(data_year, list):
-            expected_files = len(data_year)
-        elif isinstance(data_year, int):
-            expected_files = 1
-        else:
-            expected_files = submission_year - 1990 - 1
-        if len(input_files) < expected_files:
-            print(
-                f"Found only {len(input_files)} input files for {country_codes}. "
-                f"Expected {expected_files}."
-            )
-            print(
-                "Possibly exporting run overnight and some files have the previous "
-                "day as date."
-            )
-            date_datetime = datetime.strptime(date, "%d%m%Y")
-            date_datetime = date_datetime - timedelta(days=1)
-            prv_date = date_datetime.strftime("%d%m%Y")
-            more_input_files = get_crf_files(
-                country_codes=country_codes,
-                submission_year=submission_year,
-                data_year=data_year,
-                date=prv_date,
-                folder=folder,
-                submission_type=submission_type,
-            )
-            if len(more_input_files) > 0:
-                print(f"Found {len(more_input_files)} additional input files.")
-                input_files = input_files + more_input_files
+    # the same date. This is only applied for CRF, as for CRT we use the
+    # version as the main identifier
+    if submission_type == "CRF":
+        if (date_or_version is not None) and (len(country_codes) == 1):
+            if isinstance(data_year, list):
+                expected_files = len(data_year)
+            elif isinstance(data_year, int):
+                expected_files = 1
             else:
-                print("Found no additional input files")
+                expected_files = submission_year - 1990 - 1
+            if len(input_files) < expected_files:
+                print(
+                    f"Found only {len(input_files)} input files for {country_codes}. "
+                    f"Expected {expected_files}."
+                )
+                print(
+                    "Possibly exporting run overnight and some files have the previous "
+                    "day as date."
+                )
+                date_datetime = datetime.strptime(date_or_version, "%d%m%Y")
+                date_datetime = date_datetime - timedelta(days=1)
+                prv_date = date_datetime.strftime("%d%m%Y")
+                more_input_files = get_crf_files(
+                    country_codes=country_codes,
+                    submission_year=submission_year,
+                    data_year=data_year,
+                    date_or_version=prv_date,
+                    folder=folder,
+                    submission_type=submission_type,
+                )
+                if len(more_input_files) > 0:
+                    print(f"Found {len(more_input_files)} additional input files.")
+                    input_files = input_files + more_input_files
+                else:
+                    print("Found no additional input files")
 
     if not input_files:
         raise NoCRFFilesError(  # noqa: TRY003
             f"No files found for {country_codes}, "
             f"submission_year={submission_year}, "
             f"data_year={data_year}, "
-            f"date={date}, "
+            f"date_or_version={date_or_version}, "
             f"folder={folder}."
         )
 
@@ -303,17 +320,17 @@ def read_crf_table(  # noqa: PLR0913, PLR0912, PLR0915
             # no country specific specification, check for general specification
             try:
                 crf_spec = getattr(crf, f"{submission_type}{submission_year}")
-            except:  # noqa: E722
-                raise ValueError(  # noqa: TRY003, TRY200
+            except Exception as ex:
+                raise ValueError(  # noqa: TRY003
                     f"No terminology exists for submission year " f"{submission_year}"
-                )
+                ) from ex
     else:
         try:
             crf_spec = getattr(crf, f"{submission_type}{submission_year}")
-        except:  # noqa: E722
-            raise ValueError(  # noqa: TRY003, TRY200
+        except Exception as ex:
+            raise ValueError(  # noqa: TRY003
                 f"No terminology exists for submission year " f"{submission_year}"
-            )
+            ) from ex
 
     # now loop over files and read them
     df_all = None
@@ -699,7 +716,7 @@ def get_crf_files(  # noqa: PLR0912, PLR0913
     country_codes: Union[str, list[str]],
     submission_year: int,
     data_year: Optional[Union[int, list[int]]] = None,
-    date: Optional[str] = None,
+    date_or_version: Optional[str] = None,
     folder: Optional[str] = None,
     submission_type: str = "CRF",
 ) -> list[Path]:
@@ -710,21 +727,17 @@ def get_crf_files(  # noqa: PLR0912, PLR0913
     ----------
     country_codes: str or list[str]
         ISO 3-letter country code or list of country codes
-
     submission_year: int
-        Year of the submission of the data
-
+        Year of the submission of the data for CRF and submission round for CRT/BTR
     data_year: int or List of int (optional)
         if int a single data year will be read. if a list of ints is given these
         years will be read. If no nothing is given all data years will be read
-
-    date: str (optional, default is "latest")
-        readonly submission from the given date
-
+    date_or_version: str (optional, default is None)
+        get files only for submission from the given date (CRF) or version (CRT/BTR)
+        Use "latest" to get files for the latest submission
     folder: str (optional)
-        Folder that contains the xls files. If not given fodlers are determined by the
+        Folder that contains the xls files. If not given folders are determined by the
         submissions_year and country_code variables
-
     submission_type: str default = "CRF"
         read CRF or CRT/BTR data
 
@@ -732,11 +745,6 @@ def get_crf_files(  # noqa: PLR0912, PLR0913
     -------
         List[Path]: list of Path objects for the files
     """
-    if submission_type == "CRT":
-        type_folder = "BTR"
-    else:
-        type_folder = submission_type
-
     if isinstance(country_codes, str):
         country_codes = [country_codes]
     input_files = []
@@ -745,33 +753,12 @@ def get_crf_files(  # noqa: PLR0912, PLR0913
     # we should only have files for one country and submission in the folder. But the
     # function can also be used on a given folder and then the filter is useful.
     if folder is None:
-        data_folder = downloaded_data_path_UNFCCC
-        submission_folder = f"{type_folder}{submission_year}"
-
-        with open(data_folder / "folder_mapping.json") as mapping_file:
-            folder_mapping = json.load(mapping_file)
-
         # use country default folders
-        country_folders = []
-        for country_code in country_codes:
-            if country_code in folder_mapping:
-                new_country_folders = folder_mapping[country_code]
-                if isinstance(new_country_folders, str):
-                    # only one folder
-                    country_folders = [
-                        *country_folders,
-                        data_folder / new_country_folders / submission_folder,
-                    ]
-                else:
-                    country_folders = country_folders + [
-                        data_folder / folder / submission_folder
-                        for folder in new_country_folders
-                    ]
-            else:
-                raise ValueError(  # noqa: TRY003
-                    f"No data folder found for country {country_code}. "
-                    f"Check if folder mapping is up to date."
-                )
+        country_folders = get_country_folders(
+            country_codes,
+            submission_year=submission_year,
+            submission_type=submission_type,
+        )
     else:
         country_folders = [folder]
 
@@ -788,26 +775,47 @@ def get_crf_files(  # noqa: PLR0912, PLR0913
     for input_folder in country_folders:
         input_folder_path = Path(input_folder)
         if input_folder_path.exists():
-            # if desired find the latest date and only read that
+            # if desired find the latest date_or_version and only read that
             # has to be done per country
-            if date == "latest":
-                for country in country_codes:
+            if submission_type == "CRF":
+                if date_or_version == "latest":
+                    for country in country_codes:
+                        file_filter = file_filter_template.copy()
+                        file_filter["party"] = country
+                        dates = get_submission_dates(folder, file_filter)
+                        file_filter["date"] = find_latest_date(dates)
+                        input_files = input_files + filter_filenames(
+                            input_folder_path.glob("*.xlsx"), **file_filter
+                        )
+                else:
                     file_filter = file_filter_template.copy()
-                    file_filter["party"] = country
-                    dates = get_submission_dates(folder, file_filter)
-                    file_filter["date"] = find_latest_date(dates)
+                    if date_or_version is not None:
+                        file_filter["date"] = date_or_version
+                    input_files = input_files + filter_filenames(
+                        input_folder_path.glob("*.xlsx"), **file_filter
+                    )
+            elif submission_type == "CRT":
+                if date_or_version == "latest":
+                    for country in country_codes:
+                        file_filter = file_filter_template.copy()
+                        file_filter["party"] = country
+                        versions = get_submission_versions(folder, file_filter)
+                        file_filter["version"] = find_latest_version(versions)
+                        input_files = input_files + filter_filenames(
+                            input_folder_path.glob("*.xlsx"), **file_filter
+                        )
+                else:
+                    file_filter = file_filter_template.copy()
+                    if date_or_version is not None:
+                        file_filter["version"] = date_or_version
                     input_files = input_files + filter_filenames(
                         input_folder_path.glob("*.xlsx"), **file_filter
                     )
             else:
-                file_filter = file_filter_template.copy()
-                if date is not None:
-                    file_filter["date"] = date
-                input_files = input_files + filter_filenames(
-                    input_folder_path.glob("*.xlsx"), **file_filter
+                raise ValueError(  # noqa: TRY003
+                    f"Unknown submissions type: {submission_type}."
+                    "Only CRF and CRT are allowed."
                 )
-        # else:
-        #    raise ValueError(f"Folder {input_folder} does not exist")
     if len(input_files) == 0:
         raise ValueError(f"No input files found in {country_folders}")  # noqa: TRY003
 
@@ -823,6 +831,61 @@ def get_crf_files(  # noqa: PLR0912, PLR0913
     return unique_files
 
 
+def get_country_folders(
+    country_codes: Union[str, list[str]],
+    submission_year: int,
+    submission_type: str = "CRF",
+) -> list[Path]:
+    """
+    Get folders which contain CRF or BTR/CRT submissions for given countries
+
+    Parameters
+    ----------
+    country_codes :
+        ISO 3-letter country code or list of country codes
+    submission_year :
+        Year of the submission of the data for CRF and submission round for CRT/BTR
+    submission_type :
+        "CRF" or "CRT". For "CRT" the folder prefix "BTR" is used, any other
+        value is used verbatim as folder prefix
+
+    Returns
+    -------
+        List[Path]: one folder per folder-mapping entry of each requested country.
+        The folders are not checked for existence.
+
+    Raises
+    ------
+    ValueError
+        If a country code has no entry in ``folder_mapping.json``
+
+    """
+    # a bare string would otherwise be iterated character by character
+    if isinstance(country_codes, str):
+        country_codes = [country_codes]
+
+    # CRT submissions are looked up in folders prefixed with "BTR"
+    type_folder = "BTR" if submission_type == "CRT" else submission_type
+    data_folder = downloaded_data_path_UNFCCC
+    submission_folder = f"{type_folder}{submission_year}"
+
+    with open(data_folder / "folder_mapping.json") as mapping_file:
+        folder_mapping = json.load(mapping_file)
+
+    country_folders = []
+    for country_code in country_codes:
+        if country_code not in folder_mapping:
+            raise ValueError(  # noqa: TRY003
+                f"No data folder found for country {country_code}. "
+                f"Check if folder mapping is up to date."
+            )
+        mapped_folders = folder_mapping[country_code]
+        if isinstance(mapped_folders, str):
+            # only one folder is mapped to this country
+            mapped_folders = [mapped_folders]
+        country_folders.extend(
+            data_folder / mapped_folder / submission_folder
+            for mapped_folder in mapped_folders
+        )
+    return country_folders
+
+
 def get_info_from_crf_filename(  # noqa: PLR0912
     filename: str,
 ) -> dict[str, Union[int, str]]:
@@ -840,7 +903,7 @@ def get_info_from_crf_filename(  # noqa: PLR0912
         * party: the party that submitted the data (3 letter code)
         * submission_year: year of submission
         * data_year: year in which the emissions took place
-        * date: date of the submission
+        * date_or_version: date_or_version of the submission
         * extra: rest of the file name
 
     """
@@ -856,12 +919,13 @@ def get_info_from_crf_filename(  # noqa: PLR0912
         except:  # noqa: E722
             print(f"Data year string {name_parts[2]} could not be converted to int.")
             file_info["data_year"] = name_parts[2]
-        file_info["date"] = name_parts[3]
+        file_info["date_or_version"] = name_parts[3]
         # the last part (time code) is missing for CRT tables in CRF file format
         if len(name_parts) > 4:  # noqa: PLR2004
             file_info["extra"] = name_parts[4]
         else:
             file_info["extra"] = ""
+        file_info["version"] = "V0.0"
     else:
         # not enough parts, we probably have a CRT file with different separator
         name_parts = filename.split("-")
@@ -878,7 +942,7 @@ def get_info_from_crf_filename(  # noqa: PLR0912
                         "could not be converted to int."
                     )
                     file_info["data_year"] = name_parts[4]
-                file_info["date"] = name_parts[5]
+                file_info["date_or_version"] = name_parts[5]
                 # treat time code and note as optional
                 if len(name_parts) > 6:  # noqa: PLR2004
                     file_info["extra"] = name_parts[6]
@@ -894,12 +958,13 @@ def get_info_from_crf_filename(  # noqa: PLR0912
     return file_info
 
 
-def filter_filenames(
+def filter_filenames(  # noqa: PLR0913
     files_to_filter: list[Path] | Generator[Path, None, None],
     party: Optional[Union[str, list[str]]] = None,
     data_year: Optional[Union[int, list[int]]] = None,
     submission_year: Optional[str] = None,
     date: Optional[str] = None,
+    version: Optional[str] = None,
 ) -> list[Path]:
     """Filter a list of filenames of CRF/CRT files
 
@@ -907,21 +972,23 @@ def filter_filenames(
     ----------
     files_to_filter: List[Path]
         List with pathlib.Path objects for the filenames to filter
-
     party: Optional[Union[str, List[str]]] (default: None)
         List of country codes or single country code. If given only files
         for this(these) country-code(s) will be returned.
-
     data_year: Optional[Union[int, List[int]]] (default: None)
         List of data years or single year. If given only files for this
         (these) year(s) will be returned
-
     submission_year: Optional[str] (default: None)
         List of submission years or single year. If given only files with the
         given submission year(s) will be returned
-
     date: Optional[str] (default: None)
         Date. If given only files with the given submission date will be returned
+    version: Optional[str] (default: None)
+        Version. If given only files with the given version (CRT/BTR) are returned
+
+    Returns
+    -------
+        list with pathlib Path objects for the files matching the filter
 
     """
     file_filter = {}
@@ -932,7 +999,9 @@ def filter_filenames(
     if data_year is not None:
         file_filter["data_year"] = data_year
     if date is not None:
-        file_filter["date"] = date
+        file_filter["date_or_version"] = date
+    if version is not None:
+        file_filter["version"] = version
 
     filtered_files = []
     for file in files_to_filter:
@@ -947,7 +1016,7 @@ def filter_filenames(
     return filtered_files
 
 
-def check_crf_file_info(  # noqa: PLR0911
+def check_crf_file_info(  # noqa: PLR0911, PLR0912
     file_info: dict,
     file_filter: dict,
 ) -> bool:
@@ -961,7 +1030,7 @@ def check_crf_file_info(  # noqa: PLR0911
         `get_info_from_crf_filename`
 
     file_filter: Dict
-        possible keys are `party`, `data_year`, `submission_year` and `date`
+        possible keys are `party`, `data_year`, `submission_year` and `date_or_version`
         with functionality as in `filter_filenames`
 
     Returns
@@ -972,8 +1041,11 @@ def check_crf_file_info(  # noqa: PLR0911
     if "submission_year" in file_filter.keys():
         if file_info["submission_year"] != file_filter["submission_year"]:
             return False
-    if "date" in file_filter.keys():
-        if file_info["date"] != file_filter["date"]:
+    if "date_or_version" in file_filter.keys():
+        if file_info["date_or_version"] != file_filter["date_or_version"]:
+            return False
+    if "version" in file_filter.keys():
+        if file_info["version"] != file_filter["version"]:
             return False
     if "data_year" in file_filter.keys():
         if isinstance(file_filter["data_year"], int):
@@ -1163,42 +1235,42 @@ def get_latest_date_for_country(
     submission_type: str = "CRF",
 ) -> str:
     """
-    Find the latest submission date for a country
+    Find the latest submission date_or_version (CRF) or version (CRT) for a country
 
     Parameters
     ----------
     country_code: str
         3-letter country code
     submission_year: int
-        Year of the submission to find the l;atest date for
+        Year of the submission to find the latest date for
     submission_type: str, default CRF
         Check for CRF or CRT tables
 
     Returns
     -------
-        str: string with date
+        str: string with date_or_version / version
     """
     with open(downloaded_data_path_UNFCCC / "folder_mapping.json") as mapping_file:
         folder_mapping = json.load(mapping_file)
 
-    if submission_type == "CRT":
-        type_folder = "BTR"
-        if country_code == "AUS" and submission_year == 1:
-            date_format = "%d%m%Y"
-        else:
-            date_format = "%Y%m%d"
-    else:
-        type_folder = submission_type
-        date_format = "%d%m%Y"
     if country_code in folder_mapping:
         file_filter = {
             "party": country_code,
         }
         if submission_type == "CRF":
+            type_folder = submission_type
+            date_format = "%d%m%Y"
             file_filter["submission_year"] = submission_year
-        # don't filter for submission year in BTR as it's  the actual year and
-        # not the submissions round (and we don't know yet if it will be the same
-        # for all submission in one submission round)
+        else:
+            type_folder = "BTR"
+            if country_code == "AUS" and submission_year == 1:
+                date_format = "%d%m%Y"
+            else:
+                date_format = "%Y%m%d"
+            # don't filter for submission year in BTR as it's the actual year and
+            # not the submissions round (and we don't know yet if it will be the same
+            # for all submission in one submission round)
+
         country_folders = folder_mapping[country_code]
         if isinstance(country_folders, str):
             # only one folder
@@ -1225,12 +1297,69 @@ def get_latest_date_for_country(
     else:
         raise ValueError(  # noqa: TRY003
             f"No data folder found for country {country_code}. "
-            f"Check if folder mapping is up to date."
+            f"Check if folder mapping is up to date_or_version."
         )
 
     return submission_date
 
 
+def get_latest_version_for_country(
+    country_code: str,
+    submission_round: int,
+) -> str:
+    """
+    Find the latest submission version (CRT) for a country
+
+    Parameters
+    ----------
+    country_code: str
+        3-letter country code
+    submission_round: int
+        Submission round to find the latest version for
+
+    Returns
+    -------
+        str: string with the latest version
+
+    Raises
+    ------
+        ValueError: if the country is not present in the folder mapping
+    """
+    with open(downloaded_data_path_UNFCCC / "folder_mapping.json") as mapping_file:
+        folder_mapping = json.load(mapping_file)
+
+    if country_code not in folder_mapping:
+        raise ValueError(  # noqa: TRY003
+            f"No data folder found for country {country_code}. "
+            "Check if folder mapping is up to date."
+        )
+
+    file_filter = {"party": country_code}
+    country_folders = folder_mapping[country_code]
+    if isinstance(country_folders, str):
+        # only one folder; if the BTR folder does not exist the error is
+        # raised by get_submission_versions
+        return find_latest_version(
+            get_submission_versions(
+                downloaded_data_path_UNFCCC
+                / country_folders
+                / f"BTR{submission_round}",
+                file_filter,
+            ),
+        )
+
+    # several folders: collect the versions from all folders that exist
+    versions = []
+    for folder in country_folders:
+        folder_submission = (
+            downloaded_data_path_UNFCCC / folder / f"BTR{submission_round}"
+        )
+        if folder_submission.exists():
+            # versions (not submission dates) are what find_latest_version compares
+            versions.extend(get_submission_versions(folder_submission, file_filter))
+    return find_latest_version(versions)
+
+
 def get_submission_dates(
     folder: Path,
     file_filter: dict[str, Union[str, int, list]],
@@ -1251,9 +1380,9 @@ def get_submission_dates(
         List[str]:
             List of dates as str
     """
-    if "date" in file_filter:
+    if "date_or_version" in file_filter:
         raise ValueError(  # noqa: TRY003
-            "'date' present in 'file_filter'. This makes no sense as "
+            "'date_or_version' present in 'file_filter'. This makes no sense as "
             "the function's purpose is to return available dates."
         )
 
@@ -1262,7 +1391,55 @@ def get_submission_dates(
     else:
         raise ValueError(f"Folder {folder} does not exist")  # noqa: TRY003
 
-    dates = [get_info_from_crf_filename(file.name)["date"] for file in files]
+    dates = [get_info_from_crf_filename(file.name)["date_or_version"] for file in files]
+    dates = list(set(dates))
+
+    return dates
+
+
+def get_submission_versions(
+    folder: Path,
+    file_filter: dict[str, Union[str, int, list]],
+) -> list[str]:
+    """
+    Return all submission versions available in a folder.
+
+    This function only works for CRT files as CRF files do not contain a version and
+    the field is filled with V0.0 for all files using the CRF naming convention.
+
+    There is one BTR submission where the CRT files use the CRF naming convention
+    and don't have a version number (Australia BTR1). It uses V0.0 as version number.
+
+    Parameters
+    ----------
+    folder: Path
+        Folder to analyze
+
+    file_filter: Dict[str, Union[str, int, List]]
+        Dict with possible fields "party", "submission_year", "data_year"
+
+    Returns
+    -------
+        List[str]:
+            List of unique versions as str (in no particular order)
+    """
+    if "version" in file_filter:
+        raise ValueError(  # noqa: TRY003
+            "'version' present in 'file_filter'. This makes no sense as "
+            "the function's purpose is to return available versions."
+        )
+
+    if "CRF" in folder.name:
+        raise ValueError(  # noqa: TRY003
+            "'CRF' present in 'folder_name'. Function only works on CRT files"
+        )
+
+    if folder.exists():
+        files = filter_filenames(folder.glob("*.xlsx"), **file_filter)
+    else:
+        raise ValueError(f"Folder {folder} does not exist")  # noqa: TRY003
+
+    dates = [get_info_from_crf_filename(file.name)["version"] for file in files]
     dates = list(set(dates))
 
     return dates
@@ -1281,7 +1458,7 @@ def get_submission_parties(
         Folder to analyze
 
     file_filter: Dict[str, Union[str, int, List]]
-        Dict with possible fields "submission_year", "data_year", "date"
+        Dict with possible fields "submission_year", "data_year", "date_or_version"
 
     Returns
     -------
@@ -1310,19 +1487,20 @@ def find_latest_date(
     date_format: str = "%d%m%Y",
 ) -> str:
     """
-    Return the latest date in a list of dates as str in the format ddmmyyyy
+    Return the latest date_or_version in a list of dates as str in the format ddmmyyyy
 
     Parameters
     ----------
     dates: List[str]
         List of dates
     date_format: str, default "%d%m%Y"
-        Format for the date. Unfortunately CRF uses %d%m%Y while CRT uses %Y%m%d with
-        some exceptions for early submissions which use the CRF file namig scheme
+        Format for the date_or_version. Unfortunately CRF uses %d%m%Y while CRT uses
+        %Y%m%d with some exceptions for early submissions which use the CRF file namig
+        scheme
 
     Returns
     -------
-        str: latest date
+        str: latest date_or_version
     """
     if len(dates) > 0:
         dates_datetime = [
@@ -1333,3 +1511,31 @@ def find_latest_date(
         raise ValueError("Passed list of dates is empty")  # noqa: TRY003
 
     return dates_datetime[-1][0]
+
+
+def find_latest_version(
+    versions: list[str],
+) -> str:
+    """
+    Return the latest version in a list of versions as str
+
+    Parameters
+    ----------
+    versions: List[str]
+        Version strings as used in the file names: one prefix character followed
+        by dot-separated integers (e.g. "V0.0", "V1.10")
+
+    Returns
+    -------
+        str: latest version (an unmodified element of `versions`)
+
+    Raises
+    ------
+        ValueError: if `versions` is empty or a version can not be parsed
+    """
+    if not versions:
+        raise ValueError("Passed list of versions is empty")  # noqa: TRY003
+
+    # compare the components as integers so that V1.10 is newer than V1.9
+    # (converting the whole number to float would rank V1.10 below V1.9)
+    return max(versions, key=lambda v: tuple(int(p) for p in v[1:].split(".")))

+ 15 - 7
src/unfccc_ghg_data/unfccc_crf_reader/unfccc_crf_reader_devel.py

@@ -21,6 +21,7 @@ from . import crf_specifications as crf
 from .unfccc_crf_reader_core import (
     convert_crf_table_to_pm2if,
     get_latest_date_for_country,
+    get_latest_version_for_country,
     read_crf_table,
 )
 from .util import all_crf_countries
@@ -143,9 +144,16 @@ def read_year_to_test_specs(  # noqa: PLR0912, PLR0915
         print("#" * 80)
 
         try:
-            submission_date = get_latest_date_for_country(
-                current_country_code, submission_year, submission_type=submission_type
-            )
+            if submission_type == "CRF":
+                date_or_version = get_latest_date_for_country(
+                    current_country_code,
+                    submission_year,
+                    submission_type=submission_type,
+                )
+            else:
+                date_or_version = get_latest_version_for_country(
+                    current_country_code, submission_year
+                )
         except Exception:
             message = (
                 f"No submissions for country {country_name}, "
@@ -153,10 +161,10 @@ def read_year_to_test_specs(  # noqa: PLR0912, PLR0915
             )
             print(message)
             exceptions.append(f"No_sub: {country_name}: {message}")
-            submission_date = None
+            date_or_version = None
             pass
 
-        if submission_date is not None:
+        if date_or_version is not None:
             for table in tables:
                 try:
                     # read table for given years
@@ -168,7 +176,7 @@ def read_year_to_test_specs(  # noqa: PLR0912, PLR0915
                         current_country_code,
                         table,
                         submission_year,
-                        date=submission_date,
+                        date_or_version=date_or_version,
                         data_year=[data_year],
                         debug=True,
                         submission_type=submission_type,
@@ -199,7 +207,7 @@ def read_year_to_test_specs(  # noqa: PLR0912, PLR0915
                             "title": f"Data submitted in {submission_year} to the "
                             f"UNFCCC in the {type_name} ({submission_type}) "
                             f"by {country_name}. "
-                            f"Submission date: {submission_date}"
+                            f"Submission date / version: {date_or_version}"
                         },
                         entity_mapping=entity_mapping,
                         submission_type=submission_type,

+ 117 - 81
src/unfccc_ghg_data/unfccc_crf_reader/unfccc_crf_reader_prod.py

@@ -25,6 +25,7 @@ from .unfccc_crf_reader_core import (
     convert_crf_table_to_pm2if,
     get_crf_files,
     get_latest_date_for_country,
+    get_latest_version_for_country,
     read_crf_table,
 )
 from .unfccc_crf_reader_devel import save_last_row_info, save_unknown_categories_info
@@ -48,9 +49,9 @@ from .util import NoCRFFilesError, all_crf_countries
 def read_crf_for_country(  # noqa: PLR0912, PLR0915
     country_code: str,
     submission_year: int,
-    submission_date: Optional[str] = None,
+    date_or_version: Optional[str] = None,
     re_read: Optional[bool] = True,
-    type: str = "CRF",
+    submission_type: str = "CRF",
 ) -> xr.Dataset:
     """
     Read for given submission year and country.
@@ -77,16 +78,17 @@ def read_crf_for_country(  # noqa: PLR0912, PLR0915
 
     Parameters
     ----------
-    country_codes: str
+    country_code: str
         ISO 3-letter country code
     submission_year: int
         Year of the submission of the data
-    submission_data: Optional(str)
-        Read for a specific submission date (given as string as in the file names)
+    date_or_version: Optional(str)
+        Read for a specific submission date (CRF) or version (CRT/BTR)
+        (given as string as in the file names)
         If not specified latest data will be read
     re_read: Optional(bool) default: True
         Read the data also if it's already present
-    type: str default "CRF"
+    submission_type: str default "CRF"
         Read CRF or CRT
 
     Returns
@@ -94,9 +96,9 @@ def read_crf_for_country(  # noqa: PLR0912, PLR0915
         return value is a Pandas DataFrame with the read data in PRIMAP2 format
     """
     # long name for type
-    if type == "CRF":
+    if submission_type == "CRF":
         type_name = "common reporting format"
-    elif type == "CRT":
+    elif submission_type == "CRT":
         type_name = "common reporting tables"
     else:
         raise ValueError("Type must be CRF or CRT")  # noqa: TRY003
@@ -107,15 +109,15 @@ def read_crf_for_country(  # noqa: PLR0912, PLR0915
     # if we only have a single country check if we might have a country specific
     # specification (currently only Australia, 2023)
     try:
-        crf_spec = getattr(crf, f"{type}{submission_year}_{country_code}")
+        crf_spec = getattr(crf, f"{submission_type}{submission_year}_{country_code}")
         print(
             f"Using country specific specification: "
-            f"{type}{submission_year}_{country_code}"
+            f"{submission_type}{submission_year}_{country_code}"
         )
     except Exception:
         # no country specific specification, check for general specification
         try:
-            crf_spec = getattr(crf, f"{type}{submission_year}")
+            crf_spec = getattr(crf, f"{submission_type}{submission_year}")
         except Exception as ex:
             raise ValueError(  # noqa: TRY003
                 f"No terminology exists for submission year/round " f"{submission_year}"
@@ -126,21 +128,29 @@ def read_crf_for_country(  # noqa: PLR0912, PLR0915
     ]
     print(
         f"The following tables are available in the "
-        f"{type}{submission_year} specification: {tables}"
+        f"{submission_type}{submission_year} specification: {tables}"
     )
 
-    if submission_date is None:
-        submission_date = get_latest_date_for_country(
-            country_code, submission_year, type
-        )
+    if date_or_version is None:
+        if submission_type == "CRF":
+            date_or_version = get_latest_date_for_country(
+                country_code,
+                submission_year=submission_year,
+                submission_type=submission_type,
+            )
+        else:
+            date_or_version = get_latest_version_for_country(
+                country_code,
+                submission_round=submission_year,
+            )
 
     # check if data has been read already
     read_data = not submission_has_been_read(
         country_code,
         country_name,
         submission_year=submission_year,
-        submission_date=submission_date,
-        submission_type=type,
+        date_or_version=date_or_version,
+        submission_type=submission_type,
         verbose=True,
     )
 
@@ -154,9 +164,9 @@ def read_crf_for_country(  # noqa: PLR0912, PLR0915
                 country_code,
                 table,
                 submission_year,
-                date=submission_date,
-                submission_type=type,
-            )  # , data_year=[1990])
+                date_or_version=date_or_version,
+                submission_type=submission_type,
+            )
 
             # collect messages on unknown rows etc
             unknown_categories = unknown_categories + new_unknown_categories
@@ -172,16 +182,24 @@ def read_crf_for_country(  # noqa: PLR0912, PLR0915
                 entity_mapping = crf_spec[table]["entity_mapping"]
             else:
                 entity_mapping = None
+            if submission_type == "CRF":
+                meta_data_input = {
+                    "title": f"CRF data submitted in {submission_year} to the UNFCCC "
+                    f"in the {type_name} ({submission_type}) by {country_name}. "
+                    f"Submission date: {date_or_version}"
+                }
+            else:
+                meta_data_input = {
+                    "title": f"Data submitted for round {submission_year} "
+                    f"to the UNFCCC in the {type_name} ({submission_type}) by "
+                    f"{country_name}. Submission version: {date_or_version}"
+                }
             ds_table_if = convert_crf_table_to_pm2if(
                 ds_table,
                 submission_year,
-                meta_data_input={
-                    "title": f"Data submitted in {submission_year} to the UNFCCC "
-                    f"in the {type_name} ({type}) by {country_name}. "
-                    f"Submission date: {submission_date}"
-                },
+                meta_data_input=meta_data_input,
                 entity_mapping=entity_mapping,
-                submission_type=type,
+                submission_type=submission_type,
             )
 
             # now convert to native PRIMAP2 format
@@ -228,7 +246,7 @@ def read_crf_for_country(  # noqa: PLR0912, PLR0915
             today = date.today()
             log_location = (
                 log_path
-                / f"{type}{submission_year}"
+                / f"{submission_type}{submission_year}"
                 / f"{country_code}_unknown_categories_{today.strftime('%Y-%m-%d')}.csv"
             )
             print(
@@ -242,7 +260,7 @@ def read_crf_for_country(  # noqa: PLR0912, PLR0915
             today = date.today()
             log_location = (
                 log_path
-                / f"{type}{submission_year}"
+                / f"{submission_type}{submission_year}"
                 / f"{country_code}_last_row_info_{today.strftime('%Y-%m-%d')}.csv"
             )
             print(
@@ -254,8 +272,11 @@ def read_crf_for_country(  # noqa: PLR0912, PLR0915
         if save_data:
             compression = dict(zlib=True, complevel=9)
             output_folder = extracted_data_path_UNFCCC / country_name.replace(" ", "_")
+            # TODO: function that creates the filename so if we modify something it's
+            #  modified everywhere (but will break old data, so better keep file name)
             output_filename = (
-                f"{country_code}_{type}{submission_year}_" f"{submission_date}"
+                f"{country_code}_{submission_type}{submission_year}_"
+                f"{date_or_version}"
             )
 
             if not output_folder.exists():
@@ -276,9 +297,9 @@ def read_crf_for_country(  # noqa: PLR0912, PLR0915
 
 
 def read_crf_for_country_datalad(
-    country: str,
+    country_code: str,
     submission_year: int,
-    submission_date: Optional[str] = None,
+    date_or_version: Optional[str] = "latest",
     re_read: Optional[bool] = True,
     type: str = "CRF",
 ) -> None:
@@ -290,12 +311,13 @@ def read_crf_for_country_datalad(
 
     Parameters
     ----------
-    country_codes: str
+    country_code: str
         ISO 3-letter country code
     submission_year: int
         Year of the submission of the data
-    submission_date: Optional(str)
-        Read for a specific submission date (given as string as in the file names)
+    date_or_version: Optional(str)
+        Read for a specific submission date (CRF) or version (CRT/BTR)
+        (given as string as in the file names)
         If not specified latest data will be read
     type: str default "CRF"
         Read CRF or CRT
@@ -306,13 +328,13 @@ def read_crf_for_country_datalad(
         raise ValueError("Type must be CRF or CRT")  # noqa: TRY003
     # get all the info for the country
     country_info = get_input_and_output_files_for_country(
-        country,
+        country_code,
         submission_year=submission_year,
         verbose=True,
-        type=type,
+        submission_type=type,
     )
 
-    print(f"Attempting to read data for {type}{submission_year} from {country}.")
+    print(f"Attempting to read data for {type}{submission_year} from {country_code}.")
     print("#" * 80)
     print("")
     print("Using the unfccc_crf_reader")
@@ -322,9 +344,9 @@ def read_crf_for_country_datalad(
 
     cmd = (
         f"python3 {script.as_posix()} "
-        f"--country={country} "
+        f"--country={country_code} "
         f"--submission_year={submission_year} "
-        f"--submission_date={submission_date} "
+        f"--date_or_version={date_or_version} "
         f"--type={type}"
     )
     if re_read:
@@ -332,7 +354,8 @@ def read_crf_for_country_datalad(
     datalad.api.run(
         cmd=cmd,
         dataset=root_path,
-        message=f"Read data for {country}, {type}{submission_year}, {submission_date}.",
+        message=f"Read data for {country_code}, {type}{submission_year}, "
+        f"{date_or_version}.",
         inputs=country_info["input"],
         outputs=country_info["output"],
         dry_run=None,
@@ -344,7 +367,7 @@ def read_new_crf_for_year(
     submission_year: int,
     countries: list[str] | None = None,
     re_read: bool | None = False,
-    type: str = "CRF",
+    submission_type: str = "CRF",
 ) -> dict:
     """
     Read CRF for given countries
@@ -372,7 +395,7 @@ def read_new_crf_for_year(
         CRF countries
     re_read: bool (optional, default=False)
         If true data will be read even if already read before.
-    type: str default "CRF"
+    submission_type: str default "CRF"
         Read CRF or CRT
 
     TODO: write log with failed countries and what has been read
@@ -382,10 +405,10 @@ def read_new_crf_for_year(
         list[str]: list with country codes for which the data has been read
 
     """
-    if type == "CRF":
+    if submission_type == "CRF":
         if countries is None:
             countries = all_crf_countries
-    elif type == "CRT":
+    elif submission_type == "CRT":
         if countries is None:
             countries = all_countries
     else:
@@ -395,18 +418,21 @@ def read_new_crf_for_year(
     for country in countries:
         try:
             country_df = read_crf_for_country(
-                country, submission_year, re_read=re_read, type=type
+                country,
+                submission_year,
+                re_read=re_read,
+                submission_type=submission_type,
             )
             if country_df is None:
                 read_countries[country] = "skipped"
             else:
                 read_countries[country] = "read"
         except NoCRFFilesError:
-            print(f"No {type} data for country {country}, {submission_year}")
+            print(f"No {submission_type} data for country {country}, {submission_year}")
             read_countries[country] = "no data"
         except Exception as ex:
             print(
-                f"{type} data for country {country}, "
+                f"{submission_type} data for country {country}, "
                 f"{submission_year} could not be read"
             )
             print(f"The following error occurred: {ex}")
@@ -502,7 +528,7 @@ def read_new_crf_for_year_datalad(  # noqa: PLR0912
                     country_info["code"],
                     country_info["name"],
                     submission_year=submission_year,
-                    submission_date=country_info["date"],
+                    date_or_version=country_info["date"],
                     submission_type=type,
                     verbose=False,
                 )
@@ -539,13 +565,14 @@ def read_new_crf_for_year_datalad(  # noqa: PLR0912
     )
 
 
-def get_input_and_output_files_for_country(
+def get_input_and_output_files_for_country(  # noqa: PLR0912
     country: str,
     submission_year: int,
-    submission_date: Optional[str] = None,
-    type: str = "CRF",
+    date_or_version: Optional[str] = None,
+    submission_type: str = "CRF",
     verbose: Optional[bool] = True,
 ) -> dict[str, Union[list, str]]:
+    # TODO: adapt to version use for BTR
     """
     Get input and output files for a given country
 
@@ -555,16 +582,16 @@ def get_input_and_output_files_for_country(
         3 letter country code
     submission_year: int
         year of submissions for CRF or submission round for CRT
-    submission_date
-        date of submission (as in the filename)
-    type: str: default "CRF"
+    date_or_version
+        date (CRF) or version (CRT/BTR) of submission (as in the filename)
+    submission_type: str: default "CRF"
         CRF or CRT
     verbose: bool (optional, default True)
         if True print additional output
 
     Returns
     -------
-    dict with keays "input" and "output". Values are a list of files
+    dict with keys "input" and "output". Values are a list of files
     """
     country_info = {}
 
@@ -579,41 +606,50 @@ def get_input_and_output_files_for_country(
 
     # determine latest data
     print(f"Determining input and output files for {country}")
-    if submission_date is None:
+    if date_or_version is None:
         if verbose:
-            print("No submission date given, find latest date.")
-        submission_date = get_latest_date_for_country(
-            country_code, submission_year, type=type
-        )
+            print("No submission date/version given, find latest.")
+        if submission_type == "CRF":
+            date_or_version = get_latest_date_for_country(
+                country_code, submission_year, submission_type=submission_type
+            )
+        else:
+            date_or_version = get_latest_version_for_country(
+                country_code, submission_year
+            )
     elif verbose:
-        print(f"Using given submissions date {submission_date}")
+        print(f"Using given submissions date / version {date_or_version}")
 
-    if submission_date is None:
+    if date_or_version is None:
         # there is no data. Raise an exception
         raise NoCRFFilesError(  # noqa: TRY003
             f"No submissions found for {country_code}, "
-            f"type={type}, "
+            f"submission_type={submission_type}, "
             f"submission_year={submission_year}, "
-            f"date={date}"
+            f"date_or_version={date_or_version}"
         )
     elif verbose:
         print(
-            f"Latest submission date for {type}{submission_year} is {submission_date}"
+            f"Latest submission date / version for {submission_type}{submission_year} "
+            f"is {date_or_version}"
         )
-    country_info["date"] = submission_date
+    if submission_type == "CRF":
+        country_info["date"] = date_or_version
+    else:
+        country_info["version"] = date_or_version
 
     # get possible input files
     input_files = get_crf_files(
         country_codes=country_code,
         submission_year=submission_year,
-        date=submission_date,
-        submission_type=type,
+        date_or_version=date_or_version,
+        submission_type=submission_type,
     )
     if not input_files:
         raise NoCRFFilesError(  # noqa: TRY003
-            f"No possible input files found for {country}, {type}{submission_year}, "
-            f"v{submission_date}. Are they already submitted and included in the "
-            f"repository?"
+            f"No possible input files found for {country}, {submission_type}"
+            f"{submission_year}, {date_or_version}. "
+            "Are they already submitted and included in the repository?"
         )
     elif verbose:
         print("Found the following input_files:")
@@ -628,8 +664,8 @@ def get_input_and_output_files_for_country(
     # get output file
     output_folder = extracted_data_path_UNFCCC / country_name.replace(" ", "_")
     output_files = [
-        output_folder / f"{country_code}_{type}{submission_year}"
-        f"_{submission_date}.{suffix}"
+        output_folder / f"{country_code}_{submission_type}{submission_year}"
+        f"_{date_or_version}.{suffix}"
         for suffix in ["yaml", "csv", "nc"]
     ]
     if verbose:
@@ -651,7 +687,7 @@ def submission_has_been_read(  # noqa: PLR0913
     country_code: str,
     country_name: str,
     submission_year: int,
-    submission_date: str,
+    date_or_version: str,
     submission_type: str = "CRF",
     verbose: Optional[bool] = True,
 ) -> bool:
@@ -666,8 +702,8 @@ def submission_has_been_read(  # noqa: PLR0913
         Name of the country
     submission_year: int
         year of submissions for CRF or submission round for CRT
-    submission_date
-        date of submission (as in the filename)
+    date_or_version
+        date of submission (CRF) or version (CRT/BTR) (as in the filename)
     submission_type: str: default "CRF"
         CRF or CRT
     verbose: bool (optional, default True)
@@ -679,7 +715,7 @@ def submission_has_been_read(  # noqa: PLR0913
     """
     output_folder = extracted_data_path_UNFCCC / country_name.replace(" ", "_")
     output_filename = (
-        f"{country_code}_{submission_type}{submission_year}_{submission_date}"
+        f"{country_code}_{submission_type}{submission_year}_{date_or_version}"
     )
 
     #    check if the submission_year is correctly used for CRT
@@ -691,14 +727,14 @@ def submission_has_been_read(  # noqa: PLR0913
             if verbose:
                 print(
                     f"Data already available for {country_code}, "
-                    f"{submission_type}{submission_year}, version {submission_date}."
+                    f"{submission_type}{submission_year}, version {date_or_version}."
                 )
         elif existing_suffixes:
             has_been_read = False
             if verbose:
                 print(
                     f"Partial data available for {country_code}, "
-                    f"{submission_type}{submission_year}, version {submission_date}. "
+                    f"{submission_type}{submission_year}, version {date_or_version}. "
                     "Please check if all files have been written after "
                     f"reading. Existing suffixes: {existing_suffixes}"
                 )
@@ -707,7 +743,7 @@ def submission_has_been_read(  # noqa: PLR0913
             if verbose:
                 print(
                     f"No read data available for {country_code}, "
-                    f"{submission_type}{submission_year}, version {submission_date}. "
+                    f"{submission_type}{submission_year}, version {date_or_version}. "
                 )
     else:
         has_been_read = False

+ 4 - 0
src/unfccc_ghg_data/unfccc_crf_reader/util.py

@@ -52,6 +52,10 @@ all_crf_countries = [
     "USA",  # 49
 ]
 
+# URLs of the UNFCCC overview pages for biennial transparency reports (BTR).
+# NOTE(review): keys presumably are the BTR round (1 = first BTR); confirm.
+BTR_urls = {
+    1: "https://unfccc.int/first-biennial-transparency-reports",
+}
+
 
 class NoCRFFilesError(Exception):
     """Error raised when no CRF files are found"""

+ 57 - 1
tests/unit/test_crf_reader.py

@@ -1,7 +1,13 @@
+from pathlib import Path
+
+from unfccc_ghg_data.helper import downloaded_data_path_UNFCCC
 from unfccc_ghg_data.unfccc_crf_reader.unfccc_crf_reader_core import (
     filter_category,
+    find_latest_version,
+    get_country_folders,
     get_info_from_crf_filename,
     get_latest_date_for_country,
+    get_latest_version_for_country,
 )
 
 
@@ -17,11 +23,31 @@ def test_get_latest_date_for_country():
     assert date == expected
 
     # RUS CRT
-    expected = "20241108"
+    expected = "20241220"
     date = get_latest_date_for_country("RUS", 1, submission_type="CRT")
     assert date == expected
 
 
+def test_get_latest_version_for_country():
+    """Check the latest submission version reported for AUS and RUS.
+
+    The second argument is 1 in both calls (presumably the BTR/CRT
+    submission round; confirm against the function signature).
+    """
+    # AUS CRT
+    expected = "V0.0"
+    version = get_latest_version_for_country("AUS", 1)
+    assert version == expected
+
+    # RUS CRT
+    expected = "V1.0"
+    version = get_latest_version_for_country("RUS", 1)
+    assert version == expected
+
+
+def test_find_latest():
+    """find_latest_version picks "V0.3" from an unsorted list of versions."""
+    candidates = ["V0.01", "V0.3", "V0.2"]
+    latest = find_latest_version(candidates)
+    assert latest == "V0.3"
+
+
 def test_get_info_from_crf_filename():
     # crf
     filename = "BLR_2021_1990_30032021_192048.xlsx"
@@ -97,3 +123,33 @@ def test_filter_category():
     map_incl_single = ["\\C-AUS\\ Other (as specified in table 3(I).A)", ["3.A.1.C"], 5]
     assert filter_category(map_incl_single, "MOZ") == expected_remove
     assert filter_category(map_incl_single, "AUS") == expected
+
+
+def test_get_country_folders():
+    """Check the folders returned for a BTR round and for a CRF year."""
+    cases = [
+        # BTR1
+        (
+            ["RUS", "AUS", "GUY"],
+            1,
+            "CRT",
+            ["Russian_Federation/BTR1", "Australia/BTR1", "Guyana/BTR1"],
+        ),
+        # CRF 2023
+        (
+            ["RUS", "AUS", "DEU"],
+            2023,
+            "CRF",
+            ["Russian_Federation/CRF2023", "Australia/CRF2023", "Germany/CRF2023"],
+        ),
+    ]
+    for country_codes, submission_year, submission_type, expected_names in cases:
+        expected = [Path(name) for name in expected_names]
+        found = get_country_folders(
+            country_codes=country_codes,
+            submission_year=submission_year,
+            submission_type=submission_type,
+        )
+        # compare paths relative to downloaded_data_path_UNFCCC
+        folders = [
+            folder.relative_to(downloaded_data_path_UNFCCC) for folder in found
+        ]
+        assert expected == folders