Forráskód Böngészése

added test for get_info_from_crf_filename and changed method to get root path

Johannes Gütschow 4 hónapja
szülő
commit
6f7c08c2a1

+ 40 - 11
src/unfccc_ghg_data/helper/definitions.py

@@ -1,21 +1,50 @@
 """definitions like folders, mappings etc."""
 
-import os
 from pathlib import Path
 
 import pandas as pd
 
 
-def get_root_path() -> Path:
-    """Get the root_path from an environment variable"""
-    root_path_env = os.getenv("UNFCCC_GHG_ROOT_PATH", None)
-    if root_path_env is None:
-        raise ValueError(  # noqa: TRY003
-            "UNFCCC_GHG_ROOT_PATH environment variable needs to be set"
-        )
-    else:
-        root_path = Path(root_path_env).resolve()
-    return root_path
+def get_root_path(root_indicator: str = ".datalad") -> Path:
+    """
+    Traverse up from the current script location to find the repository root.
+
+    The root is defined by the presence of a root_indicator file or
+    directory (e.g., '.git').
+
+    Parameters
+    ----------
+        root_indicator
+            A filename or directory name that indicates the root of the repository.
+
+    Returns
+    -------
+    Path
+        The path to the root directory of the repository.
+
+    Raises
+    ------
+        RuntimeError: If the repository root cannot be found.
+    """
+    current_dir = Path(__file__).resolve().parent
+    while current_dir != Path(current_dir.root):
+        if (current_dir / root_indicator).exists():
+            return current_dir
+        current_dir = current_dir.parent
+    msg = f"Repository root with indicator '{root_indicator}' not found."
+    raise RuntimeError(msg)
+
+
+# def get_root_path() -> Path:
+#     """Get the root_path from an environment variable"""
+#     root_path_env = os.getenv("UNFCCC_GHG_ROOT_PATH", None)
+#     if root_path_env is None:
+#         raise ValueError(
+#             "UNFCCC_GHG_ROOT_PATH environment variable needs to be set"
+#         )
+#     else:
+#         root_path = Path(root_path_env).resolve()
+#     return root_path
 
 
 root_path = get_root_path()

+ 1 - 1
src/unfccc_ghg_data/unfccc_crf_reader/unfccc_crf_reader_core.py

@@ -863,7 +863,7 @@ def get_info_from_crf_filename(
         name_parts = filename.split("-")
         file_info["party"] = name_parts[0]
         file_info["submission_year"] = int(name_parts[2])
-        file_info["version"] = int(name_parts[3])
+        file_info["version"] = name_parts[3]
         try:
             file_info["data_year"] = int(name_parts[4])
         except:  # noqa: E722

+ 28 - 0
tests/unit/test_crf_reader.py

@@ -0,0 +1,28 @@
+from unfccc_ghg_data.unfccc_crf_reader.unfccc_crf_reader_core import (
+    get_info_from_crf_filename,
+)
+
+
+def test_get_info_from_crf_filename():
+    # crf
+    filename = "BLR_2021_1990_30032021_192048.xlsx"
+    expected = {
+        "party": "BLR",
+        "submission_year": 2021,
+        "data_year": 1990,
+        "date": "30032021",
+        "extra": "192048",
+    }
+    assert expected == get_info_from_crf_filename(filename)
+
+    # crt
+    filename = "GUY-CRT-2024-V0.3-1992-20240927-191031_started.xlsx"
+    expected = {
+        "party": "GUY",
+        "submission_year": 2024,
+        "data_year": 1992,
+        "date": "20240927",
+        "extra": "191031_started",
+        "version": "V0.3",
+    }
+    assert expected == get_info_from_crf_filename(filename)