Browse Source

[DATALAD] Recorded changes

Daniel Busch 3 months ago
parent
commit
6a25dc105e
2 changed files with 0 additions and 176 deletions
  1. 0 73
      src/faostat_data_primap/download.py
  2. 0 103
      tests/unit/test_download.py

+ 0 - 73
src/faostat_data_primap/download.py

@@ -1,6 +1,5 @@
 """Downloads data from FAOSTAT website."""
 
-import hashlib
 import os
 import pathlib
 import time
@@ -18,78 +17,6 @@ from faostat_data_primap.helper.definitions import domains
 from faostat_data_primap.helper.paths import downloaded_data_path
 
 
-def find_previous_release_path(
-    current_release_path: pathlib.Path,
-) -> pathlib.Path | None:
-    """
-    Find the most recent previous release directory within the same domain.
-
-    Release directories are assumed to be subdirectories within the same parent
-    directory as `current_release_path`. Releases are sorted by date,
-    so directory names must follow the naming convention YYYY-MM-DD.
-
-    Parameters
-    ----------
-    current_release_path : pathlib.Path
-        The path of the current release directory.
-
-    Returns
-    -------
-    pathlib.Path or None
-        Returns the path of the most recent previous release directory if one exists,
-        otherwise returns None.
-    """
-    domain_path = current_release_path.parent
-    all_releases = [
-        release_name
-        for release_name in os.listdir(current_release_path.parent)
-        if (domain_path / release_name).is_dir()
-    ]
-
-    # make sure all directories follow the naming convention
-    try:
-        all_releases_datetime = [
-            datetime.strptime(release, "%Y-%m-%d") for release in all_releases
-        ]
-    except ValueError as e:
-        msg = (
-            "All release folders must be in YYYY-MM-DD format, "
-            f"got {sorted(all_releases)}"
-        )
-        raise ValueError(msg) from e
-
-    all_releases_datetime = sorted(all_releases_datetime)
-    current_release_datetime = datetime.strptime(current_release_path.name, "%Y-%m-%d")
-    index = all_releases_datetime.index(current_release_datetime)
-
-    # if the current release is the earliest or the only one
-    if index == 0:
-        return None
-
-    return domain_path / all_releases_datetime[index - 1].strftime("%Y-%m-%d")
-
-
-def calculate_checksum(file_path: pathlib.Path) -> str:
-    """
-    Calculate the SHA-256 checksum of a file.
-
-    Parameters
-    ----------
-    file_path : pathlib.Path
-        The path to the file for which the checksum is calculated.
-
-    Returns
-    -------
-    str
-        The SHA-256 checksum of the file as a hexadecimal string.
-    """
-    sha256 = hashlib.sha256()
-    with open(file_path, "rb") as f:
-        for chunk in iter(lambda: f.read(4096), b""):
-            sha256.update(chunk)
-    return sha256.hexdigest()
-
-
 def download_methodology(url_download: str, save_path: pathlib.Path) -> None:
     """
     Download methodology file.

+ 0 - 103
tests/unit/test_download.py

@@ -1,9 +1,6 @@
 import pytest
-import requests
 
 from src.faostat_data_primap.download import (
-    calculate_checksum,
-    download_methodology,
     find_previous_release_path,
 )
 
@@ -159,103 +156,3 @@ def test_find_previous_release_path_wrong_dir_format(
         result = find_previous_release_path(current_release_path)  # noqa: F841
 
     assert str(excinfo.value) == error_msg
-
-
-def test_calculate_checksum(tmp_path):
-    filepath_a = tmp_path / "test_file_a.txt"
-    with open(filepath_a, "w") as f:
-        f.write("content of file a")
-
-    filepath_b = tmp_path / "test_file_b.txt"
-    with open(filepath_b, "w") as f:
-        f.write("content of file a")
-
-    filepath_c = tmp_path / "test_file_c.txt"
-    with open(filepath_c, "w") as f:
-        f.write("content of file c")
-
-    checksum_a = calculate_checksum(filepath_a)
-
-    checksum_b = calculate_checksum(filepath_b)
-
-    checksum_c = calculate_checksum(filepath_c)
-
-    assert checksum_a == checksum_b
-
-    assert checksum_b != checksum_c
-
-
-def test_file_exists_in_previous_release_and_is_the_same(temp_domain_directories):
-    # set up temporary directories
-    downloaded_data_path = temp_domain_directories["downloaded_data"]
-    domain_path = temp_domain_directories["domain_paths"][
-        0
-    ]  # farm_gate_emissions_crops
-
-    # make folders for different releases
-    for release in ["2023-12-13", "2022-03-18", "2024-11-29", "2024-11-09"]:
-        release_path = domain_path / release
-        release_path.mkdir()
-
-    file_to_compare_path = domain_path / "2024-11-09" / "GCE_e.pdf"
-    response = requests.get(
-        "https://files-faostat.fao.org/production/GCE/GCE_e.pdf",
-        stream=True,
-        timeout=30,
-    )
-    response.raise_for_status()  # Check for successful request
-    with open(file_to_compare_path, "wb") as f:
-        f.write(response.content)
-    save_path = downloaded_data_path / "farm_gate_emissions_crops" / "2024-11-29"
-    download_methodology(
-        "https://files-faostat.fao.org/production/GCE/GCE_e.pdf", save_path=save_path
-    )
-    downloaded_file_path = domain_path / "2024-11-29" / "GCE_e.pdf"
-    assert downloaded_file_path.is_symlink()
-
-
-def test_methodology_document_exists_in_previous_release_but_is_different(
-    temp_domain_directories,
-):
-    # set up temporary directories
-    domain_path = temp_domain_directories["domain_paths"][
-        0
-    ]  # farm_gate_emissions_crops
-
-    # make folders for different releases
-    for release in ["2023-12-13", "2022-03-18", "2024-11-29", "2024-11-09"]:
-        release_path = domain_path / release
-        release_path.mkdir()
-
-    file_to_compare_path = domain_path / "2024-11-09" / "GCE_e.pdf"
-    with open(file_to_compare_path, "wb") as f:
-        s = "hi"
-        f.write(s.encode("utf-8"))
-
-    save_path = domain_path / "2024-11-29"
-    download_methodology(
-        "https://files-faostat.fao.org/production/GCE/GCE_e.pdf", save_path=save_path
-    )
-    downloaded_file_path = domain_path / "2024-11-29" / "GCE_e.pdf"
-    assert downloaded_file_path.exists()
-
-
-def test_methodology_document_does_not_exist_in_previous_release(
-    temp_domain_directories,
-):
-    # set up temporary directories
-    domain_path = temp_domain_directories["domain_paths"][
-        0
-    ]  # farm_gate_emissions_crops
-
-    # make folders for different releases
-    for release in ["2023-12-13", "2022-03-18", "2024-11-29", "2024-11-09"]:
-        release_path = domain_path / release
-        release_path.mkdir()
-
-    save_path = domain_path / "2024-11-29"
-    download_methodology(
-        "https://files-faostat.fao.org/production/GCE/GCE_e.pdf", save_path=save_path
-    )
-    downloaded_file_path = domain_path / "2024-11-29" / "GCE_e.pdf"
-    assert downloaded_file_path.exists()