|
@@ -1,9 +1,6 @@
|
|
|
import pytest
|
|
|
-import requests
|
|
|
|
|
|
from src.faostat_data_primap.download import (
|
|
|
- calculate_checksum,
|
|
|
- download_methodology,
|
|
|
find_previous_release_path,
|
|
|
)
|
|
|
|
|
@@ -159,103 +156,3 @@ def test_find_previous_release_path_wrong_dir_format(
|
|
|
result = find_previous_release_path(current_release_path) # noqa: F841
|
|
|
|
|
|
assert str(excinfo.value) == error_msg
|
|
|
-
|
|
|
-
|
|
|
-def test_calculate_checksum(tmp_path):
|
|
|
- filepath_a = tmp_path / "test_file_a.txt"
|
|
|
- with open(filepath_a, "w") as f:
|
|
|
- f.write("content of file a")
|
|
|
-
|
|
|
- filepath_b = tmp_path / "test_file_b.txt"
|
|
|
- with open(filepath_b, "w") as f:
|
|
|
- f.write("content of file a")
|
|
|
-
|
|
|
- filepath_c = tmp_path / "test_file_c.txt"
|
|
|
- with open(filepath_c, "w") as f:
|
|
|
- f.write("content of file c")
|
|
|
-
|
|
|
- checksum_a = calculate_checksum(filepath_a)
|
|
|
-
|
|
|
- checksum_b = calculate_checksum(filepath_b)
|
|
|
-
|
|
|
- checksum_c = calculate_checksum(filepath_c)
|
|
|
-
|
|
|
- assert checksum_a == checksum_b
|
|
|
-
|
|
|
- assert checksum_b != checksum_c
|
|
|
-
|
|
|
-
|
|
|
-def test_file_exists_in_previous_release_and_is_the_same(temp_domain_directories):
|
|
|
- # set up temporary directories
|
|
|
- downloaded_data_path = temp_domain_directories["downloaded_data"]
|
|
|
- domain_path = temp_domain_directories["domain_paths"][
|
|
|
- 0
|
|
|
- ] # farm_gate_emissions_crops
|
|
|
-
|
|
|
- # make folders for different releases
|
|
|
- for release in ["2023-12-13", "2022-03-18", "2024-11-29", "2024-11-09"]:
|
|
|
- release_path = domain_path / release
|
|
|
- release_path.mkdir()
|
|
|
-
|
|
|
- file_to_compare_path = domain_path / "2024-11-09" / "GCE_e.pdf"
|
|
|
- response = requests.get(
|
|
|
- "https://files-faostat.fao.org/production/GCE/GCE_e.pdf",
|
|
|
- stream=True,
|
|
|
- timeout=30,
|
|
|
- )
|
|
|
- response.raise_for_status() # Check for successful request
|
|
|
- with open(file_to_compare_path, "wb") as f:
|
|
|
- f.write(response.content)
|
|
|
- save_path = downloaded_data_path / "farm_gate_emissions_crops" / "2024-11-29"
|
|
|
- download_methodology(
|
|
|
- "https://files-faostat.fao.org/production/GCE/GCE_e.pdf", save_path=save_path
|
|
|
- )
|
|
|
- downloaded_file_path = domain_path / "2024-11-29" / "GCE_e.pdf"
|
|
|
- assert downloaded_file_path.is_symlink()
|
|
|
-
|
|
|
-
|
|
|
-def test_methodology_document_exists_in_previous_release_but_is_different(
|
|
|
- temp_domain_directories,
|
|
|
-):
|
|
|
- # set up temporary directories
|
|
|
- domain_path = temp_domain_directories["domain_paths"][
|
|
|
- 0
|
|
|
- ] # farm_gate_emissions_crops
|
|
|
-
|
|
|
- # make folders for different releases
|
|
|
- for release in ["2023-12-13", "2022-03-18", "2024-11-29", "2024-11-09"]:
|
|
|
- release_path = domain_path / release
|
|
|
- release_path.mkdir()
|
|
|
-
|
|
|
- file_to_compare_path = domain_path / "2024-11-09" / "GCE_e.pdf"
|
|
|
- with open(file_to_compare_path, "wb") as f:
|
|
|
- s = "hi"
|
|
|
- f.write(s.encode("utf-8"))
|
|
|
-
|
|
|
- save_path = domain_path / "2024-11-29"
|
|
|
- download_methodology(
|
|
|
- "https://files-faostat.fao.org/production/GCE/GCE_e.pdf", save_path=save_path
|
|
|
- )
|
|
|
- downloaded_file_path = domain_path / "2024-11-29" / "GCE_e.pdf"
|
|
|
- assert downloaded_file_path.exists()
|
|
|
-
|
|
|
-
|
|
|
-def test_methodology_document_does_not_exist_in_previous_release(
|
|
|
- temp_domain_directories,
|
|
|
-):
|
|
|
- # set up temporary directories
|
|
|
- domain_path = temp_domain_directories["domain_paths"][
|
|
|
- 0
|
|
|
- ] # farm_gate_emissions_crops
|
|
|
-
|
|
|
- # make folders for different releases
|
|
|
- for release in ["2023-12-13", "2022-03-18", "2024-11-29", "2024-11-09"]:
|
|
|
- release_path = domain_path / release
|
|
|
- release_path.mkdir()
|
|
|
-
|
|
|
- save_path = domain_path / "2024-11-29"
|
|
|
- download_methodology(
|
|
|
- "https://files-faostat.fao.org/production/GCE/GCE_e.pdf", save_path=save_path
|
|
|
- )
|
|
|
- downloaded_file_path = domain_path / "2024-11-29" / "GCE_e.pdf"
|
|
|
- assert downloaded_file_path.exists()
|