1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556 |
- import os
- from src.faostat_data_primap.download import download_all_domains
- from src.faostat_data_primap.read import read_latest_data
- # test the complete download and read process
def test_download_all_domains(tmp_path):
    """Exercise the complete download-and-read process in a temporary directory.

    Calls ``download_all_domains`` into ``tmp_path / "downloaded_data"`` and
    checks the resulting directory layout, then calls ``read_latest_data`` on
    that directory and checks the files written to
    ``tmp_path / "extracted_data"``.

    NOTE(review): ``download_all_domains`` presumably fetches the data over the
    network, which would make this a slow integration test -- confirm.
    """
    downloaded_data_path = tmp_path / "downloaded_data"
    download_all_domains(downloaded_data_path=downloaded_data_path)

    expected_downloaded_domains = [
        "farm_gate_emissions_crops",
        "farm_gate_livestock",
        "farm_gate_agriculture_energy",
        "land_use_forests",
        "land_use_fires",
        "land_use_drained_organic_soils",
        "pre_post_agricultural_production",
    ]

    domains = []
    for domain in downloaded_data_path.iterdir():
        if not domain.is_dir():
            continue
        domains.append(domain.name)

        releases = list(domain.iterdir())
        # Without this check an empty domain directory would skip the loop
        # below and the test would pass without verifying a single file.
        assert releases, f"no release downloaded for domain {domain.name!r}"

        for release in releases:
            downloaded_data = os.listdir(release)
            # make sure we have at least one .csv, one .pdf and one .zip file
            for extension in (".csv", ".pdf", ".zip"):
                assert any(
                    name.endswith(extension) for name in downloaded_data
                ), f"no {extension} file found in {release}"

    assert sorted(expected_downloaded_domains) == sorted(domains)

    extracted_data_path = tmp_path / "extracted_data"

    # read and save latest data
    read_latest_data(
        downloaded_data_path_custom=downloaded_data_path, save_path=extracted_data_path
    )

    release_folder = os.listdir(extracted_data_path)

    # there should be one directory created
    assert len(release_folder) == 1
    # and it starts with "v" (the date changes with each release)
    assert release_folder[0].startswith("v")

    output_files = os.listdir(extracted_data_path / release_folder[0])
    # in the folder there should be three files
    assert len(output_files) == 3

    # a .yaml, .csv, and .nc file
    required_extensions = {"nc", "csv", "yaml"}
    file_extensions = {file.split(".")[-1] for file in output_files}
    assert required_extensions == file_extensions
|