test_download_and_read.py 2.0 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556
  1. import os
  2. from src.faostat_data_primap.download import download_all_domains
  3. from src.faostat_data_primap.read import read_latest_data
  4. # test the complete download and read process
  5. def test_download_all_domains(tmp_path):
  6. downloaded_data_path = tmp_path / "downloaded_data"
  7. download_all_domains(downloaded_data_path=downloaded_data_path)
  8. expected_downloaded_domains = [
  9. "farm_gate_emissions_crops",
  10. "farm_gate_livestock",
  11. "farm_gate_agriculture_energy",
  12. "land_use_forests",
  13. "land_use_fires",
  14. "land_use_drained_organic_soils",
  15. "pre_post_agricultural_production",
  16. ]
  17. domains = []
  18. for domain in downloaded_data_path.iterdir():
  19. if domain.is_dir():
  20. domains.append(domain.name)
  21. for release in domain.iterdir():
  22. downloaded_data = os.listdir(release)
  23. # make sure we have at least one .csv, one .pdf and one .zip file
  24. assert [f for f in downloaded_data if f.endswith(".csv")]
  25. assert [f for f in downloaded_data if f.endswith(".pdf")]
  26. assert [f for f in downloaded_data if f.endswith(".zip")]
  27. assert sorted(expected_downloaded_domains) == sorted(domains)
  28. extracted_data_path = tmp_path / "extracted_data"
  29. # read and save latest data
  30. read_latest_data(
  31. downloaded_data_path_custom=downloaded_data_path, save_path=extracted_data_path
  32. )
  33. release_folder = os.listdir(extracted_data_path)
  34. # there should be one directory created
  35. assert len(release_folder) == 1
  36. # and it starts with "v" (the date changes with each release)
  37. assert release_folder[0].startswith("v")
  38. output_files = os.listdir(extracted_data_path / release_folder[0])
  39. # in the folder there should be three files
  40. assert len(output_files) == 3
  41. # a .yaml, .csv, and .nc file
  42. required_extensions = {"nc", "csv", "yaml"}
  43. file_extensions = {file.split(".")[-1] for file in output_files}
  44. assert required_extensions == file_extensions