test_download_and_read.py 2.1 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859
  1. import os
  2. from faostat_data_primap.download import download_all_domains
  3. from faostat_data_primap.read import read_latest_data
  4. # test the complete download and read process
  5. # This will fail when there is a new release that does
  6. # not have a corresponding configuration
  7. def test_download_all_domains(tmp_path):
  8. downloaded_data_path = tmp_path / "downloaded_data"
  9. download_all_domains(downloaded_data_path=downloaded_data_path)
  10. expected_downloaded_domains = [
  11. "farm_gate_emissions_crops",
  12. "farm_gate_livestock",
  13. "farm_gate_agriculture_energy",
  14. "land_use_forests",
  15. "land_use_fires",
  16. "land_use_drained_organic_soils",
  17. "pre_post_agricultural_production",
  18. ]
  19. domains = []
  20. for domain in downloaded_data_path.iterdir():
  21. if domain.is_dir():
  22. domains.append(domain.name)
  23. for release in domain.iterdir():
  24. downloaded_data = os.listdir(release)
  25. # make sure we have at least one .csv, one .pdf and one .zip file
  26. assert [f for f in downloaded_data if f.endswith(".csv")]
  27. assert [f for f in downloaded_data if f.endswith(".pdf")]
  28. assert [f for f in downloaded_data if f.endswith(".zip")]
  29. assert sorted(expected_downloaded_domains) == sorted(domains)
  30. extracted_data_path = tmp_path / "extracted_data"
  31. # read and save latest data
  32. read_latest_data(
  33. downloaded_data_path_custom=downloaded_data_path, save_path=extracted_data_path
  34. )
  35. release_folder = os.listdir(extracted_data_path)
  36. # there should be one directory created
  37. assert len(release_folder) == 1
  38. # and it starts with "v" (the date changes with each release)
  39. assert release_folder[0].startswith("v")
  40. output_files = os.listdir(extracted_data_path / release_folder[0])
  41. # in the folder there should be three files
  42. assert len(output_files) == 6
  43. # a .yaml, .csv, and .nc file
  44. required_extensions = {"nc", "csv", "yaml"}
  45. file_extensions = {file.split(".")[-1] for file in output_files}
  46. assert required_extensions == file_extensions