download_all_domains.py

  1. """Downloads all domain data sets from FAOSTAT website."""
  2. from src.faostat_data_primap.download import (
  3. download_file,
  4. get_html_content,
  5. get_last_updated_date,
  6. unzip_file,
  7. )
  8. from src.faostat_data_primap.helper.definitions import downloaded_data_path, sources
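
# Illustrative only: ``sources`` (imported above) is expected to be a list of
# (dataset name, domain overview url, download url) tuples, matching the
# unpacking in ``download_all_domains`` below. A hypothetical entry could look
# like this (the name and urls are placeholders, not real FAOSTAT data):
#
#     sources = [
#         (
#             "farm_gate_emissions_crops",
#             "https://www.fao.org/faostat/en/#data/GCE",
#             "https://example.org/bulk_downloads/GCE.zip",
#         ),
#     ]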


def download_all_domains(sources: list[tuple[str, str, str]]) -> list[str]:
    """
    Download input files from a remote location.

    Parameters
    ----------
    sources
        List of data sets to download. Each tuple holds the name of the
        data set, the url of the domain overview page, and the download url.

    Returns
    -------
        List of input files that have been fetched or found locally.
    """
    downloaded_files = []
    for (
        ds_name,
        url,
        url_download,
    ) in sources:
        soup = get_html_content(url)
        # todo Remove url input
        last_updated = get_last_updated_date(soup, url)

        # Create downloaded_data_path/<ds_name>/<last_updated> if needed.
        local_data_dir = downloaded_data_path / ds_name / last_updated
        local_data_dir.mkdir(parents=True, exist_ok=True)

        local_filename = local_data_dir / f"{ds_name}.zip"
        # Only fetch and unzip the archive when this release is not already
        # present, so existing files are "found locally" as documented.
        if not local_filename.exists():
            download_file(url_download=url_download, save_path=local_filename)
            unzip_file(local_filename)
        downloaded_files.append(str(local_filename))

    return downloaded_files


if __name__ == "__main__":
    download_all_domains(sources)
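
# Illustrative only: based on the path handling above, a successful run is
# assumed to leave a directory tree roughly like
#
#     <downloaded_data_path>/
#         <ds_name>/
#             <last_updated>/
#                 <ds_name>.zip
#                 ...unzipped contents...
#
# and to return the zip archive paths as strings, one per downloaded domain,
# e.g. ``["<downloaded_data_path>/<ds_name>/<last_updated>/<ds_name>.zip"]``.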