dodo.py 1.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778
  1. """
  2. Define tasks to download and read the FAO data set.
  3. """
  4. import datalad.api
  5. from src.faostat_data_primap.helper.definitions import domains_and_releases_to_read
  6. def get_output_folders(domains_and_releases_to_read):
  7. """Get the paths of folders where output files will be saved"""
  8. output_folders = []
  9. # todo remove hard coded key
  10. for domain, release in domains_and_releases_to_read["2024"]:
  11. # todo pathlib Path
  12. output_folders.append(f"downloaded_data/{domain}/{release}")
  13. return output_folders
  14. def task_test_basic_target():
  15. """
  16. test
  17. """
  18. def do_nothing():
  19. pass
  20. return {"actions": [do_nothing]}
  21. def task_test_download_target():
  22. """
  23. test datalad target
  24. """
  25. def datalad_run_download():
  26. datalad.api.run(cmd="python3 scripts/download_all_domains.py")
  27. return {"actions": [datalad_run_download]}
  28. def task_read_data():
  29. """
  30. read data set
  31. """
  32. def read_dataset(save_path, run_id):
  33. output_folders = get_output_folders(domains_and_releases_to_read)
  34. print(f"Reading dataset for {save_path=} and {run_id=}")
  35. cmd = (
  36. f"python3 scripts/read_data_set.py "
  37. f"--save_path {save_path} --run_id {run_id}"
  38. )
  39. datalad.api.run(
  40. cmd=cmd,
  41. message="Read data set",
  42. outputs=output_folders,
  43. )
  44. return {
  45. "actions": [read_dataset],
  46. "params": [
  47. {
  48. "name": "save_path",
  49. "short": "s",
  50. "default": "extracted_data",
  51. "help": "Path to save the data.",
  52. },
  53. {
  54. "name": "run_id",
  55. "short": "r",
  56. "default": "2024",
  57. "help": "Run identifier.",
  58. },
  59. ],
  60. "verbosity": 2,
  61. }