# dodo.py
  1. """
  2. Define tasks to download and read the FAO data set.
  3. """
  4. import datalad.api
  5. def get_output_folders(domains_and_releases_to_read):
  6. """Get the paths of folders where output files will be saved"""
  7. output_folders = []
  8. # todo remove hard coded key
  9. for domain, release in domains_and_releases_to_read["2024"]:
  10. # todo pathlib Path
  11. output_folders.append(f"downloaded_data/{domain}/{release}")
  12. return output_folders
  13. def task_test_basic_target():
  14. """
  15. test
  16. """
  17. def do_nothing():
  18. pass
  19. return {"actions": [do_nothing]}
  20. def task_test_download_target():
  21. """
  22. test datalad target
  23. """
  24. def datalad_run_download():
  25. datalad.api.run(cmd="python3 scripts/download_all_domains.py")
  26. return {"actions": [datalad_run_download]}
  27. def task_read_data():
  28. """
  29. read data set
  30. """
  31. def read_dataset(save_path, run_id):
  32. # output_folders = get_output_folders(domains_and_releases_to_read)
  33. print(f"Reading dataset for {save_path=} and {run_id=}")
  34. cmd = (
  35. f"python3 scripts/read_data_set.py "
  36. f"--save_path {save_path} --run_id {run_id}"
  37. )
  38. datalad.api.run(
  39. cmd=cmd,
  40. message="Read data set",
  41. outputs=f"{save_path}",
  42. )
  43. return {
  44. "actions": [read_dataset],
  45. "params": [
  46. {
  47. "name": "save_path",
  48. "short": "s",
  49. "long": "save_path",
  50. "default": "extracted_data",
  51. "help": "Path to save the data.",
  52. },
  53. {
  54. "name": "run_id",
  55. "long": "run_id",
  56. "short": "r",
  57. "default": "2024",
  58. "help": "Run identifier.",
  59. },
  60. ],
  61. "verbosity": 2,
  62. }