dodo.py 1.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778
  1. """
  2. Define tasks to download and read the FAO data set.
  3. """
  4. import datalad.api
# We need this for the download script (currently disabled):
# def get_output_folders(domains_and_releases_to_read):
#     """Get the paths of folders where output files will be saved"""
#     output_folders = []
#     # todo remove hard coded key
#     for domain, release in domains_and_releases_to_read["2024"]:
#         # todo pathlib Path
#         output_folders.append(f"downloaded_data/{domain}/{release}")
#     return output_folders
  14. def task_test_basic_target():
  15. """
  16. test
  17. """
  18. def do_nothing():
  19. pass
  20. return {"actions": [do_nothing]}
  21. def task_download():
  22. """
  23. test datalad target
  24. """
  25. def datalad_run_download():
  26. datalad.api.run(
  27. cmd="python3 scripts/download_all_domains.py", outputs="downloaded_data"
  28. )
  29. return {"actions": [datalad_run_download]}
  30. def task_read():
  31. """
  32. read data set
  33. """
  34. def read_dataset(save_path, run_id):
  35. print(f"Reading dataset for {save_path=} and {run_id=}")
  36. cmd = (
  37. f"python3 scripts/read_data_set.py "
  38. f"--save_path {save_path} --run_id {run_id}"
  39. )
  40. datalad.api.run(
  41. cmd=cmd,
  42. message="Read data set",
  43. outputs=f"{save_path}",
  44. )
  45. return {
  46. "actions": [read_dataset],
  47. "params": [
  48. {
  49. "name": "save_path",
  50. "short": "s",
  51. "long": "save_path",
  52. "default": "extracted_data",
  53. "help": "Path to save the data.",
  54. },
  55. {
  56. "name": "run_id",
  57. "long": "run_id",
  58. "short": "r",
  59. "default": "2024",
  60. "help": "Run identifier.",
  61. },
  62. ],
  63. "verbosity": 2,
  64. }