dodo.py 1.8 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677
  1. """
  2. Define tasks to download and read the FAO data set.
  3. """
  4. import datalad.api
  5. from src.faostat_data_primap.helper.definitions import domains_and_releases_to_read
  6. def get_output_folders(domains_and_releases_to_read):
  7. """Get the paths of folders where output files will be saved"""
  8. output_folders = []
  9. # todo remove hard coded key
  10. for domain, release in domains_and_releases_to_read["2024"]:
  11. # todo pathlib Path
  12. output_folders.append(f"downloaded_data/{domain}/{release}")
  13. return output_folders
  14. def task_test_basic_target():
  15. """
  16. test
  17. """
  18. def do_nothing():
  19. pass
  20. return {"actions": [do_nothing]}
  21. def task_test_download_target():
  22. """
  23. test datalad target
  24. """
  25. def datalad_run_download():
  26. datalad.api.run(cmd="python3 scripts/download_all_domains.py")
  27. return {"actions": [datalad_run_download]}
  28. def task_read_data():
  29. """
  30. read data set
  31. """
  32. def read_dataset(save_path, run_id):
  33. output_folders = get_output_folders(domains_and_releases_to_read)
  34. cmd = (
  35. f"python3 scripts/read_data_set.py "
  36. f"--save_path {save_path} --run_id {run_id}"
  37. )
  38. datalad.api.run(
  39. cmd=cmd,
  40. message="Read data set",
  41. outputs=output_folders,
  42. )
  43. return {
  44. "actions": [read_dataset],
  45. "params": [
  46. {
  47. "name": "save_path",
  48. "short": "s",
  49. "default": "/extracted_data",
  50. "help": "Path to save the data.",
  51. },
  52. {
  53. "name": "run_id",
  54. "short": "r",
  55. "default": "2024",
  56. "help": "Run identifier.",
  57. },
  58. ],
  59. "verbosity": 2,
  60. }