dodo.py 1.9 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980
  1. """
  2. Define tasks to download and read the FAO data set.
  3. """
  4. import datalad.api
  5. from src.faostat_data_primap.helper.definitions import domains_and_releases_to_read
  6. def get_output_folders(domains_and_releases_to_read):
  7. """Get the paths of folders where output files will be saved"""
  8. output_folders = []
  9. # todo remove hard coded key
  10. for domain, release in domains_and_releases_to_read["2024"]:
  11. # todo pathlib Path
  12. output_folders.append(f"downloaded_data/{domain}/{release}")
  13. return output_folders
  14. def task_test_basic_target():
  15. """
  16. test
  17. """
  18. def do_nothing():
  19. pass
  20. return {"actions": [do_nothing]}
  21. def task_test_download_target():
  22. """
  23. test datalad target
  24. """
  25. def datalad_run_download():
  26. datalad.api.run(cmd="python3 scripts/download_all_domains.py")
  27. return {"actions": [datalad_run_download]}
  28. def task_read_data():
  29. """
  30. read data set
  31. """
  32. def read_dataset(save_path, run_id):
  33. output_folders = get_output_folders(domains_and_releases_to_read)
  34. print(f"Reading dataset for {save_path=} and {run_id=}")
  35. cmd = (
  36. f"python3 scripts/read_data_set.py "
  37. f"--save_path {save_path} --run_id {run_id}"
  38. )
  39. datalad.api.run(
  40. cmd=cmd,
  41. message="Read data set",
  42. outputs=output_folders,
  43. )
  44. return {
  45. "actions": [read_dataset],
  46. "params": [
  47. {
  48. "name": "save_path",
  49. "short": "s",
  50. "long": "save_path",
  51. "default": "extracted_data",
  52. "help": "Path to save the data.",
  53. },
  54. {
  55. "name": "run_id",
  56. "long": "run_id",
  57. "short": "r",
  58. "default": "2024",
  59. "help": "Run identifier.",
  60. },
  61. ],
  62. "verbosity": 2,
  63. }