# dodo.py — doit task definitions
"""
Define tasks to download and read the FAO data set.
"""
import datalad.api
  5. def task_add_github_sibling():
  6. """
  7. Create github sibling after cloning from gin
  8. """
  9. def run_add_sibling():
  10. datalad.api.create_sibling_github(
  11. "primap-community/FAOSTAT_data_primap",
  12. name="github",
  13. existing="error",
  14. access_protocol="ssh",
  15. publish_depends="origin",
  16. private=False,
  17. dry_run=False,
  18. api="https://api.github.com",
  19. )
  20. return {"actions": [run_add_sibling]}
  21. def task_download():
  22. """
  23. Download latest data
  24. """
  25. def datalad_run_download():
  26. datalad.api.run(
  27. cmd="python3 scripts/download_all_domains.py",
  28. outputs="downloaded_data",
  29. )
  30. return {"actions": [datalad_run_download]}
  31. def task_read():
  32. """
  33. read data set
  34. """
  35. def read_dataset(save_path, run_id):
  36. print(f"Reading dataset for {save_path=} and {run_id=}")
  37. cmd = (
  38. f"python3 scripts/read_data_set.py "
  39. f"--save_path {save_path} --run_id {run_id}"
  40. )
  41. datalad.api.run(
  42. cmd=cmd,
  43. message="Read data set",
  44. outputs=f"{save_path}",
  45. )
  46. return {
  47. "actions": [read_dataset],
  48. "params": [
  49. {
  50. "name": "save_path",
  51. "short": "s",
  52. "long": "save_path",
  53. "default": "extracted_data",
  54. "help": "Path to save the data.",
  55. },
  56. {
  57. "name": "run_id",
  58. "long": "run_id",
  59. "short": "r",
  60. "default": "2024",
  61. "help": "Run identifier.",
  62. },
  63. ],
  64. "verbosity": 2,
  65. }