1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980 |
- """
- Define tasks to download and read the FAO data set.
- """
- import datalad.api
- from src.faostat_data_primap.helper.definitions import domains_and_releases_to_read
def get_output_folders(domains_and_releases_to_read, run_id="2024"):
    """
    Get the paths of folders where output files will be saved.

    Parameters
    ----------
    domains_and_releases_to_read
        Mapping from a run identifier to an iterable of
        ``(domain, release)`` pairs.
    run_id
        Key of ``domains_and_releases_to_read`` whose pairs are used.
        Defaults to ``"2024"``, the key that used to be hard coded, so
        existing one-argument calls behave as before.

    Returns
    -------
    list of str
        One ``downloaded_data/<domain>/<release>`` path per pair, in the
        order the pairs are stored.

    Raises
    ------
    KeyError
        If ``run_id`` is not a key of ``domains_and_releases_to_read``.
    """
    # The paths are kept as plain strings (not pathlib objects) so the
    # return type seen by existing callers does not change.
    return [
        f"downloaded_data/{domain}/{release}"
        for domain, release in domains_and_releases_to_read[run_id]
    ]
def task_test_basic_target():
    """
    Minimal test task whose single action does nothing.
    """

    def do_nothing():
        # Deliberately empty: the action only has to be callable.
        return None

    task = {}
    task["actions"] = [do_nothing]
    return task
def task_test_download_target():
    """
    Test task that runs the download script through ``datalad.api.run``.
    """

    def datalad_run_download():
        # Command handed to datalad as a single command-line string.
        download_cmd = "python3 scripts/download_all_domains.py"
        datalad.api.run(cmd=download_cmd)

    actions = [datalad_run_download]
    return {"actions": actions}
def task_read_data():
    """
    Read the data set by running ``scripts/read_data_set.py`` via datalad.

    The task takes two command-line parameters:

    * ``save_path`` (``-s`` / ``--save_path``, default ``"extracted_data"``):
      path to save the data.
    * ``run_id`` (``-r`` / ``--run_id``, default ``"2024"``): run identifier.

    Returns
    -------
    dict
        Task dictionary with the ``actions``, ``params`` and ``verbosity``
        keys.
    """

    def read_dataset(save_path, run_id):
        """Build the read command and execute it with ``datalad.api.run``."""
        # Local import: only needed to quote the command-line values below.
        import shlex

        output_folders = get_output_folders(domains_and_releases_to_read)
        print(f"Reading dataset for {save_path=} and {run_id=}")
        # Quote the user-supplied values so that an empty string or a value
        # containing whitespace or shell metacharacters still reaches the
        # script as exactly one argument. Plain values such as the defaults
        # are left unchanged by shlex.quote.
        cmd = (
            "python3 scripts/read_data_set.py "
            f"--save_path {shlex.quote(str(save_path))} "
            f"--run_id {shlex.quote(str(run_id))}"
        )
        datalad.api.run(
            cmd=cmd,
            message="Read data set",
            outputs=output_folders,
        )

    return {
        "actions": [read_dataset],
        "params": [
            {
                "name": "save_path",
                "short": "s",
                "long": "save_path",
                "default": "extracted_data",
                "help": "Path to save the data.",
            },
            {
                "name": "run_id",
                "long": "run_id",
                "short": "r",
                "default": "2024",
                "help": "Run identifier.",
            },
        ],
        "verbosity": 2,
    }
|