1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283 |
# This script takes a submission and a country as input (from doit) and
# runs the appropriate script to extract the submission data.

import argparse
import sys
from pathlib import Path

import datalad.api

from get_submissions_info import get_code_file
from get_submissions_info import get_possible_inputs
from get_submissions_info import get_possible_outputs


def _report_files(kind, files, country, submission, hint):
    """Print the candidate ``kind`` ("input" or "output") files.

    If ``files`` is empty, a single notice is printed that ends with
    ``hint``; otherwise every entry is printed on its own line, followed
    by an empty line.
    """
    if not files:
        print(f"No possible {kind} files found for {country}, {submission}. "
              f"{hint}")
        return
    print(f"Found the following {kind}_files:")
    for file in files:
        print(file)
    print("")


def main():
    """Find the reading script for a submission and run it via datalad.

    Parses ``--country`` and ``--submission`` from the command line, looks
    up the code file plus the possible input and output files through the
    ``get_submissions_info`` helpers, and executes the code file with
    ``datalad.api.run`` on the dataset two directories above this file.

    Returns:
        0 in all cases that do not raise. As in the original script, a
        missing code file is only reported on stdout, not signalled through
        the exit status.
    """
    parser = argparse.ArgumentParser()
    # Both values are passed on to the lookup helpers, so let argparse reject
    # a call that omits one instead of handing None to them.
    parser.add_argument('--country', required=True,
                        help='Country name or code')
    parser.add_argument('--submission', required=True,
                        help='Submission to read')
    args = parser.parse_args()

    country = args.country
    submission = args.submission

    # The dataset root is two levels above the directory holding this script.
    rootpath = (Path(__file__).parent / ".." / "..").resolve()

    print(f"Attempting to extract data for {submission} from {country}.")
    print("#" * 80)
    print("")

    # get the correct script
    script_name = get_code_file(country, submission)
    if script_name is None:
        # no code found.
        print(f"No code found to read {submission} from {country}")
        print(f"Use 'doit country_info --country={country}' to get "
              f"a list of available submissions and datasets.")
        return 0

    print(f"Found code file {script_name}")
    print("")

    # get possible input files
    input_files = get_possible_inputs(country, submission)
    _report_files("input", input_files, country, submission,
                  "Something might be wrong here.")
    # Make input files absolute to avoid datalad confusions when the
    # root directory is reached via a symlink, then convert them to str.
    input_files = [(rootpath / file).as_posix() for file in input_files]

    # get possible output files
    output_files = get_possible_outputs(country, submission)
    _report_files("output", output_files, country, submission,
                  "This is either the first run or something is wrong.")
    # convert file paths to str (outputs are passed on as returned,
    # i.e. they are not joined with rootpath)
    output_files = [file.as_posix() for file in output_files]

    print("Run the script using datalad run via the python api")
    datalad.api.run(
        # NOTE(review): the interpreter path is relative; presumably it is
        # resolved against the dataset root -- confirm with the datalad docs.
        cmd=f"./venv/bin/python3 {script_name.as_posix()}",
        dataset=rootpath,
        message=f"Read data for {country}, {submission}.",
        inputs=input_files,
        outputs=output_files,
        dry_run=None,
        explicit=True,
    )
    return 0


if __name__ == "__main__":
    sys.exit(main())
|