# read_UNFCCC_submission.py
  1. # this script takes submission and country as input (from doit) and
  2. # runs the appropriate script to extract the submission data
  3. import datalad.api
  4. import argparse
  5. from get_submissions_info import get_code_file
  6. from get_submissions_info import get_possible_inputs
  7. from get_submissions_info import get_possible_outputs
  8. from UNFCCC_GHG_data.helper import root_path
  9. # Find the right function and possible input and output files and
  10. # read the data using datalad run.
  11. parser = argparse.ArgumentParser()
  12. parser.add_argument('--country', help='Country name or UNFCCC_GHG_data')
  13. parser.add_argument('--submission', help='Submission to read')
  14. args = parser.parse_args()
  15. country = args.country
  16. submission = args.submission
  17. print(f"Attempting to extract data for {submission} from {country}.")
  18. print("#"*80)
  19. print("")
  20. # get the correct script
  21. script_name = get_code_file(country, submission)
  22. if script_name is not None:
  23. print(f"Found UNFCCC_GHG_data file {script_name}")
  24. print("")
  25. # get possible input files
  26. input_files = get_possible_inputs(country, submission)
  27. if not input_files:
  28. print(f"No possible input files found for {country}, {submission}. "
  29. f"Something might be wrong here.")
  30. else:
  31. print(f"Found the following input_files:")
  32. for file in input_files:
  33. print(file)
  34. print("")
  35. # make input files absolute to avoid datalad confusions when
  36. # root directory is via symlink
  37. input_files = [root_path / file for file in input_files]
  38. # convert file's path to str
  39. input_files = [file.as_posix() for file in input_files]
  40. # get possible output files
  41. output_files = get_possible_outputs(country, submission)
  42. if not output_files:
  43. print(f"No possible output files found for {country}, {submission}. "
  44. f"This is either the first run or something is wrong.")
  45. else:
  46. print(f"Found the following output_files:")
  47. for file in output_files:
  48. print(file)
  49. print("")
  50. # convert file path's to str
  51. output_files = [file.as_posix() for file in output_files]
  52. print(f"Run the script using datalad run via the python api")
  53. datalad.api.run(
  54. cmd=f"./venv/bin/python3 {script_name.as_posix()}",
  55. dataset=root_path,
  56. message=f"Read data for {country}, {submission}.",
  57. inputs=input_files,
  58. outputs=output_files,
  59. dry_run=None,
  60. explicit=True,
  61. )
  62. else:
  63. # no UNFCCC_GHG_data found.
  64. print(f"No UNFCCC_GHG_data found to read {submission} from {country}")
  65. print(f"Use 'doit country_info --country={country} to get "
  66. f"a list of available submissions and datasets.")