read_UNFCCC_submission.py 2.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384
  1. # this script takes submission and country as input (from doit) and
  2. # runs the appropriate script to extract the submission data
  3. import sys
  4. import datalad.api
  5. from pathlib import Path
  6. import argparse
  7. from get_submissions_info import get_code_file
  8. from get_submissions_info import get_possible_inputs
  9. from get_submissions_info import get_possible_outputs
  10. # Find the right function and possible input and output files and
  11. # read the data using datalad run.
  12. parser = argparse.ArgumentParser()
  13. parser.add_argument('--country', help='Country name or UNFCCC_GHG_data')
  14. parser.add_argument('--submission', help='Submission to read')
  15. args = parser.parse_args()
  16. country = args.country
  17. submission = args.submission
  18. codepath = Path(__file__).parent
  19. rootpath = codepath / ".." / ".."
  20. rootpath = rootpath.resolve()
  21. print(f"Attempting to extract data for {submission} from {country}.")
  22. print("#"*80)
  23. print("")
  24. # get the correct script
  25. script_name = get_code_file(country, submission)
  26. if script_name is not None:
  27. print(f"Found UNFCCC_GHG_data file {script_name}")
  28. print("")
  29. # get possible input files
  30. input_files = get_possible_inputs(country, submission)
  31. if not input_files:
  32. print(f"No possible input files found for {country}, {submission}. "
  33. f"Something might be wrong here.")
  34. else:
  35. print(f"Found the following input_files:")
  36. for file in input_files:
  37. print(file)
  38. print("")
  39. # make input files absolute to avoid datalad confusions when
  40. # root directory is via symlink
  41. input_files = [rootpath / file for file in input_files]
  42. # convert file's path to str
  43. input_files = [file.as_posix() for file in input_files]
  44. # get possible output files
  45. output_files = get_possible_outputs(country, submission)
  46. if not output_files:
  47. print(f"No possible output files found for {country}, {submission}. "
  48. f"This is either the first run or something is wrong.")
  49. else:
  50. print(f"Found the following output_files:")
  51. for file in output_files:
  52. print(file)
  53. print("")
  54. # convert file path's to str
  55. output_files = [file.as_posix() for file in output_files]
  56. print(f"Run the script using datalad run via the python api")
  57. datalad.api.run(
  58. cmd=f"./venv/bin/python3 {script_name.as_posix()}",
  59. dataset=rootpath,
  60. message=f"Read data for {country}, {submission}.",
  61. inputs=input_files,
  62. outputs=output_files,
  63. dry_run=None,
  64. explicit=True,
  65. )
  66. else:
  67. # no UNFCCC_GHG_data found.
  68. print(f"No UNFCCC_GHG_data found to read {submission} from {country}")
  69. print(f"Use 'doit country_info --country={country} to get "
  70. f"a list of available submissions and datasets.")