get_submissions_info.py 4.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139
  1. # helper functions to get information on available submissions
  2. # and data reading functions for a given country
  3. from typing import List, Dict
  4. from pathlib import Path
  5. import json
  6. import pycountry
  7. from UNFCCC_GHG_data.helper import root_path, downloaded_data_path, extracted_data_path
  8. from UNFCCC_GHG_data.helper import get_country_code
  # Path of the "UNFCCC_reader" folder located below root_path / "UNFCCC_GHG_data"
  9. code_path = root_path / "UNFCCC_GHG_data" / "UNFCCC_reader"
  10. def get_possible_inputs(
  11. country_name: str,
  12. submission: str,
  13. print_info: bool = False,
  14. ) -> List[Path]:
  15. """
  16. For given country name and submission find the possible input files
  17. Parameters
  18. ----------
  19. country_name: str
  20. String containing the country name or ISO 3 letter UNFCCC_GHG_data
  21. submission: str
  22. String of the submission
  23. print_info: bool = False
  24. If True print information on UNFCCC_GHG_data found
  25. Returns
  26. -------
  27. returns a list pathlib Path objects for the input files
  28. """
  29. data_folder = downloaded_data_path
  30. # obtain country UNFCCC_GHG_data
  31. country_code = get_country_code(country_name)
  32. if print_info:
  33. print(f"Country name {country_name} maps to ISO UNFCCC_GHG_data {country_code}")
  34. input_files = []
  35. for item in data_folder.iterdir():
  36. if item.is_dir():
  37. with open(item / "folder_mapping.json", "r") as mapping_file:
  38. folder_mapping = json.load(mapping_file)
  39. if country_code in folder_mapping:
  40. country_folders = folder_mapping[country_code]
  41. if isinstance(country_folders, str):
  42. # only one folder
  43. country_folders = [country_folders]
  44. for country_folder in country_folders:
  45. input_folder = item / country_folder / submission
  46. if input_folder.exists():
  47. for filepath in input_folder.glob("*"):
  48. input_files.append(filepath.relative_to(root_path))
  49. if print_info:
  50. if input_files:
  51. print(f"Found possible input files:")
  52. for file in input_files:
  53. print(file)
  54. else:
  55. print(f"No input files found")
  56. return input_files
  57. def get_possible_outputs(
  58. country_name: str,
  59. submission: str,
  60. print_info: bool = False,
  61. )-> List[Path]:
  62. """
  63. For given country name and submission find the possible output files
  64. Parameters
  65. ----------
  66. country_name: str
  67. String containing the country name or ISO 3 letter UNFCCC_GHG_data
  68. submission: str
  69. String of the submission
  70. print_info: bool = False
  71. If True print information on outputs found
  72. Returns
  73. -------
  74. returns a list pathlib Path objects for the input files
  75. """
  76. data_folder = extracted_data_path
  77. # obtain country UNFCCC_GHG_data
  78. country_code = get_country_code(country_name)
  79. if print_info:
  80. print(f"Country name {country_name} maps to ISO UNFCCC_GHG_data {country_code}")
  81. output_files = []
  82. for item in data_folder.iterdir():
  83. if item.is_dir():
  84. with open(item / "folder_mapping.json", "r") as mapping_file:
  85. folder_mapping = json.load(mapping_file)
  86. if country_code in folder_mapping:
  87. country_folder = folder_mapping[country_code]
  88. if not isinstance(country_folder, str):
  89. raise ValueError("Wrong data type in folder mapping json file. Should be str.")
  90. output_folder = item / country_folder
  91. if output_folder.exists():
  92. for filepath in output_folder.glob(country_code + "_" + submission + "*"):
  93. output_files.append(filepath.relative_to(root_path))
  94. if print_info:
  95. if output_files:
  96. print(f"Found possible output files:")
  97. for file in output_files:
  98. print(file)
  99. else:
  100. print(f"No output files found")
  101. return output_files