get_submissions_info.py 4.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128
  1. # helper functions to get information on available submissions
  2. # and data reading functions for a given country
  3. from typing import Union, List, Dict
  4. from pathlib import Path
  5. import json
  6. import countrynames
  7. import os
  8. def get_country_submissions(
  9. country_name: str,
  10. print_sub: bool = True,
  11. ) -> Dict[str, List[str]]:
  12. """
  13. Input is a three letter ISO code for a country, or the countries name.
  14. The function tries to map the country name to an ISO code and then
  15. queries the folder mapping files for folders.
  16. Parameters
  17. ----------
  18. country_name: str
  19. String containing the country name or ISO 3 letter code
  20. print_sub: bool
  21. If True information on submissions will be written to stdout
  22. Returns
  23. -------
  24. returns a dict with keys for the dataset classes (e.g. UNFCCC, non-UNFCCC)
  25. Each value is a list of folders
  26. """
  27. codepath = Path(__file__).parent
  28. data_folder = codepath / ".." / ".." / "downloaded_data"
  29. # obtain country code
  30. country_code = countrynames.to_code_3(country_name)
  31. if country_code is None:
  32. raise ValueError(f"Country name {country_name} can not be mapped to "
  33. f"any country code")
  34. if print_sub:
  35. print(f"Country name {country_name} maps to ISO code {country_code}")
  36. country_submissions = {}
  37. for item in data_folder.iterdir():
  38. if item.is_dir():
  39. if print_sub:
  40. print("")
  41. print("#" * 80)
  42. print(f"Data folder {item.name}")
  43. with open(item / "folder_mapping.json", "r") as mapping_file:
  44. folder_mapping = json.load(mapping_file)
  45. country_folders = folder_mapping[country_code]
  46. if isinstance(country_folders, str):
  47. # only one folder
  48. country_folders = [country_folders]
  49. submission_folders = []
  50. for country_folder in country_folders:
  51. current_folder = item / country_folder
  52. if print_sub:
  53. print("-" * 80)
  54. print(f"Submissions in folder {country_folder}:")
  55. for submission_folder in current_folder.iterdir():
  56. if submission_folder.is_dir():
  57. if print_sub:
  58. print(submission_folder.name)
  59. submission_folders.append(submission_folder.name)
  60. country_submissions[item.name] = submission_folders
  61. return country_submissions
  62. def create_folder_mapping(
  63. folder: str,
  64. extracted: bool = False
  65. ) -> None:
  66. """
  67. Create a mapping from 3 letter ISO country codes to folders
  68. based on the subfolders of the given folder. The mapping is
  69. stored in 'folder_mapping.json' in the given folder.
  70. Parameters
  71. ----------
  72. folder: str
  73. folder to create the mapping for
  74. extracted: bool = False
  75. If true treat the folder as extracted data, where we
  76. only have one folder per country and no typos in the
  77. names
  78. Returns
  79. -------
  80. Nothing
  81. """
  82. if extracted:
  83. folder_mapping = {}
  84. else:
  85. folder_mapping = {
  86. 'VEN': 'Venezeula_(Bolivarian_Republic_of)',
  87. 'FSM': 'Micronesia_(Federated_State_of)',
  88. 'MKD': 'The_Republic_of_North_Macedonia',
  89. }
  90. known_folders = list(folder_mapping.values())
  91. for item in folder.iterdir():
  92. if item.is_dir():
  93. ISO3 = countrynames.to_code_3(item.name)
  94. if ISO3 is None:
  95. if item.name not in known_folders:
  96. print(folder_mapping.values())
  97. print(f"No match for {item.name}")
  98. else:
  99. known_folders.append(item.name)
  100. if ISO3 in folder_mapping.keys():
  101. folder_mapping[ISO3] = [folder_mapping[ISO3], item.name]
  102. else:
  103. folder_mapping[ISO3] = item.name
  104. with open(folder / "folder_mapping.json", "w") as mapping_file:
  105. json.dump(folder_mapping, mapping_file, indent=4)