Przeglądaj źródła

several fixes to folder mapping functions

Johannes Gütschow 2 lata temu
rodzic
commit
4180575eaa

+ 1 - 0
code/UNFCCC_reader/folder_mapping.json

@@ -8,6 +8,7 @@
     "VEN": "Venezeula_(Bolivarian_Republic_of)",
     "FSM": "Micronesia_(Federated_State_of)",
     "MKD": "The_Republic_of_North_Macedonia",
+    "KOR": "Republic_of_Korea",
     "PRK": "Republic_of_Korea",
     "ARG": "Argentina",
     "CHL": "Chile"

+ 56 - 45
code/UNFCCC_reader/get_submissions_info.py

@@ -25,6 +25,21 @@ custom_country_mapping = {
     "GBK": "United Kingdom of Great Britain and Northern Ireland",
 }
 
+custom_folders = {
+    'Venezeula_(Bolivarian_Republic_of)': 'VEN',
+    'Venezuela_(Bolivarian_Republic_of)': 'VEN',
+    'Micronesia_(Federated_State_of)': 'FSM',
+    'Micronesia_(Federated_States_of)': 'FSM',
+    'The_Republic_of_North_Macedonia': 'MKD',
+    'Republic_of_Korea': 'KOR',
+    'Bolivia_(Plurinational_State_of)': 'BOL',
+    'Türkiye': 'TUR',
+    'Iran_(Islamic_Republic_of)': 'IRN',
+    'Côte_d’Ivoire': 'CIV',
+    'Democratic_Republic_of_the_Congo': "COD",
+    'European_Union': 'EUA',
+}
+
 def get_country_submissions(
         country_name: str,
         print_sub: bool = True,
@@ -281,26 +296,30 @@ def get_country_code(
         Country code or name to get the three-letter code for.
 
     """
-    try:
-        # check if it's a 3 letter code
-        country = pycountry.countries.get(alpha_3=country_name)
-        country_code = country.alpha_3
-    except:
+    # First check if it's in the list of custom codes
+    if country_name in custom_country_mapping:
+        country_code = country_name
+    else:
         try:
-            country = pycountry.countries.search_fuzzy(country_name)
+            # check if it's a 3 letter code
+            country = pycountry.countries.get(alpha_3=country_name)
+            country_code = country.alpha_3
         except:
-            raise ValueError(f"Country name {country_name} can not be mapped to "
-                             f"any country code")
-        if len(country) > 1:
-            country_code = None
-            for current_country in country:
-                if current_country.name == country_name:
-                    country_code = current_country.alpha_3
-            if country_code is None:
-                raise ValueError(f"Country name {country_name} has {len(country)} "
-                                 f"possible results for country codes.")
-
-        country_code = country[0].alpha_3
+            try:
+                country = pycountry.countries.search_fuzzy(country_name.replace("_", " "))
+            except:
+                raise ValueError(f"Country name {country_name} can not be mapped to "
+                                 f"any country code. Try using the ISO3 code directly.")
+            if len(country) > 1:
+                country_code = None
+                for current_country in country:
+                    if current_country.name == country_name:
+                        country_code = current_country.alpha_3
+                if country_code is None:
+                    raise ValueError(f"Country name {country_name} has {len(country)} "
+                                     f"possible results for country codes.")
+
+            country_code = country[0].alpha_3
 
     return country_code
 
@@ -518,40 +537,32 @@ def create_folder_mapping(
     """
 
     folder = root_path / folder
-
-    folder_mapping = custom_country_mapping
-    if not extracted:
-        folder_mapping = {
-            **folder_mapping,
-            **{
-                'VEN': 'Venezeula_(Bolivarian_Republic_of)',
-                'FSM': 'Micronesia_(Federated_State_of)',
-                'MKD': 'The_Republic_of_North_Macedonia',
-            }
-        }
-    known_folders = list(folder_mapping.values())
-    print(f"known_folders: {known_folders}")
+    folder_mapping = {}
+    #if not extracted:
+    known_folders = custom_folders
+    #else:
+    #    known_folders = {}
 
     for item in folder.iterdir():
         if item.is_dir() and not item.match("__pycache__"):
-            try:
-                country = pycountry.countries.search_fuzzy(item.name.replace("_", " "))
-                if len(country) > 1:
+            if item.name in known_folders:
+                ISO3 = known_folders[item.name]
+            else:
+                try:
+                    country = pycountry.countries.search_fuzzy(item.name.replace("_", " "))
+                    if len(country) > 1:
+                        ISO3 = None
+                        for current_country in country:
+                            if current_country.name == item.name.replace("_", " "):
+                                ISO3 = current_country.alpha_3
+                    else:
+                        ISO3 = country[0].alpha_3
+                except:
                     ISO3 = None
-                    for current_country in country:
-                        if current_country.name == item.name.replace("_", " "):
-                            ISO3 = current_country.alpha_3
-                else:
-                    ISO3 = country[0].alpha_3
-            except:
-                ISO3 = None
 
             if ISO3 is None:
-                if item.name not in known_folders:
-                    print(folder_mapping.values())
-                    print(f"No match for {item.name}")
+                print(f"No match for {item.name}")
             else:
-                known_folders.append(item.name)
                 if ISO3 in folder_mapping.keys():
                     folder_mapping[ISO3] = [folder_mapping[ISO3], item.name]
                 else:

+ 24 - 14
downloaded_data/UNFCCC/folder_mapping.json

@@ -1,16 +1,4 @@
 {
-    "EUA": "European Union",
-    "EUC": "European Union",
-    "FRK": "France",
-    "DKE": "Denmark",
-    "DNM": "Denmark",
-    "GBK": "United Kingdom of Great Britain and Northern Ireland",
-    "VEN": "Venezeula_(Bolivarian_Republic_of)",
-    "FSM": "Micronesia_(Federated_State_of)",
-    "MKD": [
-        "The_Republic_of_North_Macedonia",
-        "North_Macedonia"
-    ],
     "GHA": "Ghana",
     "STP": "Sao_Tome_and_Principe",
     "PRK": [
@@ -24,6 +12,10 @@
     "MDG": "Madagascar",
     "BLR": "Belarus",
     "VUT": "Vanuatu",
+    "VEN": [
+        "Venezeula_(Bolivarian_Republic_of)",
+        "Venezuela_(Bolivarian_Republic_of)"
+    ],
     "UGA": "Uganda",
     "TUV": "Tuvalu",
     "GUY": "Guyana",
@@ -42,8 +34,13 @@
     "BRA": "Brazil",
     "CRI": "Costa_Rica",
     "VNM": "Viet_Nam",
+    "CIV": [
+        "C\u00f4te_d\u2019Ivoire",
+        "C\u00f4te_d'Ivoire"
+    ],
     "QAT": "Qatar",
     "NAM": "Namibia",
+    "COD": "Democratic_Republic_of_the_Congo",
     "MLT": "Malta",
     "SVN": "Slovenia",
     "BGR": "Bulgaria",
@@ -57,6 +54,10 @@
     "THA": "Thailand",
     "SUR": "Suriname",
     "TJK": "Tajikistan",
+    "FSM": [
+        "Micronesia_(Federated_States_of)",
+        "Micronesia_(Federated_State_of)"
+    ],
     "CHE": "Switzerland",
     "ISL": "Iceland",
     "DMA": "Dominica",
@@ -66,11 +67,14 @@
     "GRC": "Greece",
     "BHR": "Bahrain",
     "SMR": "San_Marino",
+    "TUR": [
+        "T\u00fcrkiye",
+        "Turkey"
+    ],
     "PRY": "Paraguay",
     "SLV": "El_Salvador",
     "LTU": "Lithuania",
     "AFG": "Afghanistan",
-    "TUR": "Turkey",
     "AND": "Andorra",
     "SLE": "Sierra_Leone",
     "IND": "India",
@@ -119,6 +123,10 @@
     "KIR": "Kiribati",
     "BDI": "Burundi",
     "BHS": "Bahamas",
+    "MKD": [
+        "North_Macedonia",
+        "The_Republic_of_North_Macedonia"
+    ],
     "MRT": "Mauritania",
     "LVA": "Latvia",
     "NGA": "Nigeria",
@@ -127,9 +135,9 @@
     "TKM": "Turkmenistan",
     "GNB": "Guinea-Bissau",
     "ZWE": "Zimbabwe",
-    "CIV": "C\u00f4te_d'Ivoire",
     "LBR": "Liberia",
     "SLB": "Solomon_Islands",
+    "IRN": "Iran_(Islamic_Republic_of)",
     "CPV": "Cabo_Verde",
     "DJI": "Djibouti",
     "MAR": "Morocco",
@@ -148,6 +156,7 @@
     "MUS": "Mauritius",
     "PER": "Peru",
     "BFA": "Burkina_Faso",
+    "EUA": "European_Union",
     "GBR": "United_Kingdom_of_Great_Britain_and_Northern_Ireland",
     "LCA": "Saint_Lucia",
     "NRU": "Nauru",
@@ -162,6 +171,7 @@
     "SWZ": "Eswatini",
     "ARE": "United_Arab_Emirates",
     "MNG": "Mongolia",
+    "BOL": "Bolivia_(Plurinational_State_of)",
     "MYS": "Malaysia",
     "VCT": "Saint_Vincent_and_the_Grenadines",
     "JAM": "Jamaica",

+ 1 - 8
extracted_data/UNFCCC/folder_mapping.json

@@ -1,11 +1,5 @@
 {
-    "EUA": "European Union",
-    "EUC": "European Union",
-    "FRK": "France",
-    "DKE": "Denmark",
-    "DNM": "Denmark",
-    "GBK": "United Kingdom of Great Britain and Northern Ireland",
-    "PRK": "Republic_of_Korea",
+    "KOR": "Republic_of_Korea",
     "BLR": "Belarus",
     "DNK": "Denmark",
     "FRA": "France",
@@ -25,7 +19,6 @@
     "NOR": "Norway",
     "AUT": "Austria",
     "UKR": "Ukraine",
-    "ARG": "Argentina",
     "FIN": "Finland",
     "BEL": "Belgium",
     "MCO": "Monaco",