浏览代码

fix country mapping [skip ci]

Daniel Busch 5 月之前
父节点
当前提交
13d03c4041
共有 2 个文件被更改,包括 548 次插入101 次删除
  1. 393 0
      src/faostat_data_primap/helper/definitions.py
  2. 155 101
      src/faostat_data_primap/read.py

+ 393 - 0
src/faostat_data_primap/helper/definitions.py

@@ -115,6 +115,7 @@ areas_to_remove_base = [
     "Annex I countries",
     "Non-Annex I countries",
     "OECD",
+"Europe, Northern America, Australia and New Zealand",
 ]
 
 read_config_all = {
@@ -200,6 +201,8 @@ read_config_all = {
                 "Belgium-Luxembourg",
                 "Serbia and Montenegro",
                 "European Union (27)",
+                # drop duplicate country
+                "China, mainland",
             ],
             "elements_to_remove": [
                 "Stocks",  # number of animals
@@ -313,6 +316,8 @@ read_config_all = {
                 "Belgium-Luxembourg",
                 "Serbia and Montenegro",
                 "European Union (27)",
+                # "China" and "China, mainland" included with identical data
+                "China, mainland",
             ],
             "elements_to_remove": [
                 "Area",
@@ -330,4 +335,392 @@ read_config_all = {
             ],
         }
     },
+    "pre_post_agricultural_production": {
+        "2023-11-09": {
+            "filename": "Emissions_Pre_Post_Production_E_All_Data_NOFLAG.csv",
+            "areas_to_remove": [
+                *areas_to_remove_base,
+                "Belgium-Luxembourg",
+                "Serbia and Montenegro",
+                "European Union (27)",
+                "Channel Islands",
+            ],
+            "elements_to_remove": [
+                "Energy Use (Total)",
+                "Energy Use (Electricity)",
+                "Energy Use (Natural Gas, including LNG)",
+                "Energy Use (Heat)",
+                "Energy Use (Coal)",
+            ],
+            "entity_mapping": {
+                "Emissions (CO2)": "CO2",
+                "Emissions (CO2eq) (AR5)": "KYOTOGHG (AR5GWP100)",
+                "Emissions (CH4)": "CH4",
+                "Emissions (N2O)": "N2O",
+                "Emissions (CO2eq) from F-gases (AR5)": "FGASES (AR5GWP100)",
+            },
+            "columns_to_drop": [
+                "Element",
+                "Element Code",
+                "Item",
+                "Item Code",
+                "Area Code (M49)",
+                "Area",
+                "Area Code",
+            ],
+        }
+    },
+}
+
+# from https://www.fao.org/faostat/en/#definitions
+country_to_iso3_mapping = {
+    "Afghanistan": "AFG",
+    "Africa": "X06",
+    "Åland Islands": "ALA",
+    "Albania": "ALB",
+    "Algeria": "DZA",
+    "American Samoa": "ASM",
+    "Americas": "X21",
+    "Andorra": "AND",
+    "Angola": "AGO",
+    "Anguilla": "AIA",
+    "Annex I countries": "F5848",
+    "Antarctic Region": "F5600",
+    "Antarctica": "ATA",
+    "Antigua and Barbuda": "ATG",
+    "Argentina": "ARG",
+    "Armenia": "ARM",
+    "Aruba": "ABW",
+    "Asia": "F5300",
+    "Australia": "AUS",
+    "Australia and New Zealand": "F5501",
+    "Austria": "AUT",
+    "Azerbaijan": "AZE",
+    "Bahamas": "BHS",
+    "Bahrain": "BHR",
+    "Bangladesh": "BGD",
+    "Barbados": "BRB",
+    "Belarus": "BLR",
+    "Belgium": "BEL",
+    "Belgium-Luxembourg": "F15",
+    "Belize": "BLZ",
+    "Benin": "BEN",
+    "Bermuda": "BMU",
+    "Bhutan": "BTN",
+    "Bolivia (Plurinational State of)": "BOL",
+    "Bonaire, Sint Eustatius and Saba": "BES",
+    "Bosnia and Herzegovina": "BIH",
+    "Botswana": "BWA",
+    "Bouvet Island": "BVT",
+    "Brazil": "BRA",
+    "British Virgin Islands": "VGB",
+    "Brunei Darussalam": "BRN",
+    "Bulgaria": "BGR",
+    "Burkina Faso": "BFA",
+    "Burundi": "BDI",
+    "Cabo Verde": "CPV",
+    "Cambodia": "KHM",
+    "Cameroon": "CMR",
+    "Canada": "CAN",
+    "Caribbean": "F5206",
+    "Caucasus and Central Asia": "F5857",
+    "Cayman Islands": "CYM",
+    "Central African Republic": "CAF",
+    "Central America": "F5204",
+    "Central Asia": "F5301",
+    "Central Asia and Southern Asia": "F5306",
+    "Chad": "TCD",
+    "Chagos Archipelago": "IOT",
+    "Channel Islands": "CHA",
+    "Chile": "CHL",
+    "China": "F351",
+    "China, Hong Kong SAR": "HKG",
+    "China, Macao SAR": "MAC",
+    "China, mainland": "CHN",
+    "China, Taiwan Province of": "TWN",
+    "Christmas Island": "CXR",
+    "Cocos (Keeling) Islands": "CCK",
+    "Colombia": "COL",
+    "Comoros": "COM",
+    "Congo": "COG",
+    "Cook Islands": "COK",
+    "Costa Rica": "CRI",
+    "Côte d'Ivoire": "CIV",
+    "Croatia": "HRV",
+    "Cuba": "CUB",
+    "Curaçao": "CUW",
+    "Cyprus": "CYP",
+    "Czechia": "CZE",
+    "Czechoslovakia": "F51",
+    "Democratic People's Republic of Korea": "PRK",
+    "Democratic Republic of the Congo": "COD",
+    "Denmark": "DNK",
+    "Djibouti": "DJI",
+    "Dominica": "DMA",
+    "Dominican Republic": "DOM",
+    "Eastern Africa": "F5101",
+    "Eastern Asia": "F5302",
+    "Eastern Asia (excluding Japan and China)": "F5829",
+    "Eastern Asia and South-eastern Asia": "F5307",
+    "Eastern Europe": "F5401",
+    "Ecuador": "ECU",
+    "Egypt": "EGY",
+    "El Salvador": "SLV",
+    "Equatorial Guinea": "GNQ",
+    "Eritrea": "ERI",
+    "Estonia": "EST",
+    "Eswatini": "SWZ",
+    "Ethiopia": "ETH",
+    "Ethiopia PDR": "F62",
+    "Europe": "F5400",
+    "Europe, Northern America, Australia and New Zealand": "nan",
+    "European Union (27)": "F5707",
+    "Falkland Islands (Malvinas)": "FLK",
+    "FAO Major Fishing Area: Atlantic, Eastern Central (14.4.1)": "F99029",
+    "FAO Major Fishing Area: Atlantic, Northeast (14.4.1)": "F99024",
+    "FAO Major Fishing Area: Atlantic, Northwest (14.4.1)": "F99023",
+    "FAO Major Fishing Area: Atlantic, Southeast (14.4.1)": "F99026",
+    "FAO Major Fishing Area: Atlantic, Southwest (14.4.1)": "F99030",
+    "FAO Major Fishing Area: Atlantic, Western Central (14.4.1)": "F99028",
+    "FAO Major Fishing Area: Indian Ocean, Eastern (14.4.1)": "F99025",
+    "FAO Major Fishing Area: Indian Ocean, Western (14.4.1)": "F99027",
+    "FAO Major Fishing Area: Mediterranean and Black Sea (14.4.1)": "F99032",
+    "FAO Major Fishing Area: Pacific, Eastern Central (14.4.1)": "F99018",
+    "FAO Major Fishing Area: Pacific, Northeast (14.4.1)": "F99019",
+    "FAO Major Fishing Area: Pacific, Northwest (14.4.1)": "F99020",
+    "FAO Major Fishing Area: Pacific, Southeast (14.4.1)": "F99031",
+    "FAO Major Fishing Area: Pacific, Southwest (14.4.1)": "F99022",
+    "FAO Major Fishing Area: Pacific, Western Central (14.4.1)": "F99021",
+    "Faroe Islands": "FRO",
+    "Fiji": "FJI",
+    "Finland": "FIN",
+    "France": "FRA",
+    "French Guiana": "GUF",
+    "French Polynesia": "PYF",
+    "French Southern Territories": "ATF",
+    "Gabon": "GAB",
+    "Gambia": "GMB",
+    "Georgia": "GEO",
+    "Germany": "DEU",
+    "Germany Fr": "F78",
+    "Germany Nl": "F77",
+    "Ghana": "GHA",
+    "Gibraltar": "GIB",
+    "Greece": "GRC",
+    "Greenland": "GRL",
+    "Grenada": "GRD",
+    "Guadeloupe": "GLP",
+    "Guam": "GUM",
+    "Guatemala": "GTM",
+    "Guernsey": "GGY",
+    "Guinea": "GIN",
+    "Guinea-Bissau": "GNB",
+    "Guyana": "GUY",
+    "Haiti": "HTI",
+    "Heard and McDonald Islands": "HMD",
+    "High-income economies": "F9010",
+    "Holy See": "VAT",
+    "Honduras": "HND",
+    "Hungary": "HUN",
+    "Iceland": "ISL",
+    "India": "IND",
+    "Indonesia": "IDN",
+    "International Centres (FAO) (2.5.1.a)": "F5823",
+    "Iran (Islamic Republic of)": "IRN",
+    "Iraq": "IRQ",
+    "Ireland": "IRL",
+    "Isle of Man": "IMN",
+    "Israel": "ISR",
+    "Italy": "ITA",
+    "Jamaica": "JAM",
+    "Japan": "JPN",
+    "Jersey": "JEY",
+    "Johnston Island": "JTN",
+    "Jordan": "JOR",
+    "Kazakhstan": "KAZ",
+    "Kenya": "KEN",
+    "Kiribati": "KIR",
+    "Kuwait": "KWT",
+    "Kyrgyzstan": "KGZ",
+    "Land Locked Developing Countries": "F5802",
+    "Lao People's Democratic Republic": "LAO",
+    "Latin America": "F348",
+    "Latin America and the Caribbean": "F5205",
+    "Latvia": "LVA",
+    "Least Developed Countries": "F5801",
+    "Lebanon": "LBN",
+    "Lesotho": "LSO",
+    "Liberia": "LBR",
+    "Libya": "LBY",
+    "Liechtenstein": "LIE",
+    "Lithuania": "LTU",
+    "Low income economies": "F5858",
+    "Low Income Food Deficit Countries": "F5815",
+    "Lower-middle-income economies": "F5859",
+    "Luxembourg": "LUX",
+    "Madagascar": "MDG",
+    "Malawi": "MWI",
+    "Malaysia": "MYS",
+    "Maldives": "MDV",
+    "Mali": "MLI",
+    "Malta": "MLT",
+    "Marshall Islands": "MHL",
+    "Martinique": "MTQ",
+    "Mauritania": "MRT",
+    "Mauritius": "MUS",
+    "Mayotte": "MYT",
+    "Melanesia": "F5502",
+    "Mexico": "MEX",
+    "Micronesia": "F5503",
+    "Micronesia (Federated States of)": "FSM",
+    "Middle Africa": "F5102",
+    "Midway Island": "MID",
+    "Monaco": "MCO",
+    "Mongolia": "MNG",
+    "Montenegro": "MNE",
+    "Montserrat": "MSR",
+    "Morocco": "MAR",
+    "Mozambique": "MOZ",
+    "Myanmar": "MMR",
+    "Namibia": "NAM",
+    "Nauru": "NRU",
+    "Nepal": "NPL",
+    "Net Food Importing Developing Countries": "F5817",
+    "Netherlands (Kingdom of the)": "NLD",
+    "Netherlands Antilles (former)": "ANT",
+    "New Caledonia": "NCL",
+    "New Zealand": "NZL",
+    "Nicaragua": "NIC",
+    "Niger": "NER",
+    "Nigeria": "NGA",
+    "Niue": "NIU",
+    "Non-Annex I countries": "F5849",
+    "Norfolk Island": "NFK",
+    "North and Central America": "F336",
+    "North Macedonia": "MKD",
+    "Northern Africa": "F5103",
+    "Northern Africa (excluding Sudan)": "F429",
+    "Northern America": "F5203",
+    "Northern America and Europe": "F5208",
+    "Northern Europe": "F5402",
+    "Northern Mariana Islands": "MNP",
+    "Norway": "NOR",
+    "Oceania": "F5500",
+    "Oceania excluding Australia and New Zealand": "F5807",
+    "OECD": "F5873",
+    "Oman": "OMN",
+    "Pacific Islands Trust Territory": "F164",
+    "Pakistan": "PAK",
+    "Palau": "PLW",
+    "Palestine": "PSE",
+    "Panama": "PAN",
+    "Papua New Guinea": "PNG",
+    "Paraguay": "PRY",
+    "Peru": "PER",
+    "Philippines": "PHL",
+    "Pitcairn": "PCN",
+    "Poland": "POL",
+    "Polynesia": "F5504",
+    "Portugal": "PRT",
+    "Puerto Rico": "PRI",
+    "Qatar": "QAT",
+    "Regional Centres (FAO) (2.5.1.a)": "F5822",
+    "Republic of Korea": "KOR",
+    "Republic of Moldova": "MDA",
+    "Réunion": "REU",
+    "Romania": "ROU",
+    "Russian Federation": "RUS",
+    "Rwanda": "RWA",
+    "Saint Barthélemy": "BLM",
+    "Saint Helena, Ascension and Tristan da Cunha": "SHN",
+    "Saint Kitts and Nevis": "KNA",
+    "Saint Lucia": "LCA",
+    "Saint Martin (French part)": "MAF",
+    "Saint Pierre and Miquelon": "SPM",
+    "Saint Vincent and the Grenadines": "VCT",
+    "Samoa": "WSM",
+    "San Marino": "SMR",
+    "Sao Tome and Principe": "STP",
+    "Sark": "F285",
+    "Saudi Arabia": "SAU",
+    "Senegal": "SEN",
+    "Serbia": "SRB",
+    "Serbia and Montenegro": "SCG",
+    "Seychelles": "SYC",
+    "Sierra Leone": "SLE",
+    "Singapore": "SGP",
+    "Sint Maarten (Dutch part)": "SXM",
+    "Slovakia": "SVK",
+    "Slovenia": "SVN",
+    "Small Island Developing States": "F5803",
+    "Solomon Islands": "SLB",
+    "Somalia": "SOM",
+    "South Africa": "ZAF",
+    "South America": "F5207",
+    "South Georgia and the South Sandwich Islands": "SGS",
+    "South Sudan": "SSD",
+    "South-eastern Asia": "F5304",
+    "Southern Africa": "F5104",
+    "Southern Asia": "F5303",
+    "Southern Asia (excluding India)": "F5855",
+    "Southern Europe": "F5403",
+    "Spain": "ESP",
+    "Sri Lanka": "LKA",
+    "Sub-Saharan Africa": "F420",
+    "Sub-Saharan Africa (including Sudan)": "F5810",
+    "Sudan": "SDN",
+    "Sudan (former)": "F206",
+    "Suriname": "SUR",
+    "Svalbard and Jan Mayen Islands": "SJM",
+    "Sweden": "SWE",
+    "Switzerland": "CHE",
+    "Syrian Arab Republic": "SYR",
+    "Tajikistan": "TJK",
+    "Thailand": "THA",
+    "Timor-Leste": "TLS",
+    "Togo": "TGO",
+    "Tokelau": "TKL",
+    "Tonga": "TON",
+    "Trinidad and Tobago": "TTO",
+    "Tunisia": "TUN",
+    "Türkiye": "TUR",
+    "Turkmenistan": "TKM",
+    "Turks and Caicos Islands": "TCA",
+    "Tuvalu": "TUV",
+    "Uganda": "UGA",
+    "Ukraine": "UKR",
+    "United Arab Emirates": "ARE",
+    "United Kingdom of Great Britain and Northern Ireland": "GBR",
+    "United Republic of Tanzania": "TZA",
+    "United States Minor Outlying Islands": "UMI",
+    "United States of America": "USA",
+    "United States Virgin Islands": "VIR",
+    "Upper-middle-income economies": "F9011",
+    "Uruguay": "URY",
+    "USSR": "F228",
+    "Uzbekistan": "UZB",
+    "Vanuatu": "VUT",
+    "Venezuela (Bolivarian Republic of)": "VEN",
+    "Viet Nam": "VNM",
+    "Wake Island": "WAK",
+    "Wallis and Futuna Islands": "WLF",
+    "Western Africa": "F5105",
+    "Western Asia": "F5305",
+    "Western Asia (exc. Armenia, Azerbaijan, Cyprus, Israel and Georgia)": "F5828",
+    "Western Asia and Northern Africa": "F5308",
+    "Western Europe": "F5404",
+    "Western Sahara": "ESH",
+    "World": "X01",
+    "Yemen": "YEM",
+    "Yemen Ar Rp": "F246",
+    "Yemen Dem": "F247",
+    "Yugoslav SFR": "F248",
+    "Zambia": "ZMB",
+    "Zimbabwe": "ZWE",
+    # Reading the CSV correctly doesn't work for some domains (names with accents).
+    # TODO: there is probably a better way to solve this.
+    "Côte d'Ivoire" : "CIV",
+    "Curaçao" : "CUW",
+    "Réunion" : "REU",
+    "Türkiye" : "TUR",
 }

+ 155 - 101
src/faostat_data_primap/read.py

@@ -7,103 +7,117 @@ import pycountry
 from src.faostat_data_primap.helper.definitions import (
     downloaded_data_path,
     read_config_all,
+country_to_iso3_mapping,
 )
-custom_country_mapping_code = {}
-custom_country_mapping_name = {
-    # farm gate agricultur energy
-    "Bolivia (Plurinational State of)": "BOL",
-    "China, Hong Kong SAR": "HKG",
-    "China, Macao SAR": "MAC",
-    "China, mainland": "CHN",
-    "China, Taiwan Province of": "TWN",
-    "Iran (Islamic Republic of)": "IRN",
-    "Czechoslovakia": "CSK",
-    "Ethiopia PDR": "ETH",
-    "Netherlands (Kingdom of the)": "NLD",
-    "Netherlands Antilles (former)": "ANT",
-    # todo is former Sudan same as the new (north) Sudan
-    "Sudan (former)": "SDN",
-    "USSR": "SUN",
-    "Venezuela (Bolivarian Republic of)": "VEN",
-    "Yugoslav SFR": "YUG",
-    "World": "EARTH",
-    # todo Andrews cement list below (deleted commented lines)
-    # "Bonaire, Saint Eustatius and Saba": "BES",
-    # "Cape Verde": "CPV",
-    "Democratic Republic of the Congo": "COD",
-    # "Faeroe Islands": "FRO",
-    "Micronesia (Federated States of)": "FSM",
-    # "Iran": "IRN",
-    # "Laos": "LAO",
-    # "Occupied Palestinian Territory": "PSE",
-    # "Swaziland": "SWZ",
-    # "Taiwan": "TWN",
-    "Wallis and Futuna Islands": "WLF",
-    # farm gate emissions crops
-    "United States Virgin Islands": "VIR",
-    # todo is this relevant to us?
-    'Pacific Islands Trust Territory' : 'PIC',
-    'Svalbard and Jan Mayen Islands' : "SJM",  # Norwy
-    # something goes wrong with french characters in land_use_forest
-    "Côte d'Ivoire" : "CIV",
-'Curaçao' : "CUW",
-   "Réunion" : "REU",
-'Türkiye' : "TUR",
-}
-
 
-def get_country_code(
-    country_name: str,
-) -> str:
-    """
-    Get country code for country name.
+# mapping = pd.read_csv("../../FAOSTAT_data_11-19-2024.csv")#, encoding="ISO-8859-1")
+# mapping_dict = {}
+# for idx, row in mapping.iterrows():
+#     mapping_dict[row['Country']] = row['ISO3 Code']
 
-    If the input is a code it will be returned,
-    if the input is not a three-letter code a search will be performed
 
-    Parameters
-    ----------
-    country_name: str
-        Country code or name to get the three-letter code for.
+# custom_country_mapping_code = {}
+# custom_country_mapping_name = {
+#     # farm gate agriculture energy
+#     "Bolivia (Plurinational State of)": "BOL",
+#     "China, Hong Kong SAR": "HKG",
+#     "China, Macao SAR": "MAC",
+#     "China, mainland": "CHN",
+#     "China, Taiwan Province of": "TWN",
+#     "Iran (Islamic Republic of)": "IRN",
+#     "Czechoslovakia": "CSK",
+#     "Ethiopia PDR": "ETH",
+#     "Netherlands (Kingdom of the)": "NLD",
+#     "Netherlands Antilles (former)": "ANT",
+#     # todo is former Sudan same as the new (north) Sudan
+#     "Sudan (former)": "SDN",
+#     "USSR": "SUN",
+#     "Venezuela (Bolivarian Republic of)": "VEN",
+#     "Yugoslav SFR": "YUG",
+#     "World": "EARTH",
+#     # todo Andrews cement list below (deleted commented lines)
+#     # "Bonaire, Saint Eustatius and Saba": "BES",
+#     # "Cape Verde": "CPV",
+#     "Democratic Republic of the Congo": "COD",
+#     # "Faeroe Islands": "FRO",
+#     "Micronesia (Federated States of)": "FSM",
+#     # "Iran": "IRN",
+#     # "Laos": "LAO",
+#     # "Occupied Palestinian Territory": "PSE",
+#     # "Swaziland": "SWZ",
+#     # "Taiwan": "TWN",
+#     "Wallis and Futuna Islands": "WLF",
+#     # farm gate emissions crops
+#     "United States Virgin Islands": "VIR",
+#     # todo is this relevant to us?
+#     "Pacific Islands Trust Territory": "PIC",
+#     "Svalbard and Jan Mayen Islands": "SJM",  # Norway
+#     # something goes wrong with non-ASCII characters in land_use_forest
+#     "Côte d'Ivoire": "CIV",
+#     "Curaçao": "CUW",
+#     "Réunion": "REU",
+#     "Türkiye": "TUR",
+#     # pycountry mixes these up
+#     'Niger' : 'NER',
+#     'Nigeria' : 'NGA',
+#     "Curaçao" : "CUW",
+#     "Republic of Korea" : 'KOR',
+#     "Democratic People's Republic of Korea" : "PRK",
+# }
 
-    Returns
-    -------
-        country_code: str
-
-    """
-    # First check if it's in the list of custom codes
-    if country_name in custom_country_mapping_code:
-        country_code = country_name
-    elif country_name in custom_country_mapping_name:
-        country_code = custom_country_mapping_name[country_name]
-    else:
-        try:
-            # check if it's a 3 letter UNFCCC_GHG_data
-            country = pycountry.countries.get(alpha_3=country_name)
-            country_code = country.alpha_3
-        except:
-            try:
-                country = pycountry.countries.search_fuzzy(
-                    country_name.replace("_", " ")
-                )
-            except:
-                msg = f"Cannot map country {country_name} to country code."
-                raise ValueError(msg)
-            if len(country) > 1:
-                country_code = None
-                for current_country in country:
-                    if current_country.name == country_name:
-                        country_code = current_country.alpha_3
-                if country_code is None:
-                    msg = (
-                        f"Country name {country_name} has {len(country)} "
-                        "possible results for country codes."
-                    )
-                    raise ValueError(msg)
-
-            country_code = country[0].alpha_3
-
-    return country_code
+#
+# def get_country_code(
+#     country_name: str,
+# ) -> str:
+#     """
+#     Get country code for country name.
+#
+#     If the input is a code it will be returned,
+#     if the input is not a three-letter code a search will be performed
+#
+#     Parameters
+#     ----------
+#     country_name: str
+#         Country code or name to get the three-letter code for.
+#
+#     Returns
+#     -------
+#         country_code: str
+#
+#     """
+#     # First check if it's in the list of custom codes
+#     if country_name in custom_country_mapping_code:
+#         country_code = country_name
+#     elif country_name in custom_country_mapping_name:
+#         country_code = custom_country_mapping_name[country_name]
+#     else:
+#         try:
+#             # check if it's a 3 letter UNFCCC_GHG_data
+#             country = pycountry.countries.get(alpha_3=country_name)
+#             country_code = country.alpha_3
+#         except:
+#             try:
+#                 country = pycountry.countries.search_fuzzy(
+#                     country_name.replace("_", " ")
+#                 )
+#             except:
+#                 msg = f"Cannot map country {country_name} to country code."
+#                 raise ValueError(msg)
+#             if len(country) > 1:
+#                 country_code = None
+#                 for current_country in country:
+#                     if current_country.name == country_name:
+#                         country_code = current_country.alpha_3
+#                 if country_code is None:
+#                     msg = (
+#                         f"Country name {country_name} has {len(country)} "
+#                         "possible results for country codes."
+#                     )
+#                     raise ValueError(msg)
+#
+#             country_code = country[0].alpha_3
+#
+#     return country_code
 
 
 files_to_read = (
@@ -128,9 +142,13 @@ files_to_read = (
         "2023-11-09",
     ),
     (
-    "land_use_forests",
+        "land_use_forests",
         "2024-11-14",
-    )
+    ),
+    (
+        "pre_post_agricultural_production",
+        "2023-11-09",
+    ),
 )
 
 df_all = None
@@ -142,7 +160,7 @@ for domain, release in reversed(files_to_read):
     print(f"Read {read_config["filename"]}")
     dataset_path = downloaded_data_path / domain / release / read_config["filename"]
     # There are some non-utf8 characters in Emissions_Drained_Organic_Soils_E_All_Data_NOFLAG.csv
-    df_domain = pd.read_csv(dataset_path, encoding = "ISO-8859-1")
+    df_domain = pd.read_csv(dataset_path, encoding='ISO-8859-1')
 
     # remove rows by unit
     # todo this is maybe not a good idea as it hides the elements to be removed
@@ -159,13 +177,42 @@ for domain, release in reversed(files_to_read):
     if "areas_to_remove" in read_config.keys():
         df_domain = df_domain[~df_domain["Area"].isin(read_config["areas_to_remove"])]
 
+    # check for duplicates (same data, different country name)
+    # duplicates = df_domain.copy().drop(labels=["Area", "Area Code (M49)", "Area Code"], axis=1)
+    # duplicates = duplicates[duplicates.duplicated(keep=False)]
+    # if not duplicates.empty:
+    #     msg = f"Duplicate values for {domain}"
+    #     raise ValueError(msg)
+
     # country name to ISO3 country code mapping
-    countries_to_map = [c for c in df_domain["Area"].unique() if c not in country_mapping.keys()]
-    for country_to_map in countries_to_map:
-        country_mapping[country_to_map] = get_country_code(country_to_map)
+    # countries_to_map = [
+    #     c for c in df_domain["Area"].unique() if c not in country_mapping.keys()
+    # ]
+    # for country_to_map in countries_to_map:
+    #     country_mapping[country_to_map] = get_country_code(country_to_map)
+
+    # make sure we don't map duplicate country codes
+    # if len(country_mapping.values()) != len(set(country_mapping.values())):
+    #     duplicate_codes = [x for i, x in enumerate(list(country_mapping.values())) if list(country_mapping.values()).count(x) > 1]
+    #     duplicates = [(key, value) for (key, value) in country_mapping.items() if value in duplicate_codes]
+    #     msg = f"Duplicate country codes for {domain}. Check country_mapping"
+    #     raise ValueError(msg)
+
+
 
     # create country columns
-    df_domain["country (ISO3)"] = df_domain["Area"].map(country_mapping)
+    df_domain["country (ISO3)"] = df_domain["Area"].map(country_to_iso3_mapping)
+
+    unmapped = df_domain.loc[df_domain["country (ISO3)"].isna(), "Area"].unique()
+    if len(unmapped) > 0:  # every area must be converted into an ISO3 code
+        raise ValueError(f"No ISO3 code for areas in {domain}: {list(unmapped)}")
+
+    # check for duplicates (same data, different country name)
+    # duplicates = df_domain.copy().drop(labels=["Area", "Area Code (M49)", "Area Code"], axis=1)
+    # duplicates = duplicates[duplicates.duplicated(keep=False)]
+    # if not duplicates.empty:
+    #     msg = f"Duplicate values for {domain}. Check country {duplicates['country (ISO3)'].unique()}"
+    #     raise ValueError(msg)
 
     # create entity column
     df_domain["entity"] = df_domain["Element"].map(read_config["entity_mapping"])
@@ -181,7 +228,6 @@ for domain, release in reversed(files_to_read):
 
     if df_all is None:
         df_all = df_domain
-        break
     else:
         # makes sure there are no duplicate category names
         if any(
@@ -202,7 +248,7 @@ coords_cols = {
     "area": "country (ISO3)",
     "unit": "Unit",
     "entity": "entity",
-    "source" : "Source"
+    "source": "Source",
 }
 
 coords_terminologies = {"area": "ISO3", "category": "FAOSTAT"}
@@ -223,6 +269,8 @@ meta_data = {
     "comment": "tbd",
     "institution": "tbd",
 }
+# Rename columns to remove the "Y" prefix (removeprefix drops exactly one leading "Y")
+df_all.rename(columns=lambda x: x.removeprefix("Y"), inplace=True)
 
 data_if = pm2.pm2io.convert_wide_dataframe_if(
     df_all,
@@ -235,6 +283,12 @@ data_if = pm2.pm2io.convert_wide_dataframe_if(
     meta_data=meta_data,
 )
 
+# convert to PRIMAP2 native format
+data_pm2 = pm2.pm2io.from_interchange_format(data_if, data_if.attrs)
+
+# convert back to IF for standardized units
+data_if = data_pm2.pr.to_interchange_format()
+
 pass
 # steps:
 # convert to primap2 format