Browse Source

clean up, refactor

Daniel Busch 3 months ago
parent
commit
8ae9ca118d

+ 5 - 0
Makefile

@@ -82,3 +82,8 @@ virtual-environment:  ## update virtual environment, create a new one if it does
 download_all_domains:
 	# downloads and stages (datalad save) all available data
 	datalad run poetry run python3 scripts/download_all_domains.py
+
+.PHONY: read_latest_data
+read_latest_data:
+	# reads and stages (datalad save) the latest data for each domain
+	datalad run poetry run python3 scripts/read_all_domains.py

+ 8 - 0
scripts/read_all_domains.py

@@ -0,0 +1,8 @@
+"""Read the latest release of all available domains."""
+
+from faostat_data_primap.read import (
+    read_latest_data,
+)
+
+if __name__ == "__main__":
+    read_latest_data()

+ 353 - 0
src/faostat_data_primap/helper/country_mapping.py

@@ -0,0 +1,353 @@
+"""Country mapping according to https://www.fao.org/faostat/en/#definitions"""
+
+country_to_iso3_mapping = {
+    "Afghanistan": "AFG",
+    "Africa": "X06",
+    "Åland Islands": "ALA",
+    "Albania": "ALB",
+    "Algeria": "DZA",
+    "American Samoa": "ASM",
+    "Americas": "X21",
+    "Andorra": "AND",
+    "Angola": "AGO",
+    "Anguilla": "AIA",
+    "Annex I countries": "F5848",
+    "Antarctic Region": "F5600",
+    "Antarctica": "ATA",
+    "Antigua and Barbuda": "ATG",
+    "Argentina": "ARG",
+    "Armenia": "ARM",
+    "Aruba": "ABW",
+    "Asia": "F5300",
+    "Australia": "AUS",
+    "Australia and New Zealand": "F5501",
+    "Austria": "AUT",
+    "Azerbaijan": "AZE",
+    "Bahamas": "BHS",
+    "Bahrain": "BHR",
+    "Bangladesh": "BGD",
+    "Barbados": "BRB",
+    "Belarus": "BLR",
+    "Belgium": "BEL",
+    "Belgium-Luxembourg": "F15",
+    "Belize": "BLZ",
+    "Benin": "BEN",
+    "Bermuda": "BMU",
+    "Bhutan": "BTN",
+    "Bolivia (Plurinational State of)": "BOL",
+    "Bonaire, Sint Eustatius and Saba": "BES",
+    "Bosnia and Herzegovina": "BIH",
+    "Botswana": "BWA",
+    "Bouvet Island": "BVT",
+    "Brazil": "BRA",
+    "British Virgin Islands": "VGB",
+    "Brunei Darussalam": "BRN",
+    "Bulgaria": "BGR",
+    "Burkina Faso": "BFA",
+    "Burundi": "BDI",
+    "Cabo Verde": "CPV",
+    "Cambodia": "KHM",
+    "Cameroon": "CMR",
+    "Canada": "CAN",
+    "Caribbean": "F5206",
+    "Caucasus and Central Asia": "F5857",
+    "Cayman Islands": "CYM",
+    "Central African Republic": "CAF",
+    "Central America": "F5204",
+    "Central Asia": "F5301",
+    "Central Asia and Southern Asia": "F5306",
+    "Chad": "TCD",
+    "Chagos Archipelago": "IOT",
+    "Channel Islands": "CHA",
+    "Chile": "CHL",
+    "China": "F351",
+    "China, Hong Kong SAR": "HKG",
+    "China, Macao SAR": "MAC",
+    "China, mainland": "CHN",
+    "China, Taiwan Province of": "TWN",
+    "Christmas Island": "CXR",
+    "Cocos (Keeling) Islands": "CCK",
+    "Colombia": "COL",
+    "Comoros": "COM",
+    "Congo": "COG",
+    "Cook Islands": "COK",
+    "Costa Rica": "CRI",
+    "Côte d'Ivoire": "CIV",
+    "Croatia": "HRV",
+    "Cuba": "CUB",
+    "Curaçao": "CUW",
+    "Cyprus": "CYP",
+    "Czechia": "CZE",
+    "Czechoslovakia": "F51",
+    "Democratic People's Republic of Korea": "PRK",
+    "Democratic Republic of the Congo": "COD",
+    "Denmark": "DNK",
+    "Djibouti": "DJI",
+    "Dominica": "DMA",
+    "Dominican Republic": "DOM",
+    "Eastern Africa": "F5101",
+    "Eastern Asia": "F5302",
+    "Eastern Asia (excluding Japan and China)": "F5829",
+    "Eastern Asia and South-eastern Asia": "F5307",
+    "Eastern Europe": "F5401",
+    "Ecuador": "ECU",
+    "Egypt": "EGY",
+    "El Salvador": "SLV",
+    "Equatorial Guinea": "GNQ",
+    "Eritrea": "ERI",
+    "Estonia": "EST",
+    "Eswatini": "SWZ",
+    "Ethiopia": "ETH",
+    "Ethiopia PDR": "F62",
+    "Europe": "F5400",
+    "Europe, Northern America, Australia and New Zealand": "nan",
+    "European Union (27)": "F5707",
+    "Falkland Islands (Malvinas)": "FLK",
+    "FAO Major Fishing Area: Atlantic, Eastern Central (14.4.1)": "F99029",
+    "FAO Major Fishing Area: Atlantic, Northeast (14.4.1)": "F99024",
+    "FAO Major Fishing Area: Atlantic, Northwest (14.4.1)": "F99023",
+    "FAO Major Fishing Area: Atlantic, Southeast (14.4.1)": "F99026",
+    "FAO Major Fishing Area: Atlantic, Southwest (14.4.1)": "F99030",
+    "FAO Major Fishing Area: Atlantic, Western Central (14.4.1)": "F99028",
+    "FAO Major Fishing Area: Indian Ocean, Eastern (14.4.1)": "F99025",
+    "FAO Major Fishing Area: Indian Ocean, Western (14.4.1)": "F99027",
+    "FAO Major Fishing Area: Mediterranean and Black Sea (14.4.1)": "F99032",
+    "FAO Major Fishing Area: Pacific, Eastern Central (14.4.1)": "F99018",
+    "FAO Major Fishing Area: Pacific, Northeast (14.4.1)": "F99019",
+    "FAO Major Fishing Area: Pacific, Northwest (14.4.1)": "F99020",
+    "FAO Major Fishing Area: Pacific, Southeast (14.4.1)": "F99031",
+    "FAO Major Fishing Area: Pacific, Southwest (14.4.1)": "F99022",
+    "FAO Major Fishing Area: Pacific, Western Central (14.4.1)": "F99021",
+    "Faroe Islands": "FRO",
+    "Fiji": "FJI",
+    "Finland": "FIN",
+    "France": "FRA",
+    "French Guiana": "GUF",
+    "French Polynesia": "PYF",
+    "French Southern Territories": "ATF",
+    "Gabon": "GAB",
+    "Gambia": "GMB",
+    "Georgia": "GEO",
+    "Germany": "DEU",
+    "Germany Fr": "F78",
+    "Germany Nl": "F77",
+    "Ghana": "GHA",
+    "Gibraltar": "GIB",
+    "Greece": "GRC",
+    "Greenland": "GRL",
+    "Grenada": "GRD",
+    "Guadeloupe": "GLP",
+    "Guam": "GUM",
+    "Guatemala": "GTM",
+    "Guernsey": "GGY",
+    "Guinea": "GIN",
+    "Guinea-Bissau": "GNB",
+    "Guyana": "GUY",
+    "Haiti": "HTI",
+    "Heard and McDonald Islands": "HMD",
+    "High-income economies": "F9010",
+    "Holy See": "VAT",
+    "Honduras": "HND",
+    "Hungary": "HUN",
+    "Iceland": "ISL",
+    "India": "IND",
+    "Indonesia": "IDN",
+    "International Centres (FAO) (2.5.1.a)": "F5823",
+    "Iran (Islamic Republic of)": "IRN",
+    "Iraq": "IRQ",
+    "Ireland": "IRL",
+    "Isle of Man": "IMN",
+    "Israel": "ISR",
+    "Italy": "ITA",
+    "Jamaica": "JAM",
+    "Japan": "JPN",
+    "Jersey": "JEY",
+    "Johnston Island": "JTN",
+    "Jordan": "JOR",
+    "Kazakhstan": "KAZ",
+    "Kenya": "KEN",
+    "Kiribati": "KIR",
+    "Kuwait": "KWT",
+    "Kyrgyzstan": "KGZ",
+    "Land Locked Developing Countries": "F5802",
+    "Lao People's Democratic Republic": "LAO",
+    "Latin America": "F348",
+    "Latin America and the Caribbean": "F5205",
+    "Latvia": "LVA",
+    "Least Developed Countries": "F5801",
+    "Lebanon": "LBN",
+    "Lesotho": "LSO",
+    "Liberia": "LBR",
+    "Libya": "LBY",
+    "Liechtenstein": "LIE",
+    "Lithuania": "LTU",
+    "Low income economies": "F5858",
+    "Low Income Food Deficit Countries": "F5815",
+    "Lower-middle-income economies": "F5859",
+    "Luxembourg": "LUX",
+    "Madagascar": "MDG",
+    "Malawi": "MWI",
+    "Malaysia": "MYS",
+    "Maldives": "MDV",
+    "Mali": "MLI",
+    "Malta": "MLT",
+    "Marshall Islands": "MHL",
+    "Martinique": "MTQ",
+    "Mauritania": "MRT",
+    "Mauritius": "MUS",
+    "Mayotte": "MYT",
+    "Melanesia": "F5502",
+    "Mexico": "MEX",
+    "Micronesia": "F5503",
+    "Micronesia (Federated States of)": "FSM",
+    "Middle Africa": "F5102",
+    "Midway Island": "MID",
+    "Monaco": "MCO",
+    "Mongolia": "MNG",
+    "Montenegro": "MNE",
+    "Montserrat": "MSR",
+    "Morocco": "MAR",
+    "Mozambique": "MOZ",
+    "Myanmar": "MMR",
+    "Namibia": "NAM",
+    "Nauru": "NRU",
+    "Nepal": "NPL",
+    "Net Food Importing Developing Countries": "F5817",
+    "Netherlands (Kingdom of the)": "NLD",
+    "Netherlands Antilles (former)": "ANT",
+    "New Caledonia": "NCL",
+    "New Zealand": "NZL",
+    "Nicaragua": "NIC",
+    "Niger": "NER",
+    "Nigeria": "NGA",
+    "Niue": "NIU",
+    "Non-Annex I countries": "F5849",
+    "Norfolk Island": "NFK",
+    "North and Central America": "F336",
+    "North Macedonia": "MKD",
+    "Northern Africa": "F5103",
+    "Northern Africa (excluding Sudan)": "F429",
+    "Northern America": "F5203",
+    "Northern America and Europe": "F5208",
+    "Northern Europe": "F5402",
+    "Northern Mariana Islands": "MNP",
+    "Norway": "NOR",
+    "Oceania": "F5500",
+    "Oceania excluding Australia and New Zealand": "F5807",
+    "OECD": "F5873",
+    "Oman": "OMN",
+    "Pacific Islands Trust Territory": "F164",
+    "Pakistan": "PAK",
+    "Palau": "PLW",
+    "Palestine": "PSE",
+    "Panama": "PAN",
+    "Papua New Guinea": "PNG",
+    "Paraguay": "PRY",
+    "Peru": "PER",
+    "Philippines": "PHL",
+    "Pitcairn": "PCN",
+    "Poland": "POL",
+    "Polynesia": "F5504",
+    "Portugal": "PRT",
+    "Puerto Rico": "PRI",
+    "Qatar": "QAT",
+    "Regional Centres (FAO) (2.5.1.a)": "F5822",
+    "Republic of Korea": "KOR",
+    "Republic of Moldova": "MDA",
+    "Réunion": "REU",
+    "Romania": "ROU",
+    "Russian Federation": "RUS",
+    "Rwanda": "RWA",
+    "Saint Barthélemy": "BLM",
+    "Saint Helena, Ascension and Tristan da Cunha": "SHN",
+    "Saint Kitts and Nevis": "KNA",
+    "Saint Lucia": "LCA",
+    "Saint Martin (French part)": "MAF",
+    "Saint Pierre and Miquelon": "SPM",
+    "Saint Vincent and the Grenadines": "VCT",
+    "Samoa": "WSM",
+    "San Marino": "SMR",
+    "Sao Tome and Principe": "STP",
+    "Sark": "F285",
+    "Saudi Arabia": "SAU",
+    "Senegal": "SEN",
+    "Serbia": "SRB",
+    "Serbia and Montenegro": "SCG",
+    "Seychelles": "SYC",
+    "Sierra Leone": "SLE",
+    "Singapore": "SGP",
+    "Sint Maarten (Dutch part)": "SXM",
+    "Slovakia": "SVK",
+    "Slovenia": "SVN",
+    "Small Island Developing States": "F5803",
+    "Solomon Islands": "SLB",
+    "Somalia": "SOM",
+    "South Africa": "ZAF",
+    "South America": "F5207",
+    "South Georgia and the South Sandwich Islands": "SGS",
+    "South Sudan": "SSD",
+    "South-eastern Asia": "F5304",
+    "Southern Africa": "F5104",
+    "Southern Asia": "F5303",
+    "Southern Asia (excluding India)": "F5855",
+    "Southern Europe": "F5403",
+    "Spain": "ESP",
+    "Sri Lanka": "LKA",
+    "Sub-Saharan Africa": "F420",
+    "Sub-Saharan Africa (including Sudan)": "F5810",
+    "Sudan": "SDN",
+    "Sudan (former)": "F206",
+    "Suriname": "SUR",
+    "Svalbard and Jan Mayen Islands": "SJM",
+    "Sweden": "SWE",
+    "Switzerland": "CHE",
+    "Syrian Arab Republic": "SYR",
+    "Tajikistan": "TJK",
+    "Thailand": "THA",
+    "Timor-Leste": "TLS",
+    "Togo": "TGO",
+    "Tokelau": "TKL",
+    "Tonga": "TON",
+    "Trinidad and Tobago": "TTO",
+    "Tunisia": "TUN",
+    "Türkiye": "TUR",
+    "Turkmenistan": "TKM",
+    "Turks and Caicos Islands": "TCA",
+    "Tuvalu": "TUV",
+    "Uganda": "UGA",
+    "Ukraine": "UKR",
+    "United Arab Emirates": "ARE",
+    "United Kingdom of Great Britain and Northern Ireland": "GBR",
+    "United Republic of Tanzania": "TZA",
+    "United States Minor Outlying Islands": "UMI",
+    "United States of America": "USA",
+    "United States Virgin Islands": "VIR",
+    "Upper-middle-income economies": "F9011",
+    "Uruguay": "URY",
+    "USSR": "F228",
+    "Uzbekistan": "UZB",
+    "Vanuatu": "VUT",
+    "Venezuela (Bolivarian Republic of)": "VEN",
+    "Viet Nam": "VNM",
+    "Wake Island": "WAK",
+    "Wallis and Futuna Islands": "WLF",
+    "Western Africa": "F5105",
+    "Western Asia": "F5305",
+    "Western Asia (exc. Armenia, Azerbaijan, Cyprus, Israel and Georgia)": "F5828",
+    "Western Asia and Northern Africa": "F5308",
+    "Western Europe": "F5404",
+    "Western Sahara": "ESH",
+    "World": "X01",
+    "Yemen": "YEM",
+    "Yemen Ar Rp": "F246",
+    "Yemen Dem": "F247",
+    "Yugoslav SFR": "F248",
+    "Zambia": "ZMB",
+    "Zimbabwe": "ZWE",
+    # Reading the special characters (é, ô, ü etc.) fails for some domains, so the
+    # affected names are mapped a second time below. TODO: find a cleaner solution.
+    "Côte d'Ivoire": "CIV",  # noqa: RUF001
+    "Curaçao": "CUW",
+    "Réunion": "REU",
+    "Türkiye": "TUR",
+}

+ 40 - 419
src/faostat_data_primap/helper/definitions.py

@@ -1,7 +1,5 @@
 """definitions like folders, mappings etc."""
 
-from pathlib import Path
-
 domains = {
     "farm_gate_emissions_crops": {
         "url_domain": "https://www.fao.org/faostat/en/#data/GCE",
@@ -40,45 +38,7 @@ domains = {
     },
 }
 
-
-def get_root_path(root_indicator: str = ".git") -> Path:
-    """
-    Traverse up from the current script location to find the repository root.
-
-    The root is defined by the presence of a root_indicator file or
-    directory (e.g., '.git').
-
-    Parameters
-    ----------
-        root_indicator
-            A filename or directory name that indicates the root of the repository.
-
-    Returns
-    -------
-    Path
-        The path to the root directory of the repository.
-
-    Raises
-    ------
-        RuntimeError: If the repository root cannot be found.
-    """
-    current_dir = Path(__file__).resolve().parent
-    while current_dir != Path(current_dir.root):
-        if (current_dir / root_indicator).exists():
-            return current_dir
-        current_dir = current_dir.parent
-    msg = f"Repository root with indicator '{root_indicator}' not found."
-    raise RuntimeError(msg)
-
-
-root_path = get_root_path()
-code_path = root_path / "src" / "faostat_data_primap"
-extracted_data_path = root_path / "extracted_data"
-downloaded_data_path = root_path / "downloaded_data"
-
-# data reading
 areas_to_remove_base = [
-    # We can aggregate these country groups ourselves if we need to
     "World",
     "Africa",
     "Eastern Africa",
@@ -115,20 +75,17 @@ areas_to_remove_base = [
     "Annex I countries",
     "Non-Annex I countries",
     "OECD",
-"Europe, Northern America, Australia and New Zealand",
+    "Europe, Northern America, Australia and New Zealand",
 ]
 
 read_config_all = {
     "farm_gate_agriculture_energy": {
         "2024-11-14": {
-            # todo is NOFLAG the right choice?
             "filename": "Emissions_Agriculture_Energy_E_All_Data_NOFLAG.csv",
-            # we don't need energy in Joule
-            # todo maybe explicitly deleting elements is better
-            "units_to_remove": ["TJ"],
             "areas_to_remove": [
                 *areas_to_remove_base,
             ],
+            "elements_to_remove": ["Energy use in agriculture"],
             "entity_mapping": {
                 "Emissions (CO2)": "CO2",
                 "Emissions (CH4)": "CH4",
@@ -151,14 +108,8 @@ read_config_all = {
             "areas_to_remove": [
                 *areas_to_remove_base,
                 "European Union (27)",
-                # This seems to be data for a Belgian province,
-                # I don't think we need it
-                "Belgium-Luxembourg",
-                # I'm not sure if we can downscale these two
-                "Serbia and Montenegro",
             ],
             "elements_to_remove": [
-                # all these elements are not emissions
                 "Crop residues (N content)",
                 "Burning crop residues (Biomass burned, dry matter)",
                 "Area harvested",
@@ -178,7 +129,10 @@ read_config_all = {
                 "Synthetic fertilizers (Emissions N2O)": "N2O",
                 "Synthetic fertilizers (Direct emissions N2O)": "N2O",
                 "Indirect emissions (N2O that leaches) (Synthetic fertilizers)": "N2O",
-                "Indirect emissions (N2O that volatilises) (Synthetic fertilizers)": "N2O",
+                (
+                    "Indirect emissions (N2O that volatilises) "
+                    "(Synthetic fertilizers)"
+                ): "N2O",
             },
             "columns_to_drop": [
                 "Element",
@@ -201,8 +155,6 @@ read_config_all = {
                 "Belgium-Luxembourg",
                 "Serbia and Montenegro",
                 "European Union (27)",
-                # drop duplicate country
-                "China, mainland",
             ],
             "elements_to_remove": [
                 "Stocks",  # number of animals
@@ -215,7 +167,6 @@ read_config_all = {
                 "Manure applied to soils that volatilises (N content)",
             ],
             "entity_mapping": {
-                # todo we could make this smarter and get the entity from the string
                 "Livestock total (Emissions N2O)": "N2O",
                 "Livestock total (Emissions CH4)": "CH4",
                 "Enteric fermentation (Emissions CH4)": "CH4",
@@ -255,12 +206,9 @@ read_config_all = {
                 "Belgium-Luxembourg",
                 "Serbia and Montenegro",
                 "European Union (27)",
-                # check todo channel islands belong to UK
-                "Channel Islands",
             ],
             "elements_to_remove": [
                 "Area",
-                # todo can we convert this into emissions?
                 "Net stock change (C)",
             ],
             "entity_mapping": {
@@ -284,11 +232,7 @@ read_config_all = {
             "filename": "Emissions_Land_Use_Fires_E_All_Data_NOFLAG.csv",
             "areas_to_remove": [
                 *areas_to_remove_base,
-                "Belgium-Luxembourg",
-                "Serbia and Montenegro",
                 "European Union (27)",
-                # check todo channel islands belong to UK
-                "Channel Islands",
             ],
             "elements_to_remove": ["Biomass burned (dry matter)", "Burned Area"],
             "entity_mapping": {
@@ -313,11 +257,11 @@ read_config_all = {
             "filename": "Emissions_Land_Use_Forests_E_All_Data_NOFLAG.csv",
             "areas_to_remove": [
                 *areas_to_remove_base,
-                "Belgium-Luxembourg",
-                "Serbia and Montenegro",
+                # "Belgium-Luxembourg",
+                # "Serbia and Montenegro",
                 "European Union (27)",
                 # "China" and "China, mainland" included with identical data
-                "China, mainland",
+                # "China, mainland",
             ],
             "elements_to_remove": [
                 "Area",
@@ -340,10 +284,10 @@ read_config_all = {
             "filename": "Emissions_Pre_Post_Production_E_All_Data_NOFLAG.csv",
             "areas_to_remove": [
                 *areas_to_remove_base,
-                "Belgium-Luxembourg",
-                "Serbia and Montenegro",
+                # "Belgium-Luxembourg",
+                # "Serbia and Montenegro",
                 "European Union (27)",
-                "Channel Islands",
+                # "Channel Islands",
             ],
             "elements_to_remove": [
                 "Energy Use (Total)",
@@ -370,357 +314,34 @@ read_config_all = {
             ],
         }
     },
+    "replace_units": {
+        "KYOTOGHG (AR5GWP100) * kt/ year": "CO2 * kt / year",
+        "FGASES (AR5GWP100) * kt/ year": "CO2 * kt/ year",
+    },
 }
 
-# from https://www.fao.org/faostat/en/#definitions
-country_to_iso3_mapping = {
-    "Afghanistan": "AFG",
-    "Africa": "X06",
-    "Åland Islands": "ALA",
-    "Albania": "ALB",
-    "Algeria": "DZA",
-    "American Samoa": "ASM",
-    "Americas": "X21",
-    "Andorra": "AND",
-    "Angola": "AGO",
-    "Anguilla": "AIA",
-    "Annex I countries": "F5848",
-    "Antarctic Region": "F5600",
-    "Antarctica": "ATA",
-    "Antigua and Barbuda": "ATG",
-    "Argentina": "ARG",
-    "Armenia": "ARM",
-    "Aruba": "ABW",
-    "Asia": "F5300",
-    "Australia": "AUS",
-    "Australia and New Zealand": "F5501",
-    "Austria": "AUT",
-    "Azerbaijan": "AZE",
-    "Bahamas": "BHS",
-    "Bahrain": "BHR",
-    "Bangladesh": "BGD",
-    "Barbados": "BRB",
-    "Belarus": "BLR",
-    "Belgium": "BEL",
-    "Belgium-Luxembourg": "F15",
-    "Belize": "BLZ",
-    "Benin": "BEN",
-    "Bermuda": "BMU",
-    "Bhutan": "BTN",
-    "Bolivia (Plurinational State of)": "BOL",
-    "Bonaire, Sint Eustatius and Saba": "BES",
-    "Bosnia and Herzegovina": "BIH",
-    "Botswana": "BWA",
-    "Bouvet Island": "BVT",
-    "Brazil": "BRA",
-    "British Virgin Islands": "VGB",
-    "Brunei Darussalam": "BRN",
-    "Bulgaria": "BGR",
-    "Burkina Faso": "BFA",
-    "Burundi": "BDI",
-    "Cabo Verde": "CPV",
-    "Cambodia": "KHM",
-    "Cameroon": "CMR",
-    "Canada": "CAN",
-    "Caribbean": "F5206",
-    "Caucasus and Central Asia": "F5857",
-    "Cayman Islands": "CYM",
-    "Central African Republic": "CAF",
-    "Central America": "F5204",
-    "Central Asia": "F5301",
-    "Central Asia and Southern Asia": "F5306",
-    "Chad": "TCD",
-    "Chagos Archipelago": "IOT",
-    "Channel Islands": "CHA",
-    "Chile": "CHL",
-    "China": "F351",
-    "China, Hong Kong SAR": "HKG",
-    "China, Macao SAR": "MAC",
-    "China, mainland": "CHN",
-    "China, Taiwan Province of": "TWN",
-    "Christmas Island": "CXR",
-    "Cocos (Keeling) Islands": "CCK",
-    "Colombia": "COL",
-    "Comoros": "COM",
-    "Congo": "COG",
-    "Cook Islands": "COK",
-    "Costa Rica": "CRI",
-    "Côte d'Ivoire": "CIV",
-    "Croatia": "HRV",
-    "Cuba": "CUB",
-    "Curaçao": "CUW",
-    "Cyprus": "CYP",
-    "Czechia": "CZE",
-    "Czechoslovakia": "F51",
-    "Democratic People's Republic of Korea": "PRK",
-    "Democratic Republic of the Congo": "COD",
-    "Denmark": "DNK",
-    "Djibouti": "DJI",
-    "Dominica": "DMA",
-    "Dominican Republic": "DOM",
-    "Eastern Africa": "F5101",
-    "Eastern Asia": "F5302",
-    "Eastern Asia (excluding Japan and China)": "F5829",
-    "Eastern Asia and South-eastern Asia": "F5307",
-    "Eastern Europe": "F5401",
-    "Ecuador": "ECU",
-    "Egypt": "EGY",
-    "El Salvador": "SLV",
-    "Equatorial Guinea": "GNQ",
-    "Eritrea": "ERI",
-    "Estonia": "EST",
-    "Eswatini": "SWZ",
-    "Ethiopia": "ETH",
-    "Ethiopia PDR": "F62",
-    "Europe": "F5400",
-    "Europe, Northern America, Australia and New Zealand": "nan",
-    "European Union (27)": "F5707",
-    "Falkland Islands (Malvinas)": "FLK",
-    "FAO Major Fishing Area: Atlantic, Eastern Central (14.4.1)": "F99029",
-    "FAO Major Fishing Area: Atlantic, Northeast (14.4.1)": "F99024",
-    "FAO Major Fishing Area: Atlantic, Northwest (14.4.1)": "F99023",
-    "FAO Major Fishing Area: Atlantic, Southeast (14.4.1)": "F99026",
-    "FAO Major Fishing Area: Atlantic, Southwest (14.4.1)": "F99030",
-    "FAO Major Fishing Area: Atlantic, Western Central (14.4.1)": "F99028",
-    "FAO Major Fishing Area: Indian Ocean, Eastern (14.4.1)": "F99025",
-    "FAO Major Fishing Area: Indian Ocean, Western (14.4.1)": "F99027",
-    "FAO Major Fishing Area: Mediterranean and Black Sea (14.4.1)": "F99032",
-    "FAO Major Fishing Area: Pacific, Eastern Central (14.4.1)": "F99018",
-    "FAO Major Fishing Area: Pacific, Northeast (14.4.1)": "F99019",
-    "FAO Major Fishing Area: Pacific, Northwest (14.4.1)": "F99020",
-    "FAO Major Fishing Area: Pacific, Southeast (14.4.1)": "F99031",
-    "FAO Major Fishing Area: Pacific, Southwest (14.4.1)": "F99022",
-    "FAO Major Fishing Area: Pacific, Western Central (14.4.1)": "F99021",
-    "Faroe Islands": "FRO",
-    "Fiji": "FJI",
-    "Finland": "FIN",
-    "France": "FRA",
-    "French Guiana": "GUF",
-    "French Polynesia": "PYF",
-    "French Southern Territories": "ATF",
-    "Gabon": "GAB",
-    "Gambia": "GMB",
-    "Georgia": "GEO",
-    "Germany": "DEU",
-    "Germany Fr": "F78",
-    "Germany Nl": "F77",
-    "Ghana": "GHA",
-    "Gibraltar": "GIB",
-    "Greece": "GRC",
-    "Greenland": "GRL",
-    "Grenada": "GRD",
-    "Guadeloupe": "GLP",
-    "Guam": "GUM",
-    "Guatemala": "GTM",
-    "Guernsey": "GGY",
-    "Guinea": "GIN",
-    "Guinea-Bissau": "GNB",
-    "Guyana": "GUY",
-    "Haiti": "HTI",
-    "Heard and McDonald Islands": "HMD",
-    "High-income economies": "F9010",
-    "Holy See": "VAT",
-    "Honduras": "HND",
-    "Hungary": "HUN",
-    "Iceland": "ISL",
-    "India": "IND",
-    "Indonesia": "IDN",
-    "International Centres (FAO) (2.5.1.a)": "F5823",
-    "Iran (Islamic Republic of)": "IRN",
-    "Iraq": "IRQ",
-    "Ireland": "IRL",
-    "Isle of Man": "IMN",
-    "Israel": "ISR",
-    "Italy": "ITA",
-    "Jamaica": "JAM",
-    "Japan": "JPN",
-    "Jersey": "JEY",
-    "Johnston Island": "JTN",
-    "Jordan": "JOR",
-    "Kazakhstan": "KAZ",
-    "Kenya": "KEN",
-    "Kiribati": "KIR",
-    "Kuwait": "KWT",
-    "Kyrgyzstan": "KGZ",
-    "Land Locked Developing Countries": "F5802",
-    "Lao People's Democratic Republic": "LAO",
-    "Latin America": "F348",
-    "Latin America and the Caribbean": "F5205",
-    "Latvia": "LVA",
-    "Least Developed Countries": "F5801",
-    "Lebanon": "LBN",
-    "Lesotho": "LSO",
-    "Liberia": "LBR",
-    "Libya": "LBY",
-    "Liechtenstein": "LIE",
-    "Lithuania": "LTU",
-    "Low income economies": "F5858",
-    "Low Income Food Deficit Countries": "F5815",
-    "Lower-middle-income economies": "F5859",
-    "Luxembourg": "LUX",
-    "Madagascar": "MDG",
-    "Malawi": "MWI",
-    "Malaysia": "MYS",
-    "Maldives": "MDV",
-    "Mali": "MLI",
-    "Malta": "MLT",
-    "Marshall Islands": "MHL",
-    "Martinique": "MTQ",
-    "Mauritania": "MRT",
-    "Mauritius": "MUS",
-    "Mayotte": "MYT",
-    "Melanesia": "F5502",
-    "Mexico": "MEX",
-    "Micronesia": "F5503",
-    "Micronesia (Federated States of)": "FSM",
-    "Middle Africa": "F5102",
-    "Midway Island": "MID",
-    "Monaco": "MCO",
-    "Mongolia": "MNG",
-    "Montenegro": "MNE",
-    "Montserrat": "MSR",
-    "Morocco": "MAR",
-    "Mozambique": "MOZ",
-    "Myanmar": "MMR",
-    "Namibia": "NAM",
-    "Nauru": "NRU",
-    "Nepal": "NPL",
-    "Net Food Importing Developing Countries": "F5817",
-    "Netherlands (Kingdom of the)": "NLD",
-    "Netherlands Antilles (former)": "ANT",
-    "New Caledonia": "NCL",
-    "New Zealand": "NZL",
-    "Nicaragua": "NIC",
-    "Niger": "NER",
-    "Nigeria": "NGA",
-    "Niue": "NIU",
-    "Non-Annex I countries": "F5849",
-    "Norfolk Island": "NFK",
-    "North and Central America": "F336",
-    "North Macedonia": "MKD",
-    "Northern Africa": "F5103",
-    "Northern Africa (excluding Sudan)": "F429",
-    "Northern America": "F5203",
-    "Northern America and Europe": "F5208",
-    "Northern Europe": "F5402",
-    "Northern Mariana Islands": "MNP",
-    "Norway": "NOR",
-    "Oceania": "F5500",
-    "Oceania excluding Australia and New Zealand": "F5807",
-    "OECD": "F5873",
-    "Oman": "OMN",
-    "Pacific Islands Trust Territory": "F164",
-    "Pakistan": "PAK",
-    "Palau": "PLW",
-    "Palestine": "PSE",
-    "Panama": "PAN",
-    "Papua New Guinea": "PNG",
-    "Paraguay": "PRY",
-    "Peru": "PER",
-    "Philippines": "PHL",
-    "Pitcairn": "PCN",
-    "Poland": "POL",
-    "Polynesia": "F5504",
-    "Portugal": "PRT",
-    "Puerto Rico": "PRI",
-    "Qatar": "QAT",
-    "Regional Centres (FAO) (2.5.1.a)": "F5822",
-    "Republic of Korea": "KOR",
-    "Republic of Moldova": "MDA",
-    "Réunion": "REU",
-    "Romania": "ROU",
-    "Russian Federation": "RUS",
-    "Rwanda": "RWA",
-    "Saint Barthélemy": "BLM",
-    "Saint Helena, Ascension and Tristan da Cunha": "SHN",
-    "Saint Kitts and Nevis": "KNA",
-    "Saint Lucia": "LCA",
-    "Saint Martin (French part)": "MAF",
-    "Saint Pierre and Miquelon": "SPM",
-    "Saint Vincent and the Grenadines": "VCT",
-    "Samoa": "WSM",
-    "San Marino": "SMR",
-    "Sao Tome and Principe": "STP",
-    "Sark": "F285",
-    "Saudi Arabia": "SAU",
-    "Senegal": "SEN",
-    "Serbia": "SRB",
-    "Serbia and Montenegro": "SCG",
-    "Seychelles": "SYC",
-    "Sierra Leone": "SLE",
-    "Singapore": "SGP",
-    "Sint Maarten (Dutch part)": "SXM",
-    "Slovakia": "SVK",
-    "Slovenia": "SVN",
-    "Small Island Developing States": "F5803",
-    "Solomon Islands": "SLB",
-    "Somalia": "SOM",
-    "South Africa": "ZAF",
-    "South America": "F5207",
-    "South Georgia and the South Sandwich Islands": "SGS",
-    "South Sudan": "SSD",
-    "South-eastern Asia": "F5304",
-    "Southern Africa": "F5104",
-    "Southern Asia": "F5303",
-    "Southern Asia (excluding India)": "F5855",
-    "Southern Europe": "F5403",
-    "Spain": "ESP",
-    "Sri Lanka": "LKA",
-    "Sub-Saharan Africa": "F420",
-    "Sub-Saharan Africa (including Sudan)": "F5810",
-    "Sudan": "SDN",
-    "Sudan (former)": "F206",
-    "Suriname": "SUR",
-    "Svalbard and Jan Mayen Islands": "SJM",
-    "Sweden": "SWE",
-    "Switzerland": "CHE",
-    "Syrian Arab Republic": "SYR",
-    "Tajikistan": "TJK",
-    "Thailand": "THA",
-    "Timor-Leste": "TLS",
-    "Togo": "TGO",
-    "Tokelau": "TKL",
-    "Tonga": "TON",
-    "Trinidad and Tobago": "TTO",
-    "Tunisia": "TUN",
-    "Türkiye": "TUR",
-    "Turkmenistan": "TKM",
-    "Turks and Caicos Islands": "TCA",
-    "Tuvalu": "TUV",
-    "Uganda": "UGA",
-    "Ukraine": "UKR",
-    "United Arab Emirates": "ARE",
-    "United Kingdom of Great Britain and Northern Ireland": "GBR",
-    "United Republic of Tanzania": "TZA",
-    "United States Minor Outlying Islands": "UMI",
-    "United States of America": "USA",
-    "United States Virgin Islands": "VIR",
-    "Upper-middle-income economies": "F9011",
-    "Uruguay": "URY",
-    "USSR": "F228",
-    "Uzbekistan": "UZB",
-    "Vanuatu": "VUT",
-    "Venezuela (Bolivarian Republic of)": "VEN",
-    "Viet Nam": "VNM",
-    "Wake Island": "WAK",
-    "Wallis and Futuna Islands": "WLF",
-    "Western Africa": "F5105",
-    "Western Asia": "F5305",
-    "Western Asia (exc. Armenia, Azerbaijan, Cyprus, Israel and Georgia)": "F5828",
-    "Western Asia and Northern Africa": "F5308",
-    "Western Europe": "F5404",
-    "Western Sahara": "ESH",
-    "World": "X01",
-    "Yemen": "YEM",
-    "Yemen Ar Rp": "F246",
-    "Yemen Dem": "F247",
-    "Yugoslav SFR": "F248",
-    "Zambia": "ZMB",
-    "Zimbabwe": "ZWE",
-    # reading the special characters (é, ô, ü etc.) fails for some domains
-    # todo there is probably a better way to solve this
-    "Côte d'Ivoire" : "CIV",
-    "Curaçao" : "CUW",
-    "Réunion" : "REU",
-    "Türkiye" : "TUR",
+config_to_if = {
+    "coords_cols": {
+        "area": "country (ISO3)",
+        "unit": "Unit",
+        "entity": "entity",
+        "source": "Source",
+        "category": "category",
+    },
+    "coords_terminologies": {"area": "ISO3", "category": "FAOSTAT", "scenario": "FAO"},
+    "coords_value_mapping": {},
+    "filter_keep": {},
+    "filter_remove": {},
+    "meta_data": {
+        "references": "https://www.fao.org/faostat",
+        "rights": "Creative Commons Attribution-4.0 International licence (CC BY 4.0)",
+        "contact": "daniel.busch@climate-resource.com",
+        "title": "Agrifood systems emissions",
+        "comment": (
+            "Published by Food and Agriculture Organization of the "
+            "United Nations (FAO), converted to PRIMAP2 format by "
+            "Daniel Busch"
+        ),
+        "institution": ("Food and Agriculture Organization of the United Nations"),
+    },
 }

+ 38 - 0
src/faostat_data_primap/helper/paths.py

@@ -0,0 +1,38 @@
+"""Root path and data paths definition"""
+from pathlib import Path
+
+
+def get_root_path(root_indicator: str = ".git") -> Path:
+    """
+    Traverse up from the current script location to find the repository root.
+
+    The root is defined by the presence of a root_indicator file or
+    directory (e.g., '.git').
+
+    Parameters
+    ----------
+        root_indicator
+            A filename or directory name that indicates the root of the repository.
+
+    Returns
+    -------
+    Path
+        The path to the root directory of the repository.
+
+    Raises
+    ------
+        RuntimeError: If the repository root cannot be found.
+    """
+    current_dir = Path(__file__).resolve().parent
+    while current_dir != Path(current_dir.root):
+        if (current_dir / root_indicator).exists():
+            return current_dir
+        current_dir = current_dir.parent
+    msg = f"Repository root with indicator '{root_indicator}' not found."
+    raise RuntimeError(msg)
+
+
+root_path = get_root_path()
+code_path = root_path / "src" / "faostat_data_primap"
+extracted_data_path = root_path / "extracted_data"
+downloaded_data_path = root_path / "downloaded_data"

+ 170 - 143
src/faostat_data_primap/read.py

@@ -1,158 +1,185 @@
 """read data set"""
 
+import os
+import pathlib
+
 import pandas as pd
 import primap2 as pm2
 
+from src.faostat_data_primap.helper.country_mapping import country_to_iso3_mapping
 from src.faostat_data_primap.helper.definitions import (
-    country_to_iso3_mapping,
-    downloaded_data_path,
+    config_to_if,
     read_config_all,
 )
-
-files_to_read = (
-    (
-        "farm_gate_agriculture_energy",
-        "2024-11-14",
-    ),
-    (
-        "farm_gate_emissions_crops",
-        "2024-11-14",
-    ),
-    (
-        "farm_gate_livestock",
-        "2024-11-14",
-    ),
-    (
-        "land_use_drained_organic_soils",
-        "2023-11-09",
-    ),
-    (
-        "land_use_fires",
-        "2023-11-09",
-    ),
-    (
-        "land_use_forests",
-        "2024-11-14",
-    ),
-    (
-        "pre_post_agricultural_production",
-        "2023-11-09",
-    ),
+from src.faostat_data_primap.helper.paths import (
+    downloaded_data_path,
+    extracted_data_path,
 )
 
-df_all = None
-country_mapping = {}
-# todo remove reversed, I'm using it to get the new domain first in the debugger
-for domain, release in reversed(files_to_read):
-    read_config = read_config_all[domain][release]
-
-    print(f"Read {read_config["filename"]}")
-    dataset_path = downloaded_data_path / domain / release / read_config["filename"]
-    # There are some non-utf8 characters in Emissions_Drained_Organic_Soils_E_All_Data_NOFLAG.csv
-    df_domain = pd.read_csv(dataset_path, encoding="ISO-8859-1")
-
-    # remove rows by unit
-    # todo this is maybe not a good idea as it hides the elements to be removed
-    if "units_to_remove" in read_config.keys():
-        df_domain = df_domain[~df_domain["Unit"].isin(read_config["units_to_remove"])]
-
-    # remove rows by element
-    if "elements_to_remove" in read_config.keys():
-        df_domain = df_domain[
-            ~df_domain["Element"].isin(read_config["elements_to_remove"])
-        ]
-
-    # remove rows by area
-    if "areas_to_remove" in read_config.keys():
-        df_domain = df_domain[~df_domain["Area"].isin(read_config["areas_to_remove"])]
-
-    # create country columns
-    df_domain["country (ISO3)"] = df_domain["Area"].map(country_to_iso3_mapping)
-
-    # check all countries are converted into iso3 codes
-    if any(df_domain["country (ISO3)"].isna()):
-        raise ValueError
-
-    # create entity column
-    df_domain["entity"] = df_domain["Element"].map(read_config["entity_mapping"])
-
-    # create category column (combination of Item and Element works best)
-    df_domain["category"] = df_domain["Item"] + " " + df_domain["Element"]
-
-    # drop columns we don't need
-    df_domain = df_domain.drop(
-        read_config["columns_to_drop"],
-        axis=1,
-    )
 
-    if df_all is None:
-        df_all = df_domain
-    else:
-        # makes sure there are no duplicate category names
-        if any(
-            [
-                category in df_all["category"].unique()
-                for category in df_domain["category"].unique()
+def get_all_domains(downloaded_data_path: pathlib.Path) -> list[str]:
+    """
+    Get a list of all available domains.
+
+    Every sub-directory of the downloaded data directory counts as a domain;
+    plain files are ignored.
+
+    Parameters
+    ----------
+    downloaded_data_path
+        The path to the downloaded data sets.
+
+    Returns
+    -------
+    list[str]
+        Names of all domains in the downloaded data directory, sorted
+        alphabetically.
+    """
+    # Directory listings come back in arbitrary, platform dependent order.
+    # Sorting keeps the order in which domains are processed reproducible.
+    return sorted(
+        entry.name for entry in downloaded_data_path.iterdir() if entry.is_dir()
+    )
+
+
+def get_latest_release(domain_path: pathlib.Path) -> str:
+    """
+    Get the latest release in a domain directory.
+
+    Release directories are compared by name, so the names have to sort
+    chronologically (e.g. ISO dates such as ``2024-11-14``). Plain files in
+    the domain directory are ignored.
+
+    Parameters
+    ----------
+    domain_path
+        The path to the domain
+
+    Returns
+    -------
+    str
+        Name of the directory with latest data.
+
+    Raises
+    ------
+    IndexError
+        If the domain directory does not contain any release directory.
+    """
+    all_releases = [
+        release.name for release in domain_path.iterdir() if release.is_dir()
+    ]
+    if not all_releases:
+        # Same exception type as an out-of-range ``[0]`` lookup on an empty
+        # list, but with a message that names the affected directory.
+        msg = f"No release directories found in '{domain_path}'"
+        raise IndexError(msg)
+    return max(all_releases)
+
+
+def read_latest_data() -> None:
+    """
+    Read and save the latest data
+
+    Converts downloaded data into interchange format and primap2 native format
+    and saves the files in the extracted_data directory.
+
+    For every domain found in the downloaded data directory the most recent
+    release is read, filtered according to its read configuration and
+    concatenated into one data frame. The newest release date over all
+    domains is used as scenario / release name of the output.
+
+    Raises
+    ------
+    ValueError
+        If no downloaded domain is found, if an area cannot be mapped to an
+        ISO3 code, or if two domains contain the same category name.
+    """
+    domains = get_all_domains(downloaded_data_path)
+
+    files_to_read = [
+        (domain, get_latest_release(downloaded_data_path / domain))
+        for domain in domains
+    ]
+
+    # Fail early with a clear message instead of crashing on ``None`` below
+    if not files_to_read:
+        msg = f"No downloaded domains found in '{downloaded_data_path}'"
+        raise ValueError(msg)
+
+    df_all = None
+    for domain, release in files_to_read:
+        read_config = read_config_all[domain][release]
+
+        # single quotes inside, so the f-string also parses on Python < 3.12
+        print(f"Read {read_config['filename']}")
+        dataset_path = downloaded_data_path / domain / release / read_config["filename"]
+
+        # There are some non-utf8 characters
+        df_domain = pd.read_csv(dataset_path, encoding="ISO-8859-1")
+
+        # remove rows by element
+        if "elements_to_remove" in read_config:
+            df_domain = df_domain[
+                ~df_domain["Element"].isin(read_config["elements_to_remove"])
             ]
-        ):
-            msg = f"Duplicate category names for {domain}"
+
+        # remove rows by area
+        if "areas_to_remove" in read_config:
+            df_domain = df_domain[
+                ~df_domain["Area"].isin(read_config["areas_to_remove"])
+            ]
+
+        # create country columns
+        df_domain["country (ISO3)"] = df_domain["Area"].map(country_to_iso3_mapping)
+
+        # check all countries are converted into iso3 codes
+        if any(df_domain["country (ISO3)"].isna()):
+            msg = "Not all countries are converted into ISO3 codes"
             raise ValueError(msg)
-        df_all = pd.concat(
-            [df_all, df_domain],
-            axis=0,
-            join="outer",
-        ).reset_index(drop=True)
-
-# df_all = df_all.drop(labels=["Source"], axis=1)
-df_all["Source"] = df_all["Source"].fillna("unknown")
-coords_cols = {
-    "area": "country (ISO3)",
-    "unit": "Unit",
-    "entity": "entity",
-    "source": "Source",
-    "category" : "category"
-}
-
-coords_terminologies = {"area": "ISO3", "category": "FAOSTAT", "scenario": "FAO"}
-
-coords_defaults = {
-    # "source": "FAO",
-    "scenario": release,
-}
-
-coords_value_mapping = {}
-filter_keep = {}
-filter_remove = {}
-meta_data = {
-    "references": "https://www.fao.org/faostat",
-    "rights": "Creative Commons Attribution-4.0 International licence (CC BY 4.0)",
-    "contact": "daniel.busch@climate-resource.com",
-    "title": "Agrifood systems emissions",
-    "comment": (
-        "Published by Food and Agriculture Organization of the "
-        "United Nations (FAO), converted to PRIMAP2 format by "
-        "Daniel Busch"
-    ),
-    "institution": ("Food and Agriculture Organization of the United Nations"),
-}
-# Rename columns to remove the "Y" prefix
-df_all = df_all.rename(columns=lambda x: x.lstrip("Y") if x.startswith("Y") else x)
-df_all[df_all["entity"].isin(['FGASES (AR5GWP100)', 'KYOTOGHG (AR5GWP100)'])]["unit"]
-
-
-data_if = pm2.pm2io.convert_wide_dataframe_if(
-    df_all,
-    coords_cols=coords_cols,
-    coords_defaults=coords_defaults,
-    coords_terminologies=coords_terminologies,
-    coords_value_mapping=coords_value_mapping,
-    filter_keep=filter_keep,
-    filter_remove=filter_remove,
-    meta_data=meta_data,
-)
 
-# convert to PRIMAP2 native format
-data_pm2 = pm2.pm2io.from_interchange_format(data_if, data_if.attrs)
+        # create entity column
+        df_domain["entity"] = df_domain["Element"].map(read_config["entity_mapping"])
+
+        # create category column (combination of Item and Element works best)
+        df_domain["category"] = df_domain["Item"] + "-" + df_domain["Element"]
+
+        # drop columns we don't need
+        df_domain = df_domain.drop(
+            read_config["columns_to_drop"],
+            axis=1,
+        )
+
+        if df_all is None:
+            df_all = df_domain
+        else:
+            # makes sure there are no duplicate category names
+            # (categories collected so far are looked up once per domain)
+            existing_categories = df_all["category"].unique()
+            if any(
+                category in existing_categories
+                for category in df_domain["category"].unique()
+            ):
+                msg = f"Duplicate category names for {domain}"
+                raise ValueError(msg)
+            df_all = pd.concat(
+                [df_all, df_domain],
+                axis=0,
+                join="outer",
+            ).reset_index(drop=True)
+
+    df_all["Source"] = df_all["Source"].fillna("unknown")
+
+    # Rename columns to remove the "Y" prefix
+    df_all = df_all.rename(columns=lambda x: x.lstrip("Y") if x.startswith("Y") else x)
+
+    # Make sure the units are correct
+    df_all["Unit"] = df_all["entity"] + " * " + df_all["Unit"] + "/ year"
+    df_all["Unit"] = df_all["Unit"].replace(read_config_all["replace_units"])
+
+    # the newest release over all domains names the combined data set
+    date_last_updated = max(release for _, release in files_to_read)
+    release_name = f"v{date_last_updated}"
+
+    data_if = pm2.pm2io.convert_wide_dataframe_if(
+        df_all,
+        coords_cols=config_to_if["coords_cols"],
+        coords_defaults={
+            "scenario": release_name,
+        },
+        coords_terminologies=config_to_if["coords_terminologies"],
+        coords_value_mapping=config_to_if["coords_value_mapping"],
+        filter_keep=config_to_if["filter_keep"],
+        filter_remove=config_to_if["filter_remove"],
+        meta_data=config_to_if["meta_data"],
+    )
+
+    # convert to PRIMAP2 native format
+    data_pm2 = pm2.pm2io.from_interchange_format(data_if, data_if.attrs)
+
+    # convert back to IF for standardized units
+    data_if = data_pm2.pr.to_interchange_format()
+
+    # save raw data
+    # release_name already starts with "v", so no extra "v" is added here
+    output_filename = f"FAOSTAT_Agrifood_system_emissions_{release_name}"
+
+    # creates extracted_data_path as well if it does not exist yet
+    output_folder = extracted_data_path / release_name
+    output_folder.mkdir(parents=True, exist_ok=True)
+
+    pm2.pm2io.write_interchange_format(
+        output_folder / (output_filename + ".csv"), data_if
+    )
 
-# convert back to IF for standardized units
-data_if = data_pm2.pr.to_interchange_format()
+    compression = dict(zlib=True, complevel=9)
+    encoding = {var: compression for var in data_pm2.data_vars}
+    data_pm2.pr.to_netcdf(output_folder / (output_filename + ".nc"), encoding=encoding)