Daniel Busch před 5 měsíci
rodič
revize
854a0e1a43
29 změněných souborů, 145 přidání a 14 odebrání
  1. 1 0
      downloaded_data/land_use_drained_organic_soils/2024-11-14/Emissions_Drained_Organic_Soils_E_All_Data.csv
  2. 1 0
      downloaded_data/land_use_drained_organic_soils/2024-11-14/Emissions_Drained_Organic_Soils_E_All_Data_NOFLAG.csv
  3. 1 0
      downloaded_data/land_use_drained_organic_soils/2024-11-14/Emissions_Drained_Organic_Soils_E_AreaCodes.csv
  4. 1 0
      downloaded_data/land_use_drained_organic_soils/2024-11-14/Emissions_Drained_Organic_Soils_E_Elements.csv
  5. 1 0
      downloaded_data/land_use_drained_organic_soils/2024-11-14/Emissions_Drained_Organic_Soils_E_Flags.csv
  6. 1 0
      downloaded_data/land_use_drained_organic_soils/2024-11-14/Emissions_Drained_Organic_Soils_E_ItemCodes.csv
  7. 1 0
      downloaded_data/land_use_drained_organic_soils/2024-11-14/Emissions_Drained_Organic_Soils_E_Sources.csv
  8. 1 0
      downloaded_data/land_use_drained_organic_soils/2024-11-14/GV_e.pdf
  9. 1 0
      downloaded_data/land_use_drained_organic_soils/2024-11-14/land_use_drained_organic_soils.zip
  10. 1 0
      downloaded_data/land_use_fires/2024-11-14/Emissions_Land_Use_Fires_E_All_Data.csv
  11. 1 0
      downloaded_data/land_use_fires/2024-11-14/Emissions_Land_Use_Fires_E_All_Data_NOFLAG.csv
  12. 1 0
      downloaded_data/land_use_fires/2024-11-14/Emissions_Land_Use_Fires_E_AreaCodes.csv
  13. 1 0
      downloaded_data/land_use_fires/2024-11-14/Emissions_Land_Use_Fires_E_Flags.csv
  14. 1 0
      downloaded_data/land_use_fires/2024-11-14/GI_e.pdf
  15. 1 0
      downloaded_data/land_use_fires/2024-11-14/land_use_fires.zip
  16. 1 0
      downloaded_data/pre_post_agricultural_production/2024-11-14/Emissions_Pre_Post_Production_E_All_Data.csv
  17. 1 0
      downloaded_data/pre_post_agricultural_production/2024-11-14/Emissions_Pre_Post_Production_E_All_Data_NOFLAG.csv
  18. 1 0
      downloaded_data/pre_post_agricultural_production/2024-11-14/Emissions_Pre_Post_Production_E_AreaCodes.csv
  19. 1 0
      downloaded_data/pre_post_agricultural_production/2024-11-14/Emissions_Pre_Post_Production_E_Elements.csv
  20. 1 0
      downloaded_data/pre_post_agricultural_production/2024-11-14/Emissions_Pre_Post_Production_E_Flags.csv
  21. 1 0
      downloaded_data/pre_post_agricultural_production/2024-11-14/Emissions_Pre_Post_Production_E_ItemCodes.csv
  22. 1 0
      downloaded_data/pre_post_agricultural_production/2024-11-14/README_Methodological_Note.pdf
  23. 1 0
      downloaded_data/pre_post_agricultural_production/2024-11-14/pre_post_agricultural_production.zip
  24. 1 1
      extracted_data/v2024-11-14/FAOSTAT_Agrifood_system_emissions_v2024-11-14.csv
  25. 1 1
      extracted_data/v2024-11-14/FAOSTAT_Agrifood_system_emissions_v2024-11-14.yaml
  26. 86 10
      src/faostat_data_primap/helper/definitions.py
  27. 8 2
      src/faostat_data_primap/read.py
  28. 24 0
      tests/integration/test_download_script.py
  29. 2 0
      tests/integration/test_read_script.py

+ 1 - 0
downloaded_data/land_use_drained_organic_soils/2024-11-14/Emissions_Drained_Organic_Soils_E_All_Data.csv

@@ -0,0 +1 @@
+../../../.git/annex/objects/2W/3j/MD5E-s1789472--9b1b70d493bd45c969873db741870714.csv/MD5E-s1789472--9b1b70d493bd45c969873db741870714.csv

+ 1 - 0
downloaded_data/land_use_drained_organic_soils/2024-11-14/Emissions_Drained_Organic_Soils_E_All_Data_NOFLAG.csv

@@ -0,0 +1 @@
+../../../.git/annex/objects/7z/Gp/MD5E-s1318611--e61ef5560584cecbb28df6f4aa8baa5c.csv/MD5E-s1318611--e61ef5560584cecbb28df6f4aa8baa5c.csv

+ 1 - 0
downloaded_data/land_use_drained_organic_soils/2024-11-14/Emissions_Drained_Organic_Soils_E_AreaCodes.csv

@@ -0,0 +1 @@
+../../../.git/annex/objects/zq/54/MD5E-s6297--c1ea968e8385286d309e132b8e2611ac.csv/MD5E-s6297--c1ea968e8385286d309e132b8e2611ac.csv

+ 1 - 0
downloaded_data/land_use_drained_organic_soils/2024-11-14/Emissions_Drained_Organic_Soils_E_Elements.csv

@@ -0,0 +1 @@
+../../../.git/annex/objects/vw/jG/MD5E-s106--60e8c6a610e46a3bf5e04900d202f48d.csv/MD5E-s106--60e8c6a610e46a3bf5e04900d202f48d.csv

+ 1 - 0
downloaded_data/land_use_drained_organic_soils/2024-11-14/Emissions_Drained_Organic_Soils_E_Flags.csv

@@ -0,0 +1 @@
+../../../.git/annex/objects/71/Zk/MD5E-s75--09581f1851359d0feea6ddd1d8d10459.csv/MD5E-s75--09581f1851359d0feea6ddd1d8d10459.csv

+ 1 - 0
downloaded_data/land_use_drained_organic_soils/2024-11-14/Emissions_Drained_Organic_Soils_E_ItemCodes.csv

@@ -0,0 +1 @@
+../../../.git/annex/objects/67/Fx/MD5E-s28--d1671e41eed70275d48271edcd20f669.csv/MD5E-s28--d1671e41eed70275d48271edcd20f669.csv

+ 1 - 0
downloaded_data/land_use_drained_organic_soils/2024-11-14/Emissions_Drained_Organic_Soils_E_Sources.csv

@@ -0,0 +1 @@
+../../../.git/annex/objects/4M/P5/MD5E-s52--912f83275943beae43d78606c2ca4fd5.csv/MD5E-s52--912f83275943beae43d78606c2ca4fd5.csv

+ 1 - 0
downloaded_data/land_use_drained_organic_soils/2024-11-14/GV_e.pdf

@@ -0,0 +1 @@
+../../../.git/annex/objects/4X/5G/MD5E-s380124--5230c1e42e8c86625ea34e5d9ab8eae6.pdf/MD5E-s380124--5230c1e42e8c86625ea34e5d9ab8eae6.pdf

+ 1 - 0
downloaded_data/land_use_drained_organic_soils/2024-11-14/land_use_drained_organic_soils.zip

@@ -0,0 +1 @@
+../../../.git/annex/objects/gj/4W/MD5E-s315732--b93ad0cdcf1050dde0e354556e11cea9.zip/MD5E-s315732--b93ad0cdcf1050dde0e354556e11cea9.zip

+ 1 - 0
downloaded_data/land_use_fires/2024-11-14/Emissions_Land_Use_Fires_E_All_Data.csv

@@ -0,0 +1 @@
+../../../.git/annex/objects/G2/vg/MD5E-s8785882--e1db7344ac55235e18b4cf9780c3d32f.csv/MD5E-s8785882--e1db7344ac55235e18b4cf9780c3d32f.csv

+ 1 - 0
downloaded_data/land_use_fires/2024-11-14/Emissions_Land_Use_Fires_E_All_Data_NOFLAG.csv

@@ -0,0 +1 @@
+../../../.git/annex/objects/6q/99/MD5E-s6585458--28920044dbc04bcef32a24a6da092e10.csv/MD5E-s6585458--28920044dbc04bcef32a24a6da092e10.csv

+ 1 - 0
downloaded_data/land_use_fires/2024-11-14/Emissions_Land_Use_Fires_E_AreaCodes.csv

@@ -0,0 +1 @@
+../../../.git/annex/objects/ww/m4/MD5E-s6291--0c7df737cb3e007c75cbc38ab03cb1ae.csv/MD5E-s6291--0c7df737cb3e007c75cbc38ab03cb1ae.csv

+ 1 - 0
downloaded_data/land_use_fires/2024-11-14/Emissions_Land_Use_Fires_E_Flags.csv

@@ -0,0 +1 @@
+../../../.git/annex/objects/ZJ/33/MD5E-s56--49ffa55879a2eb0ff6dba98c17944376.csv/MD5E-s56--49ffa55879a2eb0ff6dba98c17944376.csv

+ 1 - 0
downloaded_data/land_use_fires/2024-11-14/GI_e.pdf

@@ -0,0 +1 @@
+../../../.git/annex/objects/mj/w1/MD5E-s275357--6969bd177d6f7e5c246465b44ec70847.pdf/MD5E-s275357--6969bd177d6f7e5c246465b44ec70847.pdf

+ 1 - 0
downloaded_data/land_use_fires/2024-11-14/land_use_fires.zip

@@ -0,0 +1 @@
+../../../.git/annex/objects/Kx/X4/MD5E-s1801886--e5309bc253453ef55121514a25076af7.zip/MD5E-s1801886--e5309bc253453ef55121514a25076af7.zip

+ 1 - 0
downloaded_data/pre_post_agricultural_production/2024-11-14/Emissions_Pre_Post_Production_E_All_Data.csv

@@ -0,0 +1 @@
+../../../.git/annex/objects/Vf/MW/MD5E-s10263223--eea11a9eb0b45a07fbe07245eec86c72.csv/MD5E-s10263223--eea11a9eb0b45a07fbe07245eec86c72.csv

+ 1 - 0
downloaded_data/pre_post_agricultural_production/2024-11-14/Emissions_Pre_Post_Production_E_All_Data_NOFLAG.csv

@@ -0,0 +1 @@
+../../../.git/annex/objects/g9/XW/MD5E-s8165056--07c4185c77f1b25b5bfbd63ebf04c987.csv/MD5E-s8165056--07c4185c77f1b25b5bfbd63ebf04c987.csv

+ 1 - 0
downloaded_data/pre_post_agricultural_production/2024-11-14/Emissions_Pre_Post_Production_E_AreaCodes.csv

@@ -0,0 +1 @@
+../../../.git/annex/objects/8Z/Wm/MD5E-s6907--2b7d9ad063cd6870b08206e453151369.csv/MD5E-s6907--2b7d9ad063cd6870b08206e453151369.csv

+ 1 - 0
downloaded_data/pre_post_agricultural_production/2024-11-14/Emissions_Pre_Post_Production_E_Elements.csv

@@ -0,0 +1 @@
+../../../.git/annex/objects/VW/4j/MD5E-s367--12a18f4fac6a09743e80657a6656d562.csv/MD5E-s367--12a18f4fac6a09743e80657a6656d562.csv

+ 1 - 0
downloaded_data/pre_post_agricultural_production/2024-11-14/Emissions_Pre_Post_Production_E_Flags.csv

@@ -0,0 +1 @@
+../../../.git/annex/objects/x2/v1/MD5E-s56--f761b847385e4153242bfa46c3a54937.csv/MD5E-s56--f761b847385e4153242bfa46c3a54937.csv

+ 1 - 0
downloaded_data/pre_post_agricultural_production/2024-11-14/Emissions_Pre_Post_Production_E_ItemCodes.csv

@@ -0,0 +1 @@
+../../../.git/annex/objects/67/Fx/MD5E-s28--d1671e41eed70275d48271edcd20f669.csv/MD5E-s28--d1671e41eed70275d48271edcd20f669.csv

+ 1 - 0
downloaded_data/pre_post_agricultural_production/2024-11-14/README_Methodological_Note.pdf

@@ -0,0 +1 @@
+/Users/danielbusch/Documents/FAOSTAT_data_primap/downloaded_data/pre_post_agricultural_production/2023-11-09/README_Methodological_Note.pdf

+ 1 - 0
downloaded_data/pre_post_agricultural_production/2024-11-14/pre_post_agricultural_production.zip

@@ -0,0 +1 @@
+../../../.git/annex/objects/MW/PQ/MD5E-s4214911--6fbc0e4d60b6e778344b3e434890a517.zip/MD5E-s4214911--6fbc0e4d60b6e778344b3e434890a517.zip

+ 1 - 1
extracted_data/v2024-11-14/FAOSTAT_Agrifood_system_emissions_v2024-11-14.csv

@@ -1 +1 @@
-../../.git/annex/objects/jk/vw/MD5E-s45864107--75346cc2d78e613e7610054027b892c3.csv/MD5E-s45864107--75346cc2d78e613e7610054027b892c3.csv
+../../.git/annex/objects/5f/mq/MD5E-s45951559--903471c4bf5a6616e7a144e21b8e4954.csv/MD5E-s45951559--903471c4bf5a6616e7a144e21b8e4954.csv

+ 1 - 1
extracted_data/v2024-11-14/FAOSTAT_Agrifood_system_emissions_v2024-11-14.yaml

@@ -13,9 +13,9 @@ time_format: '%Y'
 dimensions:
   '*':
   - time
-  - source
   - area (ISO3)
   - scenario (FAO)
+  - source
   - category (FAOSTAT)
   - entity
   - unit

+ 86 - 10
src/faostat_data_primap/helper/definitions.py

@@ -225,7 +225,34 @@ read_config_all = {
                 "Area Code",
                 "Source Code",
             ],
-        }
+        },
+        "2024-11-14": {
+            "filename": "Emissions_Drained_Organic_Soils_E_All_Data_NOFLAG.csv",
+            "areas_to_remove": [
+                *areas_to_remove_base,
+                "Belgium-Luxembourg",
+                "Serbia and Montenegro",
+                "European Union (27)",
+            ],
+            "elements_to_remove": [
+                "Area",
+                "Net stock change (C)",
+            ],
+            "entity_mapping": {
+                "Emissions (N2O)": "N2O",
+                "Emissions (CO2)": "CO2",
+            },
+            "columns_to_drop": [
+                "Element",
+                "Element Code",
+                "Item",
+                "Item Code",
+                "Area Code (M49)",
+                "Area",
+                "Area Code",
+                "Source Code",
+            ],
+        },
     },
     "land_use_fires": {
         "2023-11-09": {
@@ -250,18 +277,40 @@ read_config_all = {
                 "Area Code",
                 "Source Code",
             ],
-        }
+        },
+        "2024-11-14": {
+            "filename": "Emissions_Land_Use_Fires_E_All_Data_NOFLAG.csv",
+            "areas_to_remove": [
+                *areas_to_remove_base,
+                "European Union (27)",
+            ],
+            "elements_to_remove": [
+                "Burning crop residues (Biomass burned, dry matter)",
+                "Burned Area",
+            ],
+            "entity_mapping": {
+                "Emissions (CH4)": "CH4",
+                "Emissions (N2O)": "N2O",
+                "Emissions (CO2)": "CO2",
+            },
+            "columns_to_drop": [
+                "Element",
+                "Element Code",
+                "Item",
+                "Item Code",
+                "Area Code (M49)",
+                "Area",
+                "Area Code",
+                "Source Code",
+            ],
+        },
     },
     "land_use_forests": {
         "2024-11-14": {
             "filename": "Emissions_Land_Use_Forests_E_All_Data_NOFLAG.csv",
             "areas_to_remove": [
                 *areas_to_remove_base,
-                # "Belgium-Luxembourg",
-                # "Serbia and Montenegro",
                 "European Union (27)",
-                # "China" and "China, mainland" included with identical data
-                # "China, mainland",
             ],
             "elements_to_remove": [
                 "Area",
@@ -284,10 +333,7 @@ read_config_all = {
             "filename": "Emissions_Pre_Post_Production_E_All_Data_NOFLAG.csv",
             "areas_to_remove": [
                 *areas_to_remove_base,
-                # "Belgium-Luxembourg",
-                # "Serbia and Montenegro",
                 "European Union (27)",
-                # "Channel Islands",
             ],
             "elements_to_remove": [
                 "Energy Use (Total)",
@@ -312,7 +358,37 @@ read_config_all = {
                 "Area",
                 "Area Code",
             ],
-        }
+        },
+        "2024-11-14": {
+            "filename": "Emissions_Pre_Post_Production_E_All_Data_NOFLAG.csv",
+            "areas_to_remove": [
+                *areas_to_remove_base,
+                "European Union (27)",
+            ],
+            "elements_to_remove": [
+                "Energy Use (Total)",
+                "Energy Use (Electricity)",
+                "Energy Use (Natural Gas, including LNG)",
+                "Energy Use (Heat)",
+                "Energy Use (Coal)",
+            ],
+            "entity_mapping": {
+                "Emissions (CO2)": "CO2",
+                "Emissions (CO2eq) (AR5)": "KYOTOGHG (AR5GWP100)",
+                "Emissions (CH4)": "CH4",
+                "Emissions (N2O)": "N2O",
+                "Emissions (CO2eq) from F-gases (AR5)": "FGASES (AR5GWP100)",
+            },
+            "columns_to_drop": [
+                "Element",
+                "Element Code",
+                "Item",
+                "Item Code",
+                "Area Code (M49)",
+                "Area",
+                "Area Code",
+            ],
+        },
     },
     "replace_units": {
         "KYOTOGHG (AR5GWP100) * kt/ year": "CO2 * kt / year",

+ 8 - 2
src/faostat_data_primap/read.py

@@ -105,12 +105,17 @@ def read_latest_data(
 
         # check all countries are converted into iso3 codes
         if any(df_domain["country (ISO3)"].isna()):
-            msg = "Not all countries are converted into ISO3 codes"
+            msg = f"Not all countries are converted into ISO3 codes for {domain}"
             raise ValueError(msg)
 
         # create entity column
         df_domain["entity"] = df_domain["Element"].map(read_config["entity_mapping"])
 
+        # check all entities are mapped
+        if any(df_domain["entity"].isna()):
+            msg = f"Not all entities are mapped for {domain}"
+            raise ValueError(msg)
+
         # create category column (combination of Item and Element works best)
         df_domain["category"] = df_domain["Item"] + "-" + df_domain["Element"]
 
@@ -172,7 +177,8 @@ def read_latest_data(
         output_folder.mkdir()
 
     pm2.pm2io.write_interchange_format(
-        output_folder / (output_filename + ".csv"), data_if
+        output_folder / (output_filename + ".csv"),
+        data_if,
     )
 
     compression = dict(zlib=True, complevel=9)

+ 24 - 0
tests/integration/test_download_script.py

@@ -1,6 +1,7 @@
 import os
 
 from src.faostat_data_primap.download import download_all_domains
+from src.faostat_data_primap.read import read_latest_data
 
 
 # test the whole download script run
@@ -30,3 +31,26 @@ def test_download_all_domains(tmp_path):
             assert [f for f in downloaded_data if f.endswith(".zip")]
 
     assert sorted(expected_downloaded_domains) == sorted(domains)
+
+    extracted_data_path = tmp_path / "extracted_data"
+
+    # read and save latest data
+    read_latest_data(
+        downloaded_data_path=downloaded_data_path, save_path=extracted_data_path
+    )
+
+    release_folder = os.listdir(extracted_data_path)
+
+    # there should be one directory created
+    assert len(release_folder) == 1
+    # and it starts with "v" (the date changes with each release)
+    assert release_folder[0].startswith("v")
+
+    output_files = os.listdir(extracted_data_path / release_folder[0])
+    # in the folder there should be three files
+    assert len(output_files) == 3
+
+    # a .yaml, .csv, and .nc file
+    required_extensions = {"nc", "csv", "yaml"}
+    file_extensions = {file.split(".")[-1] for file in output_files}
+    assert required_extensions == file_extensions

+ 2 - 0
tests/integration/test_read_script.py

@@ -3,6 +3,8 @@ import os
 from src.faostat_data_primap.helper.paths import root_path
 from src.faostat_data_primap.read import read_latest_data
 
+example_csv_content = ()
+
 
 def test_read_latest_data(tmp_path):
     # get the downloaded data from here