Browse Source

delete data script, unzip [skip ci]

Daniel Busch 5 months ago
parent
commit
e3edb3f13e
41 changed files with 100 additions and 9 deletions
  1. 0 1
      downloaded_data/emissions_crops/2024-11-07/test.zip
  2. 1 0
      downloaded_data/farm_gate_agriculture_energy/2024-11-07/Emissions_Agriculture_Energy_E_All_Data.csv
  3. 1 0
      downloaded_data/farm_gate_agriculture_energy/2024-11-07/Emissions_Agriculture_Energy_E_All_Data_NOFLAG.csv
  4. 1 0
      downloaded_data/farm_gate_agriculture_energy/2024-11-07/Emissions_Agriculture_Energy_E_AreaCodes.csv
  5. 1 0
      downloaded_data/farm_gate_agriculture_energy/2024-11-07/Emissions_Agriculture_Energy_E_Flags.csv
  6. 0 1
      downloaded_data/farm_gate_agriculture_energy/2024-11-07/test.zip
  7. 1 0
      downloaded_data/farm_gate_emissions_crops/2024-11-07/Emissions_crops_E_All_Data.csv
  8. 1 0
      downloaded_data/farm_gate_emissions_crops/2024-11-07/Emissions_crops_E_All_Data_NOFLAG.csv
  9. 1 0
      downloaded_data/farm_gate_emissions_crops/2024-11-07/Emissions_crops_E_AreaCodes.csv
  10. 1 0
      downloaded_data/farm_gate_emissions_crops/2024-11-07/Emissions_crops_E_Flags.csv
  11. 1 0
      downloaded_data/farm_gate_emissions_crops/2024-11-07/Emissions_crops_E_ItemCodes.csv
  12. 0 1
      downloaded_data/farm_gate_emissions_crops/2024-11-07/test.zip
  13. 1 0
      downloaded_data/farm_gate_livestock/2024-11-07/Emissions_livestock_E_All_Data.csv
  14. 1 0
      downloaded_data/farm_gate_livestock/2024-11-07/Emissions_livestock_E_All_Data_NOFLAG.csv
  15. 1 0
      downloaded_data/farm_gate_livestock/2024-11-07/Emissions_livestock_E_AreaCodes.csv
  16. 1 0
      downloaded_data/farm_gate_livestock/2024-11-07/Emissions_livestock_E_Flags.csv
  17. 1 0
      downloaded_data/farm_gate_livestock/2024-11-07/Emissions_livestock_E_ItemCodes.csv
  18. 0 1
      downloaded_data/farm_gate_livestock/2024-11-07/test.zip
  19. 1 0
      downloaded_data/land_use_drained_organic_soils/2024-11-07/Emissions_Drained_Organic_Soils_E_All_Data.csv
  20. 1 0
      downloaded_data/land_use_drained_organic_soils/2024-11-07/Emissions_Drained_Organic_Soils_E_All_Data_NOFLAG.csv
  21. 1 0
      downloaded_data/land_use_drained_organic_soils/2024-11-07/Emissions_Drained_Organic_Soils_E_AreaCodes.csv
  22. 1 0
      downloaded_data/land_use_drained_organic_soils/2024-11-07/Emissions_Drained_Organic_Soils_E_Flags.csv
  23. 1 0
      downloaded_data/land_use_drained_organic_soils/2024-11-07/Emissions_Drained_Organic_Soils_E_ItemCodes.csv
  24. 0 1
      downloaded_data/land_use_drained_organic_soils/2024-11-07/test.zip
  25. 1 0
      downloaded_data/land_use_fires/2024-11-07/Emissions_Land_Use_Fires_E_All_Data.csv
  26. 1 0
      downloaded_data/land_use_fires/2024-11-07/Emissions_Land_Use_Fires_E_All_Data_NOFLAG.csv
  27. 1 0
      downloaded_data/land_use_fires/2024-11-07/Emissions_Land_Use_Fires_E_AreaCodes.csv
  28. 1 0
      downloaded_data/land_use_fires/2024-11-07/Emissions_Land_Use_Fires_E_Flags.csv
  29. 0 1
      downloaded_data/land_use_fires/2024-11-07/test.zip
  30. 1 0
      downloaded_data/land_use_forests/2024-11-07/Emissions_Land_Use_Forests_E_All_Data.csv
  31. 1 0
      downloaded_data/land_use_forests/2024-11-07/Emissions_Land_Use_Forests_E_All_Data_NOFLAG.csv
  32. 1 0
      downloaded_data/land_use_forests/2024-11-07/Emissions_Land_Use_Forests_E_AreaCodes.csv
  33. 1 0
      downloaded_data/land_use_forests/2024-11-07/Emissions_Land_Use_Forests_E_Flags.csv
  34. 0 1
      downloaded_data/land_use_forests/2024-11-07/test.zip
  35. 1 0
      downloaded_data/pre_post_agricultural_production/2024-11-07/Emissions_Pre_Post_Production_E_All_Data.csv
  36. 1 0
      downloaded_data/pre_post_agricultural_production/2024-11-07/Emissions_Pre_Post_Production_E_All_Data_NOFLAG.csv
  37. 1 0
      downloaded_data/pre_post_agricultural_production/2024-11-07/Emissions_Pre_Post_Production_E_AreaCodes.csv
  38. 1 0
      downloaded_data/pre_post_agricultural_production/2024-11-07/Emissions_Pre_Post_Production_E_Flags.csv
  39. 0 1
      downloaded_data/pre_post_agricultural_production/2024-11-07/test.zip
  40. 39 0
      scripts/remove_downloads.py
  41. 30 1
      src/faostat_data_primap/download.py

+ 0 - 1
downloaded_data/emissions_crops/2024-11-07/test.zip

@@ -1 +0,0 @@
-../../../.git/annex/objects/mG/4x/MD5E-s5025708--d6b35891c494f61bb1699f669611a959.zip/MD5E-s5025708--d6b35891c494f61bb1699f669611a959.zip

+ 1 - 0
downloaded_data/farm_gate_agriculture_energy/2024-11-07/Emissions_Agriculture_Energy_E_All_Data.csv

@@ -0,0 +1 @@
+../../../.git/annex/objects/K2/xM/MD5E-s3747984--5c8e2441e4d635b94dbeca29d8a1cd0d.csv/MD5E-s3747984--5c8e2441e4d635b94dbeca29d8a1cd0d.csv

+ 1 - 0
downloaded_data/farm_gate_agriculture_energy/2024-11-07/Emissions_Agriculture_Energy_E_All_Data_NOFLAG.csv

@@ -0,0 +1 @@
+../../../.git/annex/objects/1V/8w/MD5E-s2905364--66bf9d1c3aba04a26b1acc2554e7c2d5.csv/MD5E-s2905364--66bf9d1c3aba04a26b1acc2554e7c2d5.csv

+ 1 - 0
downloaded_data/farm_gate_agriculture_energy/2024-11-07/Emissions_Agriculture_Energy_E_AreaCodes.csv

@@ -0,0 +1 @@
+../../../.git/annex/objects/ww/m4/MD5E-s6291--0c7df737cb3e007c75cbc38ab03cb1ae.csv/MD5E-s6291--0c7df737cb3e007c75cbc38ab03cb1ae.csv

+ 1 - 0
downloaded_data/farm_gate_agriculture_energy/2024-11-07/Emissions_Agriculture_Energy_E_Flags.csv

@@ -0,0 +1 @@
+../../../.git/annex/objects/ZJ/33/MD5E-s56--49ffa55879a2eb0ff6dba98c17944376.csv/MD5E-s56--49ffa55879a2eb0ff6dba98c17944376.csv

+ 0 - 1
downloaded_data/farm_gate_agriculture_energy/2024-11-07/test.zip

@@ -1 +0,0 @@
-../../../.git/annex/objects/4m/p0/MD5E-s1131872--3a3329f2115c62bab08ba71183623db7.zip/MD5E-s1131872--3a3329f2115c62bab08ba71183623db7.zip

+ 1 - 0
downloaded_data/farm_gate_emissions_crops/2024-11-07/Emissions_crops_E_All_Data.csv

@@ -0,0 +1 @@
+../../../.git/annex/objects/9X/27/MD5E-s18421900--06c176efdfe8eba45d3c55fe10f6d483.csv/MD5E-s18421900--06c176efdfe8eba45d3c55fe10f6d483.csv

+ 1 - 0
downloaded_data/farm_gate_emissions_crops/2024-11-07/Emissions_crops_E_All_Data_NOFLAG.csv

@@ -0,0 +1 @@
+../../../.git/annex/objects/x2/Jq/MD5E-s13411273--09d470954a229689107df6f1c44d28b4.csv/MD5E-s13411273--09d470954a229689107df6f1c44d28b4.csv

+ 1 - 0
downloaded_data/farm_gate_emissions_crops/2024-11-07/Emissions_crops_E_AreaCodes.csv

@@ -0,0 +1 @@
+../../../.git/annex/objects/ww/m4/MD5E-s6291--0c7df737cb3e007c75cbc38ab03cb1ae.csv/MD5E-s6291--0c7df737cb3e007c75cbc38ab03cb1ae.csv

+ 1 - 0
downloaded_data/farm_gate_emissions_crops/2024-11-07/Emissions_crops_E_Flags.csv

@@ -0,0 +1 @@
+../../../.git/annex/objects/gW/QQ/MD5E-s134--7852035bf48ab964ef4b9a62152be7dc.csv/MD5E-s134--7852035bf48ab964ef4b9a62152be7dc.csv

+ 1 - 0
downloaded_data/farm_gate_emissions_crops/2024-11-07/Emissions_crops_E_ItemCodes.csv

@@ -0,0 +1 @@
+../../../.git/annex/objects/W7/5Z/MD5E-s272--615170d0f5f83341b7a3783c59dfe51b.csv/MD5E-s272--615170d0f5f83341b7a3783c59dfe51b.csv

+ 0 - 1
downloaded_data/farm_gate_emissions_crops/2024-11-07/test.zip

@@ -1 +0,0 @@
-../../../.git/annex/objects/mG/4x/MD5E-s5025708--d6b35891c494f61bb1699f669611a959.zip/MD5E-s5025708--d6b35891c494f61bb1699f669611a959.zip

+ 1 - 0
downloaded_data/farm_gate_livestock/2024-11-07/Emissions_livestock_E_All_Data.csv

@@ -0,0 +1 @@
+../../../.git/annex/objects/Jk/Z7/MD5E-s130221694--993f534b79c1479b4487a34d48d05948.csv/MD5E-s130221694--993f534b79c1479b4487a34d48d05948.csv

+ 1 - 0
downloaded_data/farm_gate_livestock/2024-11-07/Emissions_livestock_E_All_Data_NOFLAG.csv

@@ -0,0 +1 @@
+../../../.git/annex/objects/P0/9j/MD5E-s96060625--9f782ebc87a0dc22ed20f488dc9aae0e.csv/MD5E-s96060625--9f782ebc87a0dc22ed20f488dc9aae0e.csv

+ 1 - 0
downloaded_data/farm_gate_livestock/2024-11-07/Emissions_livestock_E_AreaCodes.csv

@@ -0,0 +1 @@
+../../../.git/annex/objects/4P/vG/MD5E-s6251--d86a24d9bb5f28127ff8970b7c033be9.csv/MD5E-s6251--d86a24d9bb5f28127ff8970b7c033be9.csv

+ 1 - 0
downloaded_data/farm_gate_livestock/2024-11-07/Emissions_livestock_E_Flags.csv

@@ -0,0 +1 @@
+../../../.git/annex/objects/gW/QQ/MD5E-s134--7852035bf48ab964ef4b9a62152be7dc.csv/MD5E-s134--7852035bf48ab964ef4b9a62152be7dc.csv

+ 1 - 0
downloaded_data/farm_gate_livestock/2024-11-07/Emissions_livestock_E_ItemCodes.csv

@@ -0,0 +1 @@
+../../../.git/annex/objects/GP/ZJ/MD5E-s611--12df116591f7c201ca38e86a76846d24.csv/MD5E-s611--12df116591f7c201ca38e86a76846d24.csv

+ 0 - 1
downloaded_data/farm_gate_livestock/2024-11-07/test.zip

@@ -1 +0,0 @@
-../../../.git/annex/objects/1M/0X/MD5E-s33910537--28bb5f9131517238e7a112e6871d5898.zip/MD5E-s33910537--28bb5f9131517238e7a112e6871d5898.zip

+ 1 - 0
downloaded_data/land_use_drained_organic_soils/2024-11-07/Emissions_Drained_Organic_Soils_E_All_Data.csv

@@ -0,0 +1 @@
+../../../.git/annex/objects/99/M8/MD5E-s2243842--559f478afc2a52862be39d60763caef5.csv/MD5E-s2243842--559f478afc2a52862be39d60763caef5.csv

+ 1 - 0
downloaded_data/land_use_drained_organic_soils/2024-11-07/Emissions_Drained_Organic_Soils_E_All_Data_NOFLAG.csv

@@ -0,0 +1 @@
+../../../.git/annex/objects/WZ/34/MD5E-s1666995--fcefba6724146fdbcdb43f0d0d4b79e3.csv/MD5E-s1666995--fcefba6724146fdbcdb43f0d0d4b79e3.csv

+ 1 - 0
downloaded_data/land_use_drained_organic_soils/2024-11-07/Emissions_Drained_Organic_Soils_E_AreaCodes.csv

@@ -0,0 +1 @@
+../../../.git/annex/objects/ww/m4/MD5E-s6291--0c7df737cb3e007c75cbc38ab03cb1ae.csv/MD5E-s6291--0c7df737cb3e007c75cbc38ab03cb1ae.csv

+ 1 - 0
downloaded_data/land_use_drained_organic_soils/2024-11-07/Emissions_Drained_Organic_Soils_E_Flags.csv

@@ -0,0 +1 @@
+../../../.git/annex/objects/ZJ/33/MD5E-s56--49ffa55879a2eb0ff6dba98c17944376.csv/MD5E-s56--49ffa55879a2eb0ff6dba98c17944376.csv

+ 1 - 0
downloaded_data/land_use_drained_organic_soils/2024-11-07/Emissions_Drained_Organic_Soils_E_ItemCodes.csv

@@ -0,0 +1 @@
+../../../.git/annex/objects/Vg/XG/MD5E-s25--91eb7aa7ff09282acaf3300f619b8934.csv/MD5E-s25--91eb7aa7ff09282acaf3300f619b8934.csv

+ 0 - 1
downloaded_data/land_use_drained_organic_soils/2024-11-07/test.zip

@@ -1 +0,0 @@
-../../../.git/annex/objects/FK/vp/MD5E-s387900--a022e0142fa658793302f93ce4820f51.zip/MD5E-s387900--a022e0142fa658793302f93ce4820f51.zip

+ 1 - 0
downloaded_data/land_use_fires/2024-11-07/Emissions_Land_Use_Fires_E_All_Data.csv

@@ -0,0 +1 @@
+../../../.git/annex/objects/X6/F8/MD5E-s8494656--8dd8d98e2b93871d065d3ab61cbbb2d0.csv/MD5E-s8494656--8dd8d98e2b93871d065d3ab61cbbb2d0.csv

+ 1 - 0
downloaded_data/land_use_fires/2024-11-07/Emissions_Land_Use_Fires_E_All_Data_NOFLAG.csv

@@ -0,0 +1 @@
+../../../.git/annex/objects/0j/QW/MD5E-s6357447--b13bfb628607550945857dff7ab22a90.csv/MD5E-s6357447--b13bfb628607550945857dff7ab22a90.csv

+ 1 - 0
downloaded_data/land_use_fires/2024-11-07/Emissions_Land_Use_Fires_E_AreaCodes.csv

@@ -0,0 +1 @@
+../../../.git/annex/objects/ww/m4/MD5E-s6291--0c7df737cb3e007c75cbc38ab03cb1ae.csv/MD5E-s6291--0c7df737cb3e007c75cbc38ab03cb1ae.csv

+ 1 - 0
downloaded_data/land_use_fires/2024-11-07/Emissions_Land_Use_Fires_E_Flags.csv

@@ -0,0 +1 @@
+../../../.git/annex/objects/ZJ/33/MD5E-s56--49ffa55879a2eb0ff6dba98c17944376.csv/MD5E-s56--49ffa55879a2eb0ff6dba98c17944376.csv

+ 0 - 1
downloaded_data/land_use_fires/2024-11-07/test.zip

@@ -1 +0,0 @@
-../../../.git/annex/objects/8j/pF/MD5E-s1749124--dc55869803658e9cd776b60ae24107eb.zip/MD5E-s1749124--dc55869803658e9cd776b60ae24107eb.zip

+ 1 - 0
downloaded_data/land_use_forests/2024-11-07/Emissions_Land_Use_Forests_E_All_Data.csv

@@ -0,0 +1 @@
+../../../.git/annex/objects/67/69/MD5E-s1110885--79f06db2c0b50d9d6262ad87e9981db7.csv/MD5E-s1110885--79f06db2c0b50d9d6262ad87e9981db7.csv

+ 1 - 0
downloaded_data/land_use_forests/2024-11-07/Emissions_Land_Use_Forests_E_All_Data_NOFLAG.csv

@@ -0,0 +1 @@
+../../../.git/annex/objects/K5/XV/MD5E-s806494--d3d276b80c5dbe86be357eacb9ce1869.csv/MD5E-s806494--d3d276b80c5dbe86be357eacb9ce1869.csv

+ 1 - 0
downloaded_data/land_use_forests/2024-11-07/Emissions_Land_Use_Forests_E_AreaCodes.csv

@@ -0,0 +1 @@
+../../../.git/annex/objects/8F/gx/MD5E-s6442--bb95e21597ca3104fc033daa29e87e2c.csv/MD5E-s6442--bb95e21597ca3104fc033daa29e87e2c.csv

+ 1 - 0
downloaded_data/land_use_forests/2024-11-07/Emissions_Land_Use_Forests_E_Flags.csv

@@ -0,0 +1 @@
+../../../.git/annex/objects/ZJ/33/MD5E-s56--49ffa55879a2eb0ff6dba98c17944376.csv/MD5E-s56--49ffa55879a2eb0ff6dba98c17944376.csv

+ 0 - 1
downloaded_data/land_use_forests/2024-11-07/test.zip

@@ -1 +0,0 @@
-../../../.git/annex/objects/km/WQ/MD5E-s177332--a1265221e2763f2048a45f727864166e.zip/MD5E-s177332--a1265221e2763f2048a45f727864166e.zip

+ 1 - 0
downloaded_data/pre_post_agricultural_production/2024-11-07/Emissions_Pre_Post_Production_E_All_Data.csv

@@ -0,0 +1 @@
+../../../.git/annex/objects/F2/2V/MD5E-s9990445--1884fe626fe79701aef21dd62a7d999b.csv/MD5E-s9990445--1884fe626fe79701aef21dd62a7d999b.csv

+ 1 - 0
downloaded_data/pre_post_agricultural_production/2024-11-07/Emissions_Pre_Post_Production_E_All_Data_NOFLAG.csv

@@ -0,0 +1 @@
+../../../.git/annex/objects/ZW/65/MD5E-s7957490--02212a350da3de62f437750ff2190da7.csv/MD5E-s7957490--02212a350da3de62f437750ff2190da7.csv

+ 1 - 0
downloaded_data/pre_post_agricultural_production/2024-11-07/Emissions_Pre_Post_Production_E_AreaCodes.csv

@@ -0,0 +1 @@
+../../../.git/annex/objects/5F/x7/MD5E-s6898--e6afc975758367037e97b7a51601eef6.csv/MD5E-s6898--e6afc975758367037e97b7a51601eef6.csv

+ 1 - 0
downloaded_data/pre_post_agricultural_production/2024-11-07/Emissions_Pre_Post_Production_E_Flags.csv

@@ -0,0 +1 @@
+../../../.git/annex/objects/8f/5K/MD5E-s37--c802d00a3f6701103f12b45fd77c936d.csv/MD5E-s37--c802d00a3f6701103f12b45fd77c936d.csv

+ 0 - 1
downloaded_data/pre_post_agricultural_production/2024-11-07/test.zip

@@ -1 +0,0 @@
-../../../.git/annex/objects/0z/JM/MD5E-s4159211--d529d764b672c1b778d13a7ca2cc3d13.zip/MD5E-s4159211--d529d764b672c1b778d13a7ca2cc3d13.zip

+ 39 - 0
scripts/remove_downloads.py

@@ -0,0 +1,39 @@
+"""Remove downloads for a particular date.
+
+Files are saved in a folder named after the current date,
+for example downloaded_data/farm_gate_agriculture_energy/2024-11-07
+This script deletes all files in such a folder. It is
+useful when testing downloads. It needs to be kept in sync with the
+directory structure and may be deleted altogether later.
+"""
+
+import os
+
+import click
+
+from faostat_data_primap.helper.definitions import downloaded_data_path
+
+
+@click.command()
+@click.option(
+    "--date",
+    help="The day on which the data to be deleted was downloaded",
+    required=True,
+)
+def run(date: str) -> None:
+    """
+    Delete all downloaded files for one day.
+
+    Parameters
+    ----------
+    date
+        Name of the per-day folder to empty, for example ``2024-11-07``.
+        Entries of the download directory without such a folder are skipped.
+    """
+    for domain in os.listdir(downloaded_data_path):
+        path_to_files = downloaded_data_path / domain / date
+        # Not every entry has a folder for the requested day; without this
+        # check os.listdir would raise and abort the whole clean-up.
+        if not os.path.isdir(path_to_files):
+            continue
+
+        for file in os.listdir(path_to_files):
+            os.remove(path_to_files / file)
+
+
+if __name__ == "__main__":
+    # run is a click command: it reads --date from the command line itself.
+    run()

+ 30 - 1
src/faostat_data_primap/download.py

@@ -1,9 +1,11 @@
 """Downloads data from FAOSTAT website."""
 
+import os
+import zipfile
 from datetime import datetime
 
 import datalad.api
-from helper.definitions import downloaded_data_path
+from helper.definitions import downloaded_data_path, root_path
 
 if __name__ == "__main__":
     sources = [
@@ -62,6 +64,33 @@ if __name__ == "__main__":
             path=str(local_filename),
         )
 
+        # unzip the downloaded file into the folder it was saved in
+        if local_filename.exists():
+            print(f"Download => {local_filename.relative_to(root_path)}")
+            # unzip data (only for new downloads)
+            if local_filename.suffix == ".zip":
+                try:
+                    # "with" closes the archive even if extraction fails part-way.
+                    with zipfile.ZipFile(str(local_filename), "r") as zipped_file:
+                        zipped_file.extractall(str(local_filename.parent))
+                        print(f"Extracted {len(zipped_file.namelist())} files.")
+                    # Reached only after a successful extraction; a failing
+                    # archive stays on disk.
+                    os.remove(local_filename)
+                # TODO Better error logging/visibility
+                except zipfile.BadZipFile:
+                    print(
+                        f"Error while trying to extract "
+                        f"{local_filename.relative_to(root_path)}"
+                    )
+                except NotImplementedError:
+                    print(
+                        "Zip format not supported, please unzip on the command line."
+                    )
+            else:
+                print(
+                    f"Not attempting to extract "
+                    f"{local_filename.relative_to(root_path)}."
+                )
+
         # Questions:
         # * Push to datalad .zip and unzipped, or only unzipped?
         # * What unique directory name to use -