Browse Source

Merge branch 'categorisation-and-conversion' of github.com:primap-community/FAOSTAT_data_primap into categorisation-and-conversion

Daniel Busch 2 months ago
parent
commit
4d8e21bf54
52 changed files with 38 additions and 389 deletions
  1. 1 0
      .gitattributes
  2. 1 1
      dodo.py
  3. 0 1
      downloaded_data/farm_gate_agriculture_energy/2023-12-13/Emissions_Agriculture_Energy_E_All_Data.csv
  4. 0 1
      downloaded_data/farm_gate_agriculture_energy/2023-12-13/Emissions_Agriculture_Energy_E_All_Data_NOFLAG.csv
  5. 0 1
      downloaded_data/farm_gate_agriculture_energy/2023-12-13/Emissions_Agriculture_Energy_E_AreaCodes.csv
  6. 0 1
      downloaded_data/farm_gate_agriculture_energy/2023-12-13/Emissions_Agriculture_Energy_E_Flags.csv
  7. 0 1
      downloaded_data/farm_gate_agriculture_energy/2023-12-13/GN_2023Oct_Final.pdf
  8. 0 1
      downloaded_data/farm_gate_agriculture_energy/2023-12-13/farm_gate_agriculture_energy.zip
  9. 1 1
      downloaded_data/farm_gate_agriculture_energy/2024-11-14/GN_2023Oct_Final.pdf
  10. 0 1
      downloaded_data/farm_gate_emissions_crops/2023-11-09/Emissions_crops_E_All_Data.csv
  11. 0 1
      downloaded_data/farm_gate_emissions_crops/2023-11-09/Emissions_crops_E_All_Data_NOFLAG.csv
  12. 0 1
      downloaded_data/farm_gate_emissions_crops/2023-11-09/Emissions_crops_E_AreaCodes.csv
  13. 0 1
      downloaded_data/farm_gate_emissions_crops/2023-11-09/Emissions_crops_E_Flags.csv
  14. 0 1
      downloaded_data/farm_gate_emissions_crops/2023-11-09/Emissions_crops_E_ItemCodes.csv
  15. 0 1
      downloaded_data/farm_gate_emissions_crops/2023-11-09/GCE_e.pdf
  16. 0 1
      downloaded_data/farm_gate_emissions_crops/2023-11-09/farm_gate_emissions_crops.zip
  17. 0 1
      downloaded_data/farm_gate_livestock/2023-11-09/Emissions_livestock_E_All_Data.csv
  18. 0 1
      downloaded_data/farm_gate_livestock/2023-11-09/Emissions_livestock_E_All_Data_NOFLAG.csv
  19. 0 1
      downloaded_data/farm_gate_livestock/2023-11-09/Emissions_livestock_E_AreaCodes.csv
  20. 0 1
      downloaded_data/farm_gate_livestock/2023-11-09/Emissions_livestock_E_Flags.csv
  21. 0 1
      downloaded_data/farm_gate_livestock/2023-11-09/Emissions_livestock_E_ItemCodes.csv
  22. 0 1
      downloaded_data/farm_gate_livestock/2023-11-09/GLE_e.pdf
  23. 0 1
      downloaded_data/farm_gate_livestock/2023-11-09/farm_gate_livestock.zip
  24. 0 1
      downloaded_data/land_use_drained_organic_soils/2023-11-09/Emissions_Drained_Organic_Soils_E_All_Data.csv
  25. 0 1
      downloaded_data/land_use_drained_organic_soils/2023-11-09/Emissions_Drained_Organic_Soils_E_All_Data_NOFLAG.csv
  26. 0 1
      downloaded_data/land_use_drained_organic_soils/2023-11-09/Emissions_Drained_Organic_Soils_E_AreaCodes.csv
  27. 0 1
      downloaded_data/land_use_drained_organic_soils/2023-11-09/Emissions_Drained_Organic_Soils_E_Flags.csv
  28. 0 1
      downloaded_data/land_use_drained_organic_soils/2023-11-09/Emissions_Drained_Organic_Soils_E_ItemCodes.csv
  29. 0 1
      downloaded_data/land_use_drained_organic_soils/2023-11-09/GV_e.pdf
  30. 0 1
      downloaded_data/land_use_drained_organic_soils/2023-11-09/land_use_drained_organic_soils.zip
  31. 0 1
      downloaded_data/land_use_fires/2023-11-09/Emissions_Land_Use_Fires_E_All_Data.csv
  32. 0 1
      downloaded_data/land_use_fires/2023-11-09/Emissions_Land_Use_Fires_E_All_Data_NOFLAG.csv
  33. 0 1
      downloaded_data/land_use_fires/2023-11-09/Emissions_Land_Use_Fires_E_AreaCodes.csv
  34. 0 1
      downloaded_data/land_use_fires/2023-11-09/Emissions_Land_Use_Fires_E_Flags.csv
  35. 0 1
      downloaded_data/land_use_fires/2023-11-09/GI_e.pdf
  36. 0 1
      downloaded_data/land_use_fires/2023-11-09/land_use_fires.zip
  37. 0 1
      downloaded_data/land_use_forests/2023-11-09/Emissions_Land_Use_Forests_E_All_Data.csv
  38. 0 1
      downloaded_data/land_use_forests/2023-11-09/Emissions_Land_Use_Forests_E_All_Data_NOFLAG.csv
  39. 0 1
      downloaded_data/land_use_forests/2023-11-09/Emissions_Land_Use_Forests_E_AreaCodes.csv
  40. 0 1
      downloaded_data/land_use_forests/2023-11-09/Emissions_Land_Use_Forests_E_Flags.csv
  41. 0 1
      downloaded_data/land_use_forests/2023-11-09/GF_e.pdf
  42. 0 1
      downloaded_data/land_use_forests/2023-11-09/land_use_forests.zip
  43. 0 1
      downloaded_data/pre_post_agricultural_production/2023-11-09/Emissions_Pre_Post_Production_E_All_Data.csv
  44. 0 1
      downloaded_data/pre_post_agricultural_production/2023-11-09/Emissions_Pre_Post_Production_E_All_Data_NOFLAG.csv
  45. 0 1
      downloaded_data/pre_post_agricultural_production/2023-11-09/Emissions_Pre_Post_Production_E_AreaCodes.csv
  46. 0 1
      downloaded_data/pre_post_agricultural_production/2023-11-09/Emissions_Pre_Post_Production_E_Flags.csv
  47. 0 1
      downloaded_data/pre_post_agricultural_production/2023-11-09/README_Methodological_Note.pdf
  48. 0 1
      downloaded_data/pre_post_agricultural_production/2023-11-09/pre_post_agricultural_production.zip
  49. 1 1
      downloaded_data/pre_post_agricultural_production/2024-11-14/README_Methodological_Note.pdf
  50. 33 128
      src/faostat_data_primap/download.py
  51. 1 1
      src/faostat_data_primap/exceptions.py
  52. 0 212
      tests/unit/test_download.py

+ 1 - 0
.gitattributes

@@ -3,3 +3,4 @@
 * annex.largefiles=((mimeencoding=binary)and(largerthan=0))
 *.csv annex.largefiles=anything
 *.zip annex.largefiles=anything
+*.pdf annex.largefiles=anything

+ 1 - 1
dodo.py

@@ -6,7 +6,7 @@ import datalad.api
 
 def task_download():
     """
-    test datalad target
+    Download latest data
     """
 
     def datalad_run_download():

+ 0 - 1
downloaded_data/farm_gate_agriculture_energy/2023-12-13/Emissions_Agriculture_Energy_E_All_Data.csv

@@ -1 +0,0 @@
-../../../.git/annex/objects/K2/xM/MD5E-s3747984--5c8e2441e4d635b94dbeca29d8a1cd0d.csv/MD5E-s3747984--5c8e2441e4d635b94dbeca29d8a1cd0d.csv

+ 0 - 1
downloaded_data/farm_gate_agriculture_energy/2023-12-13/Emissions_Agriculture_Energy_E_All_Data_NOFLAG.csv

@@ -1 +0,0 @@
-../../../.git/annex/objects/1V/8w/MD5E-s2905364--66bf9d1c3aba04a26b1acc2554e7c2d5.csv/MD5E-s2905364--66bf9d1c3aba04a26b1acc2554e7c2d5.csv

+ 0 - 1
downloaded_data/farm_gate_agriculture_energy/2023-12-13/Emissions_Agriculture_Energy_E_AreaCodes.csv

@@ -1 +0,0 @@
-../../../.git/annex/objects/ww/m4/MD5E-s6291--0c7df737cb3e007c75cbc38ab03cb1ae.csv/MD5E-s6291--0c7df737cb3e007c75cbc38ab03cb1ae.csv

+ 0 - 1
downloaded_data/farm_gate_agriculture_energy/2023-12-13/Emissions_Agriculture_Energy_E_Flags.csv

@@ -1 +0,0 @@
-../../../.git/annex/objects/ZJ/33/MD5E-s56--49ffa55879a2eb0ff6dba98c17944376.csv/MD5E-s56--49ffa55879a2eb0ff6dba98c17944376.csv

+ 0 - 1
downloaded_data/farm_gate_agriculture_energy/2023-12-13/GN_2023Oct_Final.pdf

@@ -1 +0,0 @@
-../../../.git/annex/objects/JK/1V/MD5E-s289407--8bb0d371bcfb2958de3106da9663edab.pdf/MD5E-s289407--8bb0d371bcfb2958de3106da9663edab.pdf

+ 0 - 1
downloaded_data/farm_gate_agriculture_energy/2023-12-13/farm_gate_agriculture_energy.zip

@@ -1 +0,0 @@
-../../../.git/annex/objects/4m/p0/MD5E-s1131872--3a3329f2115c62bab08ba71183623db7.zip/MD5E-s1131872--3a3329f2115c62bab08ba71183623db7.zip

+ 1 - 1
downloaded_data/farm_gate_agriculture_energy/2024-11-14/GN_2023Oct_Final.pdf

@@ -1 +1 @@
-/Users/danielbusch/Documents/FAOSTAT_data_primap/downloaded_data/farm_gate_agriculture_energy/2023-12-13/GN_2023Oct_Final.pdf
+../../../.git/annex/objects/JK/1V/MD5E-s289407--8bb0d371bcfb2958de3106da9663edab.pdf/MD5E-s289407--8bb0d371bcfb2958de3106da9663edab.pdf

+ 0 - 1
downloaded_data/farm_gate_emissions_crops/2023-11-09/Emissions_crops_E_All_Data.csv

@@ -1 +0,0 @@
-../../../.git/annex/objects/9X/27/MD5E-s18421900--06c176efdfe8eba45d3c55fe10f6d483.csv/MD5E-s18421900--06c176efdfe8eba45d3c55fe10f6d483.csv

+ 0 - 1
downloaded_data/farm_gate_emissions_crops/2023-11-09/Emissions_crops_E_All_Data_NOFLAG.csv

@@ -1 +0,0 @@
-../../../.git/annex/objects/x2/Jq/MD5E-s13411273--09d470954a229689107df6f1c44d28b4.csv/MD5E-s13411273--09d470954a229689107df6f1c44d28b4.csv

+ 0 - 1
downloaded_data/farm_gate_emissions_crops/2023-11-09/Emissions_crops_E_AreaCodes.csv

@@ -1 +0,0 @@
-../../../.git/annex/objects/ww/m4/MD5E-s6291--0c7df737cb3e007c75cbc38ab03cb1ae.csv/MD5E-s6291--0c7df737cb3e007c75cbc38ab03cb1ae.csv

+ 0 - 1
downloaded_data/farm_gate_emissions_crops/2023-11-09/Emissions_crops_E_Flags.csv

@@ -1 +0,0 @@
-../../../.git/annex/objects/gW/QQ/MD5E-s134--7852035bf48ab964ef4b9a62152be7dc.csv/MD5E-s134--7852035bf48ab964ef4b9a62152be7dc.csv

+ 0 - 1
downloaded_data/farm_gate_emissions_crops/2023-11-09/Emissions_crops_E_ItemCodes.csv

@@ -1 +0,0 @@
-../../../.git/annex/objects/W7/5Z/MD5E-s272--615170d0f5f83341b7a3783c59dfe51b.csv/MD5E-s272--615170d0f5f83341b7a3783c59dfe51b.csv

+ 0 - 1
downloaded_data/farm_gate_emissions_crops/2023-11-09/GCE_e.pdf

@@ -1 +0,0 @@
-../../../.git/annex/objects/9p/78/MD5E-s345124--d1210a945203a52677128d6e2c59fb20.pdf/MD5E-s345124--d1210a945203a52677128d6e2c59fb20.pdf

+ 0 - 1
downloaded_data/farm_gate_emissions_crops/2023-11-09/farm_gate_emissions_crops.zip

@@ -1 +0,0 @@
-../../../.git/annex/objects/mG/4x/MD5E-s5025708--d6b35891c494f61bb1699f669611a959.zip/MD5E-s5025708--d6b35891c494f61bb1699f669611a959.zip

+ 0 - 1
downloaded_data/farm_gate_livestock/2023-11-09/Emissions_livestock_E_All_Data.csv

@@ -1 +0,0 @@
-../../../.git/annex/objects/Jk/Z7/MD5E-s130221694--993f534b79c1479b4487a34d48d05948.csv/MD5E-s130221694--993f534b79c1479b4487a34d48d05948.csv

+ 0 - 1
downloaded_data/farm_gate_livestock/2023-11-09/Emissions_livestock_E_All_Data_NOFLAG.csv

@@ -1 +0,0 @@
-../../../.git/annex/objects/P0/9j/MD5E-s96060625--9f782ebc87a0dc22ed20f488dc9aae0e.csv/MD5E-s96060625--9f782ebc87a0dc22ed20f488dc9aae0e.csv

+ 0 - 1
downloaded_data/farm_gate_livestock/2023-11-09/Emissions_livestock_E_AreaCodes.csv

@@ -1 +0,0 @@
-../../../.git/annex/objects/4P/vG/MD5E-s6251--d86a24d9bb5f28127ff8970b7c033be9.csv/MD5E-s6251--d86a24d9bb5f28127ff8970b7c033be9.csv

+ 0 - 1
downloaded_data/farm_gate_livestock/2023-11-09/Emissions_livestock_E_Flags.csv

@@ -1 +0,0 @@
-../../../.git/annex/objects/gW/QQ/MD5E-s134--7852035bf48ab964ef4b9a62152be7dc.csv/MD5E-s134--7852035bf48ab964ef4b9a62152be7dc.csv

+ 0 - 1
downloaded_data/farm_gate_livestock/2023-11-09/Emissions_livestock_E_ItemCodes.csv

@@ -1 +0,0 @@
-../../../.git/annex/objects/GP/ZJ/MD5E-s611--12df116591f7c201ca38e86a76846d24.csv/MD5E-s611--12df116591f7c201ca38e86a76846d24.csv

+ 0 - 1
downloaded_data/farm_gate_livestock/2023-11-09/GLE_e.pdf

@@ -1 +0,0 @@
-../../../.git/annex/objects/w6/pm/MD5E-s405033--f82222b4fccfdfe14b8bba56c2dd698b.pdf/MD5E-s405033--f82222b4fccfdfe14b8bba56c2dd698b.pdf

+ 0 - 1
downloaded_data/farm_gate_livestock/2023-11-09/farm_gate_livestock.zip

@@ -1 +0,0 @@
-../../../.git/annex/objects/1M/0X/MD5E-s33910537--28bb5f9131517238e7a112e6871d5898.zip/MD5E-s33910537--28bb5f9131517238e7a112e6871d5898.zip

+ 0 - 1
downloaded_data/land_use_drained_organic_soils/2023-11-09/Emissions_Drained_Organic_Soils_E_All_Data.csv

@@ -1 +0,0 @@
-../../../.git/annex/objects/99/M8/MD5E-s2243842--559f478afc2a52862be39d60763caef5.csv/MD5E-s2243842--559f478afc2a52862be39d60763caef5.csv

+ 0 - 1
downloaded_data/land_use_drained_organic_soils/2023-11-09/Emissions_Drained_Organic_Soils_E_All_Data_NOFLAG.csv

@@ -1 +0,0 @@
-../../../.git/annex/objects/WZ/34/MD5E-s1666995--fcefba6724146fdbcdb43f0d0d4b79e3.csv/MD5E-s1666995--fcefba6724146fdbcdb43f0d0d4b79e3.csv

+ 0 - 1
downloaded_data/land_use_drained_organic_soils/2023-11-09/Emissions_Drained_Organic_Soils_E_AreaCodes.csv

@@ -1 +0,0 @@
-../../../.git/annex/objects/ww/m4/MD5E-s6291--0c7df737cb3e007c75cbc38ab03cb1ae.csv/MD5E-s6291--0c7df737cb3e007c75cbc38ab03cb1ae.csv

+ 0 - 1
downloaded_data/land_use_drained_organic_soils/2023-11-09/Emissions_Drained_Organic_Soils_E_Flags.csv

@@ -1 +0,0 @@
-../../../.git/annex/objects/ZJ/33/MD5E-s56--49ffa55879a2eb0ff6dba98c17944376.csv/MD5E-s56--49ffa55879a2eb0ff6dba98c17944376.csv

+ 0 - 1
downloaded_data/land_use_drained_organic_soils/2023-11-09/Emissions_Drained_Organic_Soils_E_ItemCodes.csv

@@ -1 +0,0 @@
-../../../.git/annex/objects/Vg/XG/MD5E-s25--91eb7aa7ff09282acaf3300f619b8934.csv/MD5E-s25--91eb7aa7ff09282acaf3300f619b8934.csv

+ 0 - 1
downloaded_data/land_use_drained_organic_soils/2023-11-09/GV_e.pdf

@@ -1 +0,0 @@
-../../../.git/annex/objects/m3/Gx/MD5E-s380874--a449b317052119d47931b6174e38de8c.pdf/MD5E-s380874--a449b317052119d47931b6174e38de8c.pdf

+ 0 - 1
downloaded_data/land_use_drained_organic_soils/2023-11-09/land_use_drained_organic_soils.zip

@@ -1 +0,0 @@
-../../../.git/annex/objects/FK/vp/MD5E-s387900--a022e0142fa658793302f93ce4820f51.zip/MD5E-s387900--a022e0142fa658793302f93ce4820f51.zip

+ 0 - 1
downloaded_data/land_use_fires/2023-11-09/Emissions_Land_Use_Fires_E_All_Data.csv

@@ -1 +0,0 @@
-../../../.git/annex/objects/X6/F8/MD5E-s8494656--8dd8d98e2b93871d065d3ab61cbbb2d0.csv/MD5E-s8494656--8dd8d98e2b93871d065d3ab61cbbb2d0.csv

+ 0 - 1
downloaded_data/land_use_fires/2023-11-09/Emissions_Land_Use_Fires_E_All_Data_NOFLAG.csv

@@ -1 +0,0 @@
-../../../.git/annex/objects/0j/QW/MD5E-s6357447--b13bfb628607550945857dff7ab22a90.csv/MD5E-s6357447--b13bfb628607550945857dff7ab22a90.csv

+ 0 - 1
downloaded_data/land_use_fires/2023-11-09/Emissions_Land_Use_Fires_E_AreaCodes.csv

@@ -1 +0,0 @@
-../../../.git/annex/objects/ww/m4/MD5E-s6291--0c7df737cb3e007c75cbc38ab03cb1ae.csv/MD5E-s6291--0c7df737cb3e007c75cbc38ab03cb1ae.csv

+ 0 - 1
downloaded_data/land_use_fires/2023-11-09/Emissions_Land_Use_Fires_E_Flags.csv

@@ -1 +0,0 @@
-../../../.git/annex/objects/ZJ/33/MD5E-s56--49ffa55879a2eb0ff6dba98c17944376.csv/MD5E-s56--49ffa55879a2eb0ff6dba98c17944376.csv

+ 0 - 1
downloaded_data/land_use_fires/2023-11-09/GI_e.pdf

@@ -1 +0,0 @@
-../../../.git/annex/objects/XK/gQ/MD5E-s308062--fdb50185d75c22aaa27a01c797e8c8ac.pdf/MD5E-s308062--fdb50185d75c22aaa27a01c797e8c8ac.pdf

+ 0 - 1
downloaded_data/land_use_fires/2023-11-09/land_use_fires.zip

@@ -1 +0,0 @@
-../../../.git/annex/objects/8j/pF/MD5E-s1749124--dc55869803658e9cd776b60ae24107eb.zip/MD5E-s1749124--dc55869803658e9cd776b60ae24107eb.zip

+ 0 - 1
downloaded_data/land_use_forests/2023-11-09/Emissions_Land_Use_Forests_E_All_Data.csv

@@ -1 +0,0 @@
-../../../.git/annex/objects/67/69/MD5E-s1110885--79f06db2c0b50d9d6262ad87e9981db7.csv/MD5E-s1110885--79f06db2c0b50d9d6262ad87e9981db7.csv

+ 0 - 1
downloaded_data/land_use_forests/2023-11-09/Emissions_Land_Use_Forests_E_All_Data_NOFLAG.csv

@@ -1 +0,0 @@
-../../../.git/annex/objects/K5/XV/MD5E-s806494--d3d276b80c5dbe86be357eacb9ce1869.csv/MD5E-s806494--d3d276b80c5dbe86be357eacb9ce1869.csv

+ 0 - 1
downloaded_data/land_use_forests/2023-11-09/Emissions_Land_Use_Forests_E_AreaCodes.csv

@@ -1 +0,0 @@
-../../../.git/annex/objects/8F/gx/MD5E-s6442--bb95e21597ca3104fc033daa29e87e2c.csv/MD5E-s6442--bb95e21597ca3104fc033daa29e87e2c.csv

+ 0 - 1
downloaded_data/land_use_forests/2023-11-09/Emissions_Land_Use_Forests_E_Flags.csv

@@ -1 +0,0 @@
-../../../.git/annex/objects/ZJ/33/MD5E-s56--49ffa55879a2eb0ff6dba98c17944376.csv/MD5E-s56--49ffa55879a2eb0ff6dba98c17944376.csv

+ 0 - 1
downloaded_data/land_use_forests/2023-11-09/GF_e.pdf

@@ -1 +0,0 @@
-../../../.git/annex/objects/P3/GF/MD5E-s366469--5cbedb33ed921167503f29e1b529a9d1.pdf/MD5E-s366469--5cbedb33ed921167503f29e1b529a9d1.pdf

+ 0 - 1
downloaded_data/land_use_forests/2023-11-09/land_use_forests.zip

@@ -1 +0,0 @@
-../../../.git/annex/objects/km/WQ/MD5E-s177332--a1265221e2763f2048a45f727864166e.zip/MD5E-s177332--a1265221e2763f2048a45f727864166e.zip

+ 0 - 1
downloaded_data/pre_post_agricultural_production/2023-11-09/Emissions_Pre_Post_Production_E_All_Data.csv

@@ -1 +0,0 @@
-../../../.git/annex/objects/F2/2V/MD5E-s9990445--1884fe626fe79701aef21dd62a7d999b.csv/MD5E-s9990445--1884fe626fe79701aef21dd62a7d999b.csv

+ 0 - 1
downloaded_data/pre_post_agricultural_production/2023-11-09/Emissions_Pre_Post_Production_E_All_Data_NOFLAG.csv

@@ -1 +0,0 @@
-../../../.git/annex/objects/ZW/65/MD5E-s7957490--02212a350da3de62f437750ff2190da7.csv/MD5E-s7957490--02212a350da3de62f437750ff2190da7.csv

+ 0 - 1
downloaded_data/pre_post_agricultural_production/2023-11-09/Emissions_Pre_Post_Production_E_AreaCodes.csv

@@ -1 +0,0 @@
-../../../.git/annex/objects/5F/x7/MD5E-s6898--e6afc975758367037e97b7a51601eef6.csv/MD5E-s6898--e6afc975758367037e97b7a51601eef6.csv

+ 0 - 1
downloaded_data/pre_post_agricultural_production/2023-11-09/Emissions_Pre_Post_Production_E_Flags.csv

@@ -1 +0,0 @@
-../../../.git/annex/objects/8f/5K/MD5E-s37--c802d00a3f6701103f12b45fd77c936d.csv/MD5E-s37--c802d00a3f6701103f12b45fd77c936d.csv

+ 0 - 1
downloaded_data/pre_post_agricultural_production/2023-11-09/README_Methodological_Note.pdf

@@ -1 +0,0 @@
-../../../.git/annex/objects/28/j9/MD5E-s950199--b540bc153f62f6e3c17d79fdde2802f6.pdf/MD5E-s950199--b540bc153f62f6e3c17d79fdde2802f6.pdf

+ 0 - 1
downloaded_data/pre_post_agricultural_production/2023-11-09/pre_post_agricultural_production.zip

@@ -1 +0,0 @@
-../../../.git/annex/objects/0z/JM/MD5E-s4159211--d529d764b672c1b778d13a7ca2cc3d13.zip/MD5E-s4159211--d529d764b672c1b778d13a7ca2cc3d13.zip

+ 1 - 1
downloaded_data/pre_post_agricultural_production/2024-11-14/README_Methodological_Note.pdf

@@ -1 +1 @@
-/Users/danielbusch/Documents/FAOSTAT_data_primap/downloaded_data/pre_post_agricultural_production/2023-11-09/README_Methodological_Note.pdf
+../../../.git/annex/objects/28/j9/MD5E-s950199--b540bc153f62f6e3c17d79fdde2802f6.pdf/MD5E-s950199--b540bc153f62f6e3c17d79fdde2802f6.pdf

+ 33 - 128
src/faostat_data_primap/download.py

@@ -1,6 +1,5 @@
 """Downloads data from FAOSTAT website."""
 
-import hashlib
 import os
 import pathlib
 import time
@@ -18,78 +17,6 @@ from faostat_data_primap.helper.definitions import domains
 from faostat_data_primap.helper.paths import downloaded_data_path
 
 
-def find_previous_release_path(
-    current_release_path: pathlib.Path,
-) -> pathlib.Path | None:
-    """
-    Find the most recent previous release directory within same domain
-
-    Release directories are assumed to be subdirectories within the same parent
-    directory as `current_release_path`. The Sorting is done alphabetically,
-    so directory names should follow the naming convention YYYY-MM-DD
-
-    Parameters
-    ----------
-    current_release_path : pathlib.Path
-        The path of the current release directory.
-
-    Returns
-    -------
-    pathlib.Path or None
-        Returns the path of the most recent previous release directory if one exists,
-        otherwise returns None.
-    """
-    domain_path = current_release_path.parent
-    all_releases = [
-        release_name
-        for release_name in os.listdir(current_release_path.parent)
-        if (domain_path / release_name).is_dir()
-    ]
-
-    # make sure all directories follow the naming convention
-    try:
-        all_releases_datetime = [
-            datetime.strptime(release, "%Y-%m-%d") for release in all_releases
-        ]
-    except ValueError as e:
-        msg = (
-            "All release folders must be in YYYY-MM-DD format, "
-            f"got {sorted(all_releases)}"
-        )
-        raise ValueError(msg) from e
-
-    all_releases_datetime = sorted(all_releases_datetime)
-    current_release_datetime = datetime.strptime(current_release_path.name, "%Y-%m-%d")
-    index = all_releases_datetime.index(current_release_datetime)
-
-    # if the current release is the latest or the only one
-    if index == 0:
-        return None
-
-    return domain_path / all_releases_datetime[index - 1].strftime("%Y-%m-%d")
-
-
-def calculate_checksum(file_path: pathlib.Path) -> str:
-    """
-    Calculate the SHA-256 checksum of a file.
-
-    Parameters
-    ----------
-    file_path : pathlib.Path
-        The path to the file for which the checksum is calculated.
-
-    Returns
-    -------
-    str
-        The SHA-256 checksum of the file as a hexadecimal string.
-    """
-    sha256 = hashlib.sha256()
-    with open(file_path, "rb") as f:
-        for chunk in iter(lambda: f.read(4096), b""):
-            sha256.update(chunk)
-    return sha256.hexdigest()
-
-
 def download_methodology(url_download: str, save_path: pathlib.Path) -> None:
     """
     Download methodology file.
@@ -115,46 +42,16 @@ def download_methodology(url_download: str, save_path: pathlib.Path) -> None:
     download_path = save_path / filename
 
     if download_path.exists():
-        print(f"Skipping download of {download_path} because it already exists.")
-        return
-
-    previous_release = find_previous_release_path(save_path)
-    # Attempt to find a file to compare in the previous release
-    if previous_release:
-        file_to_compare = previous_release / filename
-        if file_to_compare.exists():
-            response = requests.get(url_download, stream=True, timeout=30)
-            response.raise_for_status()
-            file_to_download_checksum = hashlib.sha256(response.content).hexdigest()
-            file_to_compare_checksum = calculate_checksum(file_to_compare)
-
-            if file_to_download_checksum == file_to_compare_checksum:
-                print(
-                    f"File '{filename}' is identical in the previous release. "
-                    f"Creating symlink."
-                )
-                os.symlink(file_to_compare, download_path)
-                return
-            else:
-                print(
-                    f"File '{filename}' differs from previous release. "
-                    f"Downloading file."
-                )
+        if download_path.is_symlink():
+            os.remove(download_path)
         else:
-            print(f"File '{filename}' not found in previous release. Downloading file.")
-            response = requests.get(url_download, stream=True, timeout=30)
-            response.raise_for_status()
-
-        # Save downloaded file to current release
-        with open(download_path, "wb") as f:
-            f.write(response.content)
+            print(f"Skipping download of {download_path} because it already exists.")
+            return
 
-    else:
-        print(f"No previous release found. Downloading file '{filename}'.")
-        response = requests.get(url_download, stream=True, timeout=30)
-        response.raise_for_status()
-        with open(download_path, "wb") as f:
-            f.write(response.content)
+    response = requests.get(url_download, stream=True, timeout=30)
+    response.raise_for_status()
+    with open(download_path, "wb") as f:
+        f.write(response.content)
 
 
 def get_html_content(url: str) -> BeautifulSoup:
@@ -241,17 +138,18 @@ def download_file(url_download: str, save_path: pathlib.Path) -> bool:
     -------
         True if the file was downloaded, False if a cached file was found
     """
-    if not save_path.exists():
-        with requests.get(url_download, stream=True, timeout=30) as response:
-            response.raise_for_status()
+    if save_path.exists():
+        if not save_path.is_symlink():
+            print(f"Skipping download of {save_path} because it already exists.")
+            return False
+        os.remove(save_path)
 
-            with open(save_path, mode="wb") as file:
-                file.write(response.content)
+    with requests.get(url_download, stream=True, timeout=30) as response:
+        response.raise_for_status()
+        with open(save_path, mode="wb") as file:
+            file.write(response.content)
 
-        return True
-    else:
-        print(f"Skipping download of {save_path}" " because it already exists.")
-    return False
+    return True
 
 
 def unzip_file(local_filename: pathlib.Path) -> list[str]:
@@ -275,14 +173,21 @@ def unzip_file(local_filename: pathlib.Path) -> list[str]:
                     extracted_file_path = local_filename.parent / file_info.filename
 
                     if extracted_file_path.exists():
-                        print(
-                            f"File '{file_info.filename}' already exists. "
-                            f"Skipping extraction."
-                        )
-                    else:
-                        print(f"Extracting '{file_info.filename}'...")
-                        zip_file.extract(file_info, local_filename.parent)
-                        unzipped_files.append(local_filename.name)
+                        if not extracted_file_path.is_symlink():
+                            print(
+                                f"File '{file_info.filename}' already exists. "
+                                f"Skipping extraction."
+                            )
+                            continue
+                        else:
+                            file_to_unzip_path = (
+                                local_filename.parent / file_info.filename
+                            )
+                            os.remove(file_to_unzip_path)
+
+                    print(f"Extracting '{file_info.filename}'...")
+                    zip_file.extract(file_info, local_filename.parent)
+                    unzipped_files.append(local_filename.name)
 
         # TODO Better error logging/visibility
         except zipfile.BadZipFile:

+ 1 - 1
src/faostat_data_primap/exceptions.py

@@ -18,5 +18,5 @@ class DateTagNotFoundError(Exception):
         url
             Link to download domain page
         """
-        msg = f"Tag for date lat updated was not found on page with url {url}."
+        msg = f"Tag for date last updated was not found on page with url {url}."
         super().__init__(msg)

+ 0 - 212
tests/unit/test_download.py

@@ -47,215 +47,3 @@ def temp_domain_directories(tmp_path):
         "downloaded_data": downloaded_data,
         "domain_paths": domain_paths,
     }
-
-
-@pytest.mark.parametrize(
-    "releases," "current_release_date, " "expected_result_date",
-    [
-        pytest.param(
-            ["2023-12-13", "2022-03-18", "2024-11-29", "2024-11-09"],
-            "2024-11-29",
-            "2024-11-09",
-            id="current release is latest release",
-        ),
-        pytest.param(
-            ["2023-12-13", "2022-03-18", "2024-11-29", "2024-11-09"],
-            "2023-12-13",
-            "2022-03-18",
-            id="current somewhere not the latest release",
-        ),
-    ],
-)
-def test_find_previous_release_path_exists(
-    temp_domain_directories, releases, current_release_date, expected_result_date
-):
-    domain_path = temp_domain_directories["domain_paths"][
-        0
-    ]  # farm_gate_emissions_crops
-    current_release_path = domain_path / current_release_date
-    expected_result = domain_path / expected_result_date
-
-    release_paths = []
-    for release in releases:
-        release_path = domain_path / release
-        release_path.mkdir()
-        release_paths.append(release_path)
-
-    result = find_previous_release_path(current_release_path)
-
-    assert result == expected_result
-
-
-@pytest.mark.parametrize(
-    "releases,current_release_date",
-    [
-        pytest.param(
-            ["2023-12-13", "2022-03-18", "2024-11-29", "2024-11-09"],
-            "2022-03-18",
-            id="current release is oldest release",
-        ),
-        pytest.param(
-            ["2024-11-09"], "2024-11-09", id="current release is only release"
-        ),
-    ],
-)
-def test_find_previous_release_path_that_does_not_exists(
-    temp_domain_directories, releases, current_release_date
-):
-    domain_path = temp_domain_directories["domain_paths"][
-        0
-    ]  # farm_gate_emissions_crops
-    current_release_path = domain_path / current_release_date
-
-    release_paths = []
-    for release in releases:
-        release_path = domain_path / release
-        release_path.mkdir()
-        release_paths.append(release_path)
-
-    result = find_previous_release_path(current_release_path)
-
-    assert not result
-
-
-@pytest.mark.parametrize(
-    "releases,current_release_date, error_msg",
-    [
-        pytest.param(
-            ["2023-12-13", "2022-03-18", "2024-11-29", "20240-11-09"],
-            "2022-03-18",
-            (
-                "All release folders must be in YYYY-MM-DD format, got "
-                "['2022-03-18', '2023-12-13', '2024-11-29', '20240-11-09']"
-            ),
-            id="typo",
-        ),
-        pytest.param(
-            ["20231213", "2022-03-18", "2024-11-29", "2024-11-09"],
-            "2022-03-18",
-            (
-                "All release folders must be in YYYY-MM-DD format, got "
-                "['2022-03-18', '20231213', '2024-11-09', '2024-11-29']"
-            ),
-            id="missing hyphen",
-        ),
-    ],
-)
-def test_find_previous_release_path_wrong_dir_format(
-    temp_domain_directories, releases, current_release_date, error_msg
-):
-    domain_path = temp_domain_directories["domain_paths"][
-        0
-    ]  # farm_gate_emissions_crops
-    current_release_path = domain_path / current_release_date
-
-    release_paths = []
-    for release in releases:
-        release_path = domain_path / release
-        release_path.mkdir()
-        release_paths.append(release_path)
-
-    with pytest.raises(ValueError) as excinfo:
-        result = find_previous_release_path(current_release_path)  # noqa: F841
-
-    assert str(excinfo.value) == error_msg
-
-
-def test_calculate_checksum(tmp_path):
-    filepath_a = tmp_path / "test_file_a.txt"
-    with open(filepath_a, "w") as f:
-        f.write("content of file a")
-
-    filepath_b = tmp_path / "test_file_b.txt"
-    with open(filepath_b, "w") as f:
-        f.write("content of file a")
-
-    filepath_c = tmp_path / "test_file_c.txt"
-    with open(filepath_c, "w") as f:
-        f.write("content of file c")
-
-    checksum_a = calculate_checksum(filepath_a)
-
-    checksum_b = calculate_checksum(filepath_b)
-
-    checksum_c = calculate_checksum(filepath_c)
-
-    assert checksum_a == checksum_b
-
-    assert checksum_b != checksum_c
-
-
-def test_file_exists_in_previous_release_and_is_the_same(temp_domain_directories):
-    # set up temporary directories
-    downloaded_data_path = temp_domain_directories["downloaded_data"]
-    domain_path = temp_domain_directories["domain_paths"][
-        0
-    ]  # farm_gate_emissions_crops
-
-    # make folders for different releases
-    for release in ["2023-12-13", "2022-03-18", "2024-11-29", "2024-11-09"]:
-        release_path = domain_path / release
-        release_path.mkdir()
-
-    file_to_compare_path = domain_path / "2024-11-09" / "GCE_e.pdf"
-    response = requests.get(
-        "https://files-faostat.fao.org/production/GCE/GCE_e.pdf",
-        stream=True,
-        timeout=30,
-    )
-    response.raise_for_status()  # Check for successful request
-    with open(file_to_compare_path, "wb") as f:
-        f.write(response.content)
-    save_path = downloaded_data_path / "farm_gate_emissions_crops" / "2024-11-29"
-    download_methodology(
-        "https://files-faostat.fao.org/production/GCE/GCE_e.pdf", save_path=save_path
-    )
-    downloaded_file_path = domain_path / "2024-11-29" / "GCE_e.pdf"
-    assert downloaded_file_path.is_symlink()
-
-
-def test_methodology_document_exists_in_previous_release_but_is_different(
-    temp_domain_directories,
-):
-    # set up temporary directories
-    domain_path = temp_domain_directories["domain_paths"][
-        0
-    ]  # farm_gate_emissions_crops
-
-    # make folders for different releases
-    for release in ["2023-12-13", "2022-03-18", "2024-11-29", "2024-11-09"]:
-        release_path = domain_path / release
-        release_path.mkdir()
-
-    file_to_compare_path = domain_path / "2024-11-09" / "GCE_e.pdf"
-    with open(file_to_compare_path, "wb") as f:
-        s = "hi"
-        f.write(s.encode("utf-8"))
-
-    save_path = domain_path / "2024-11-29"
-    download_methodology(
-        "https://files-faostat.fao.org/production/GCE/GCE_e.pdf", save_path=save_path
-    )
-    downloaded_file_path = domain_path / "2024-11-29" / "GCE_e.pdf"
-    assert downloaded_file_path.exists()
-
-
-def test_methodology_document_does_not_exist_in_previous_release(
-    temp_domain_directories,
-):
-    # set up temporary directories
-    domain_path = temp_domain_directories["domain_paths"][
-        0
-    ]  # farm_gate_emissions_crops
-
-    # make folders for different releases
-    for release in ["2023-12-13", "2022-03-18", "2024-11-29", "2024-11-09"]:
-        release_path = domain_path / release
-        release_path.mkdir()
-
-    save_path = domain_path / "2024-11-29"
-    download_methodology(
-        "https://files-faostat.fao.org/production/GCE/GCE_e.pdf", save_path=save_path
-    )
-    downloaded_file_path = domain_path / "2024-11-29" / "GCE_e.pdf"
-    assert downloaded_file_path.exists()