Browse Source

clean up [skip ci]

Daniel Busch 5 months ago
parent
commit
4ccafe1268
2 changed files with 13 additions and 13 deletions
  1. 3 11
      scripts/remove_downloads.py
  2. 10 2
      src/faostat_data_primap/download.py

+ 3 - 11
scripts/remove_downloads.py

@@ -1,9 +1,7 @@
-"""Remove downloads for a particular date.
+"""Remove all downloads.
 
-Files are saved in a folder named after the current date,
-for example downloaded_data/farm_gate_agriculture_energy/2024-11-07
-This script deletes all files in such a folder. It is
-useful when testing downloads. Needs to be updated with the directory
+This script deletes all downloaded and unzipped files. It is
+useful for testing purposes. Needs to be updated with the directory
 structure or maybe can be deleted altogether later.
 """
 
@@ -13,12 +11,6 @@ import os
 from faostat_data_primap.helper.definitions import downloaded_data_path
 
 
-# @click.command()
-# @click.option(
-#     "--level",
-#     help="Delete all files on domain or release level",
-#     default="domain",
-# )
 def run():
     """
     Delete all downloaded files for all domains and all releases

+ 10 - 2
src/faostat_data_primap/download.py

@@ -26,8 +26,6 @@ def get_html_content(url: str) -> bs4.BeautifulSoup:
     Returns
     -------
         html content
-    -------
-
     """
     # If the chrome driver isn't found on your system PATH, Selenium
     # will automatically download it for you. Make sure there is no
@@ -49,6 +47,11 @@ def get_last_updated_date(soup: bs4.BeautifulSoup, url: str) -> str:
     """
     Get the date when data set was last updated from html text
 
+    The FAOSTAT domain overview page includes the date when
+    the data set was last updated. We need it to label our downloaded
+    data sets. This function searches for and extracts the date
+    from the HTML code.
+
     Parameters
     ----------
     soup
@@ -60,6 +63,11 @@ def get_last_updated_date(soup: bs4.BeautifulSoup, url: str) -> str:
     Returns
     -------
         date when data set was last updated
+
+    Raises
+    ------
+    DateTagNotFoundError
+        If the tag for the date is not found in the html code
     """
     date_tag = soup.find("p", {"data-role": "date"})