Daniel Busch 4 months ago
parent
commit
70ccf44504

+ 3 - 1
.github/workflows/ci.yaml

@@ -20,7 +20,9 @@ jobs:
           venv-id: "docs"
           poetry-dependency-install-flags: "--all-extras --only 'main,dev'"
       - name: mypy
-        run: MYPYPATH=stubs poetry run mypy src
+        run: |
+          mypy --install-types
+          MYPYPATH=stubs poetry run mypy src
 
   docs:
     if: ${{ !github.event.pull_request.draft }}

+ 1 - 1
.pre-commit-config.yaml

@@ -14,7 +14,7 @@ repos:
       - id: check-case-conflict
       - id: check-json
       - id: check-merge-conflict
-      - id: check-symlinks
+#      - id: check-symlinks  # disabled because of DATALAD
       - id: check-yaml
       - id: debug-statements
       - id: detect-private-key

+ 7 - 8
src/faostat_data_primap/download.py

@@ -7,7 +7,6 @@ import time
 import zipfile
 from datetime import datetime
 
-import bs4
 import requests
 from bs4 import BeautifulSoup
 from selenium import webdriver
@@ -69,7 +68,7 @@ def find_previous_release_path(
     return domain_path / all_releases_datetime[index - 1].strftime("%Y-%m-%d")
 
 
-def calculate_checksum(file_path) -> str:
+def calculate_checksum(file_path: pathlib.PosixPath) -> str:
     """
     Calculate the SHA-256 checksum of a file.
 
@@ -90,7 +89,7 @@ def calculate_checksum(file_path) -> str:
     return sha256.hexdigest()
 
 
-def download_methodology(url_download: str, save_path: pathlib.PosixPath):
+def download_methodology(url_download: str, save_path: pathlib.PosixPath) -> None:
     """
     Download methodology file.
 
@@ -157,7 +156,7 @@ def download_methodology(url_download: str, save_path: pathlib.PosixPath):
             f.write(response.content)
 
 
-def get_html_content(url: str) -> bs4.BeautifulSoup:
+def get_html_content(url: str) -> BeautifulSoup:
     """
     Get html from url.
 
@@ -188,7 +187,7 @@ def get_html_content(url: str) -> bs4.BeautifulSoup:
     return BeautifulSoup(html_content, "html.parser")
 
 
-def get_last_updated_date(soup: bs4.BeautifulSoup, url: str) -> str:
+def get_last_updated_date(soup: BeautifulSoup, url: str) -> str:
     """
     Get the date when the data set was last updated from the html text
 
@@ -224,7 +223,7 @@ def get_last_updated_date(soup: bs4.BeautifulSoup, url: str) -> str:
     return last_updated
 
 
-def download_file(url_download: str, save_path: pathlib.PosixPath):
+def download_file(url_download: str, save_path: pathlib.PosixPath) -> bool:
     """
     Download file.
 
@@ -254,7 +253,7 @@ def download_file(url_download: str, save_path: pathlib.PosixPath):
     return False
 
 
-def unzip_file(local_filename: pathlib.PosixPath):
+def unzip_file(local_filename: pathlib.PosixPath) -> bool:
     """
     Unzip files in same directory. Skip if files are already there
 
@@ -295,7 +294,7 @@ def unzip_file(local_filename: pathlib.PosixPath):
 
 
 def download_all_domains(
-    domains: list[tuple[str]] = domains,
+    domains: dict[str, dict[str, str]] = domains,
     downloaded_data_path: str = downloaded_data_path,
 ) -> list[str]:
     """

+ 13 - 14
src/faostat_data_primap/exceptions.py

@@ -6,18 +6,17 @@ class DateTagNotFoundError(Exception):
     Raised when date for latest update cannot be found on FAO domain website
     """
 
+    def __init__(
+        self,
+        url: str,
+    ) -> None:
+        """
+        Initialise the error
 
-def __init__(
-    self,
-    url: "str",
-) -> None:
-    """
-    Initialise the error
-
-    Parameters
-    ----------
-    url
-        Link to download domain page
-    """
-    msg = f"Tag for date lat updated was not found on page with url {url}."
-    super().__init__(msg)
+        Parameters
+        ----------
+        url
+            Link to download domain page
+        """
+        msg = f"Tag for date lat updated was not found on page with url {url}."
+        super().__init__(msg)

+ 2 - 2
src/faostat_data_primap/helper/definitions.py

@@ -1,5 +1,5 @@
 """definitions like folders, mappings etc."""
-
+import pathlib
 from pathlib import Path
 
 domains = {
@@ -41,7 +41,7 @@ domains = {
 }
 
 
-def get_root_path(root_indicator: str = ".git"):
+def get_root_path(root_indicator: str = ".git") -> pathlib.PosixPath:
     """
     Traverse up from the current script location to find the repository root.