crdanielbusch
/
FAOSTAT_data_primap


			
"""Downloads all domain data sets from the FAOSTAT website."""

from pathlib import Path

from faostat_data_primap.download import (
    download_file,
    download_methodology,
    get_html_content,
    get_last_updated_date,
    unzip_file,
)
from faostat_data_primap.helper.definitions import domains, downloaded_data_path


def download_all_domains(
    domains: dict[str, dict[str, str]],
    downloaded_data_path: Path = downloaded_data_path,
) -> list[str]:
    """
    Download and unpack all climate-related domains from the FAO stat website.

    For every domain, read the date the data set was last updated from the
    domain overview page and make sure the directory
    ``<downloaded_data_path>/<domain name>/<last updated>`` exists. Then
    fetch the methodology document and the zip archive into that directory
    and unpack the archive.

    Parameters
    ----------
    domains
        Mapping from the data set name to a mapping with the keys
        ``"url_domain"`` (domain overview page), ``"url_download"`` (zip
        archive) and ``"url_methodology"`` (methodology document).
    downloaded_data_path
        Root directory below which the per-domain directories are created.
        Plain strings are accepted as well and converted to ``Path``.

    Returns
    -------
        List of the local zip file paths (as strings), one per domain, in
        the iteration order of ``domains``.

    Notes
    -----
    Whether an already existing file is downloaded again is decided by
    ``download_file`` / ``download_methodology``; this function calls them
    unconditionally.

    """
    # The signature used to advertise ``str`` although the body needs the
    # ``Path`` API; converting here keeps both kinds of callers working.
    data_root = Path(downloaded_data_path)

    downloaded_files = []
    for ds_name, urls in domains.items():
        url = urls["url_domain"]
        url_download = urls["url_download"]
        url_methodology = urls["url_methodology"]

        # The release date is read from the overview page and used as the
        # name of the innermost directory.
        soup = get_html_content(url)
        last_updated = get_last_updated_date(soup, url)

        # One call creates root, domain and release directory as needed and
        # does not fail when any of them already exists.
        local_data_dir = data_root / ds_name / last_updated
        local_data_dir.mkdir(parents=True, exist_ok=True)

        download_methodology(save_path=local_data_dir, url_download=url_methodology)

        local_filename = local_data_dir / f"{ds_name}.zip"

        download_file(url_download=url_download, save_path=local_filename)

        downloaded_files.append(str(local_filename))

        unzip_file(local_filename)

    return downloaded_files


if __name__ == "__main__":
    # Script entry point: process every domain listed in the imported
    # ``domains`` definition, using the default download directory.
    download_all_domains(domains=domains)