import os

from src.faostat_data_primap.download import download_all_domains
from src.faostat_data_primap.read import read_latest_data


# test the complete download and read process
def test_download_all_domains(tmp_path):
    """Run the complete pipeline: download all domains, then read and save them.

    The test first calls ``download_all_domains`` into a temporary folder and
    checks that exactly the expected domain directories were created and that
    every release directory contains at least one ``.csv``, ``.pdf`` and
    ``.zip`` file. It then calls ``read_latest_data`` on the downloaded files
    and checks that a single ``v*`` release folder holding one ``.nc``, one
    ``.csv`` and one ``.yaml`` file was written.

    Parameters
    ----------
    tmp_path
        Temporary directory (pytest ``tmp_path`` fixture) under which the
        ``downloaded_data`` and ``extracted_data`` folders are created.
    """
    downloaded_data_path = tmp_path / "downloaded_data"
    download_all_domains(downloaded_data_path=downloaded_data_path)

    expected_downloaded_domains = [
        "farm_gate_emissions_crops",
        "farm_gate_livestock",
        "farm_gate_agriculture_energy",
        "land_use_forests",
        "land_use_fires",
        "land_use_drained_organic_soils",
        "pre_post_agricultural_production",
    ]
    required_download_suffixes = (".csv", ".pdf", ".zip")

    domains = []
    for domain in downloaded_data_path.iterdir():
        # Skip stray files: iterating a non-directory would raise instead of
        # letting the domain comparison below report the real problem.
        if not domain.is_dir():
            continue
        domains.append(domain.name)

        releases = [release for release in domain.iterdir() if release.is_dir()]
        # Without this check the per-release assertions below would pass
        # vacuously for a domain that has no release at all.
        assert releases, f"no release directory found for domain {domain.name}"

        for release in releases:
            downloaded_data = os.listdir(release)
            # make sure we have at least one .csv, one .pdf and one .zip file
            for suffix in required_download_suffixes:
                assert any(
                    f.endswith(suffix) for f in downloaded_data
                ), f"no {suffix} file in {release}"

    assert sorted(expected_downloaded_domains) == sorted(domains)

    extracted_data_path = tmp_path / "extracted_data"

    # read and save latest data
    read_latest_data(
        downloaded_data_path_custom=downloaded_data_path, save_path=extracted_data_path
    )

    release_folder = os.listdir(extracted_data_path)

    # there should be one directory created
    assert len(release_folder) == 1
    # and it starts with "v" (the date changes with each release)
    assert release_folder[0].startswith("v")

    output_files = os.listdir(extracted_data_path / release_folder[0])
    # in the folder there should be three files
    assert len(output_files) == 3

    # a .yaml, .csv, and .nc file
    required_extensions = {"nc", "csv", "yaml"}
    file_extensions = {file.split(".")[-1] for file in output_files}
    assert required_extensions == file_extensions