import argparse
import time
from pathlib import Path
from random import randrange

import pandas as pd
from bs4 import BeautifulSoup
from selenium.webdriver import Firefox
from selenium.webdriver.firefox.options import Options

from unfccc_submission_info import get_unfccc_submission_info

root = Path(__file__).absolute().parents[2]
max_tries = 10

descr = ("Download UNFCCC National Inventory Submissions lists "
         "and create list of submissions as CSV file. Based on "
         "process.py from national-inventory-submissions "
         "(https://github.com/openclimatedata/national-inventory-submisions)")
parser = argparse.ArgumentParser(description=descr)
parser.add_argument(
    '--year',
    help='Year to download'
)

args = parser.parse_args()
year = args.year

print("Fetching submissions for {}".format(year))

# TODO: move to utils as this is used in two places
if int(year) == 2019:
    url = (
        "https://unfccc.int/process-and-meetings/transparency-and-reporting/"
        "reporting-and-review-under-the-convention/"
        "greenhouse-gas-inventories-annex-i-parties/"
        "national-inventory-submissions-{}".format(year)
    )
elif int(year) in range(2020, 2023):
    url = (
        "https://unfccc.int/ghg-inventories-annex-i-parties/{}".format(year)
    )
elif int(year) >= 2023:
    url = (
        "https://unfccc.int/process-and-meetings/transparency-and-reporting/"
        "reporting-and-review-under-the-convention/"
        "greenhouse-gas-inventories-annex-i-parties/"
        "national-inventory-submissions-{}".format(year)
    )
else:
    url = (
        "https://unfccc.int/process/transparency-and-reporting/"
        "reporting-and-review-under-the-convention/"
        "greenhouse-gas-inventories-annex-i-parties/"
        "submissions/national-inventory-submissions-{}".format(year)
    )

print(f"Using {url} to get submissions list")

# set options for headless mode
profile_path = ".firefox"
options = Options()
options.add_argument('-headless')

# create profile for headless mode and automatic downloading
options.set_preference('profile', profile_path)

# set up selenium driver
driver = Firefox(options=options)
driver.get(url)

html = BeautifulSoup(driver.page_source, "html.parser")
table = html.find("table")

# Check if a table was found. If not, the GET request didn't work,
# likely because of a captcha on the site.
### TODO: replace by error message
if not table:
    # try to load html file from disk
    print('Download failed, trying to load manually downloaded file')
    fallback_file = (
        "manual_page_downloads/National-Inventory-Submissions-{}.html".format(year)
    )
    try:
        with open(fallback_file) as file:
            content = file.read()
    except FileNotFoundError:
        print("Manually downloaded file " + fallback_file + " not found")
        exit()
    html = BeautifulSoup(content, "html.parser")
    table = html.find("table")
    if not table:
        print(
            "Manually downloaded file " + fallback_file
            + " contains no submissions table"
        )
        exit()

links = table.findAll('a')

targets = []  # sub-pages
downloads = []
no_downloads = []

# Check links for zipfiles or subpages
for link in links:
    if "href" not in link.attrs:
        continue
    href = link.attrs["href"]
    if "/documents/" in href:
        if "title" in link.attrs.keys():
            title = link.attrs["title"]
        else:
            title = link.contents[0]
        if href.startswith("/documents"):
            href = "https://unfccc.int" + href
        # Only add pages in the format https://unfccc.int/documents/65587
        # to further downloads
        if str(Path(href).parent).endswith("documents"):
            targets.append({"title": title, "url": href})
    elif href.endswith(".zip"):
        if href.startswith("/files"):
            href = "https://unfccc.int" + href
        country = Path(href).name.split("-")[0].upper()
        title = f"{country} {link.contents[0]}"
        filename = Path(href).name
        # filenames look like <country>-<...>-<kind>..., so the kind is the
        # third dash-separated part (guard against shorter names to avoid an
        # IndexError)
        file_parts = filename.split('-')
        if len(file_parts) >= 3:
            kind = file_parts[2].upper()
        elif filename.startswith('asr'):
            kind = 'CRF'
        else:
            kind = None
        print("\t".join([kind or "", country, title, href]))
        downloads.append({"Kind": kind, "Country": country,
                          "Title": title, "URL": href})

# Go through sub-pages.
for target in targets:
    # wait a random time between requests to avoid being blocked
    time.sleep(randrange(5, 15))
    url = target["url"]
    submission_info = get_unfccc_submission_info(url, driver, max_tries)
    if submission_info:
        downloads = downloads + submission_info
    else:
        no_downloads.append((target["title"], url))

if len(no_downloads) > 0:
    print("No downloads for ", no_downloads)

driver.close()

df = pd.DataFrame(downloads)
df.to_csv(root / "downloaded_data" / "UNFCCC" /
          f"submissions-annexI_{year}.csv", index=False)
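
# Example invocation (the script filename below is a placeholder; use this
# file's actual name). Requires Firefox plus geckodriver on the PATH for the
# Selenium driver to start:
#
#   python fetch_annexI_submissions.py --year 2023
#
# The resulting CSV is written to downloaded_data/UNFCCC/ under the repository
# root, i.e. two directories above this script's folder (see `parents[2]`).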