Selaa lähdekoodia

Adapt code to new selenium api

Johannes Gütschow 3 vuotta sitten
vanhempi
commit
9baada96c7

+ 9 - 8
code/UNFCCC_downloader/download_bur.py

@@ -4,7 +4,8 @@ import shutil
 import time
 import os
 from datetime import date
-from selenium import webdriver
+from selenium.webdriver import Firefox
+from selenium.webdriver.firefox.options import Options
 from random import randrange
 
 from pathlib import Path
@@ -40,16 +41,16 @@ if not download_path.exists():
     download_path.mkdir(parents=True)
 
 # set options for headless mode
-options = webdriver.firefox.options.Options()
-# options.add_argument('-headless')
+profile_path = ".firefox"
+options = Options()
+#options.add_argument('-headless')
 
-# create profile for headless mode 
-profile = webdriver.FirefoxProfile()
-profile.set_preference('browser.download.folderList', 2)
+# create profile for headless mode and automatic downloading
+options.set_preference('profile', profile_path)
+options.set_preference('browser.download.folderList', 2)
 
 # set up selenium driver
-driver = webdriver.Firefox(options=options, firefox_profile=profile)
-
+driver = Firefox(options=options)
 # visit the main data page once to create cookies
 driver.get(url)
 time.sleep(20)

+ 12 - 8
code/UNFCCC_downloader/download_nc.py

@@ -4,7 +4,8 @@ import shutil
 import time
 import os
 from datetime import date
-from selenium import webdriver
+from selenium.webdriver import Firefox
+from selenium.webdriver.firefox.options import Options
 from random import randrange
 
 from pathlib import Path
@@ -20,6 +21,8 @@ based on download_bur from national-inventory-submissions
 # download directly via selenium see link below
 # https://sqa.stackexchange.com/questions/2197/
 # how-to-download-a-file-using-seleniums-webdriver
+# for automatic downloading see https://stackoverflow.com/questions/70740163/
+# python-selenium-firefox-driver-dismiss-open-save-file-popup
 ###############
 
 submissions = pd.read_csv(root / "downloaded_data" / "UNFCCC" /
@@ -37,18 +40,19 @@ if not download_path.exists():
     download_path.mkdir(parents=True)
 
 # set options for headless mode
-options = webdriver.firefox.options.Options()
-# options.add_argument('-headless')
+profile_path = ".firefox"
+options = Options()
+#options.add_argument('-headless')
 
-# create profile for headless mode 
-profile = webdriver.FirefoxProfile()
-profile.set_preference('browser.download.folderList', 2)
+# create profile for headless mode and automatic downloading
+options.set_preference('profile', profile_path)
+options.set_preference('browser.download.folderList', 2)
 
 # set up selenium driver
-driver = webdriver.Firefox(options=options, firefox_profile=profile)
-
+driver = Firefox(options=options)
 # visit the main data page once to create cookies
 driver.get(url)
+
 time.sleep(20)
 
 # get the session id cookie

+ 6 - 10
code/UNFCCC_downloader/fetch_submissions_bur.py

@@ -5,7 +5,8 @@ import re
 
 from pathlib import Path
 from bs4 import BeautifulSoup
-from selenium import webdriver
+from selenium.webdriver import Firefox
+from selenium.webdriver.firefox.options import Options
 from random import randrange
 
 root = Path(__file__).parents[2]
@@ -17,11 +18,6 @@ Based on `process_bur` from national-inventory-submissions
 (https://github.com/openclimatedata/national-inventory-submissions)
 """
 
-# TODO for NC
-## link is just /documents/XXXXX (but already dealt with in code below)
-## url is https://unfccc.int/non-annex-I-NCs
-## pattern needs NC instead of BUR
-
 print("Fetching BUR submissions ...")
 
 url = "https://unfccc.int/BURs"
@@ -29,17 +25,17 @@ url = "https://unfccc.int/BURs"
 #print(url)
 
 # set options for headless mode
-options = webdriver.firefox.options.Options()
+profile_path = ".firefox"
+options = Options()
 options.add_argument('-headless')
 
 # create profile for headless mode and automatic downloading
-profile = webdriver.FirefoxProfile()
+options.set_preference('profile', profile_path)
 
 # set up selenium driver
-driver = webdriver.Firefox(options=options, firefox_profile=profile)
+driver = Firefox(options=options)
 driver.get(url)
 
-
 html = BeautifulSoup(driver.page_source, "html.parser")
 table = html.find_all("table")[1]
 links = table.findAll("a")

+ 6 - 10
code/UNFCCC_downloader/fetch_submissions_nc.py

@@ -5,7 +5,8 @@ import re
 
 from pathlib import Path
 from bs4 import BeautifulSoup
-from selenium import webdriver
+from selenium.webdriver import Firefox
+from selenium.webdriver.firefox.options import Options
 from random import randrange
 
 root = Path(__file__).parents[2]
@@ -17,11 +18,6 @@ Based on `process_bur` from national-inventory-submissions
 (https://github.com/openclimatedata/national-inventory-submissions)
 """
 
-# TODO for NC
-## link is just /documents/XXXXX (but already dealt with in code below)
-## url is https://unfccc.int/non-annex-I-NCs
-## pattern needs NC instead of BUR
-
 print("Fetching NC submissions ...")
 
 url = "https://unfccc.int/non-annex-I-NCs"
@@ -29,17 +25,17 @@ url = "https://unfccc.int/non-annex-I-NCs"
 #print(url)
 
 # set options for headless mode
-options = webdriver.firefox.options.Options()
+profile_path = ".firefox"
+options = Options()
 options.add_argument('-headless')
 
 # create profile for headless mode and automatic downloading
-profile = webdriver.FirefoxProfile()
+options.set_preference('profile', profile_path)
 
 # set up selenium driver
-driver = webdriver.Firefox(options=options, firefox_profile=profile)
+driver = Firefox(options=options)
 driver.get(url)
 
-
 html = BeautifulSoup(driver.page_source, "html.parser")
 table = html.find_all("table")[1]
 links = table.findAll("a")