3 vuotta sitten · 9baada96c7
--- a/code/UNFCCC_downloader/download_bur.py
+++ b/code/UNFCCC_downloader/download_bur.py
@@ -4,7 +4,8 @@ import shutil
 
				 import time
			
 
				 import os
			
 
				 from datetime import date
			
 
				-from selenium import webdriver
			
 
				+from selenium.webdriver import Firefox
			
 
				+from selenium.webdriver.firefox.options import Options
			
 
				 from random import randrange
			
 
				 
			
 
				 from pathlib import Path
			
@@ -40,16 +41,16 @@ if not download_path.exists():
 
				     download_path.mkdir(parents=True)
			
 
				 
			
 
				 # set options for headless mode
			
 
				-options = webdriver.firefox.options.Options()
			
 
				-# options.add_argument('-headless')
			
 
				+profile_path = ".firefox"
			
 
				+options = Options()
			
 
				+#options.add_argument('-headless')
			
 
				 
			
 
				-# create profile for headless mode 
			
 
				-profile = webdriver.FirefoxProfile()
			
 
				-profile.set_preference('browser.download.folderList', 2)
			
 
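				+# create profile for headless mode and automatic downloading; NOTE(review): set_preference('profile', ...) stores a pref named 'profile' and presumably does not select the profile directory (usually done via the '-profile' argument) - confirm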
			
 
				+options.set_preference('profile', profile_path)
			
 
				+options.set_preference('browser.download.folderList', 2)
			
 
				 
			
 
				 # set up selenium driver
			
 
				-driver = webdriver.Firefox(options=options, firefox_profile=profile)
			
 
				-
			
 
				+driver = Firefox(options=options)
			
 
				 # visit the main data page once to create cookies
			
 
				 driver.get(url)
			
 
				 time.sleep(20)
			
--- a/code/UNFCCC_downloader/download_nc.py
+++ b/code/UNFCCC_downloader/download_nc.py
@@ -4,7 +4,8 @@ import shutil
 
				 import time
			
 
				 import os
			
 
				 from datetime import date
			
 
				-from selenium import webdriver
			
 
				+from selenium.webdriver import Firefox
			
 
				+from selenium.webdriver.firefox.options import Options
			
 
				 from random import randrange
			
 
				 
			
 
				 from pathlib import Path
			
@@ -20,6 +21,8 @@ based on download_bur from national-inventory-submissions
 
				 # download directly via selenium see link below
			
 
				 # https://sqa.stackexchange.com/questions/2197/
			
 
				 # how-to-download-a-file-using-seleniums-webdriver
			
 
				+# for automatic downloading see https://stackoverflow.com/questions/70740163/
			
 
				+# python-selenium-firefox-driver-dismiss-open-save-file-popup
			
 
				 ###############
			
 
				 
			
 
				 submissions = pd.read_csv(root / "downloaded_data" / "UNFCCC" /
			
@@ -37,18 +40,19 @@ if not download_path.exists():
 
				     download_path.mkdir(parents=True)
			
 
				 
			
 
				 # set options for headless mode
			
 
				-options = webdriver.firefox.options.Options()
			
 
				-# options.add_argument('-headless')
			
 
				+profile_path = ".firefox"
			
 
				+options = Options()
			
 
				+#options.add_argument('-headless')
			
 
				 
			
 
				-# create profile for headless mode 
			
 
				-profile = webdriver.FirefoxProfile()
			
 
				-profile.set_preference('browser.download.folderList', 2)
			
 
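				+# create profile for headless mode and automatic downloading; NOTE(review): set_preference('profile', ...) stores a pref named 'profile' and presumably does not select the profile directory (usually done via the '-profile' argument) - confirm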
			
 
				+options.set_preference('profile', profile_path)
			
 
				+options.set_preference('browser.download.folderList', 2)
			
 
				 
			
 
				 # set up selenium driver
			
 
				-driver = webdriver.Firefox(options=options, firefox_profile=profile)
			
 
				-
			
 
				+driver = Firefox(options=options)
			
 
				 # visit the main data page once to create cookies
			
 
				 driver.get(url)
			
 
				+
			
 
				 time.sleep(20)
			
 
				 
			
 
				 # get the session id cookie
			
--- a/code/UNFCCC_downloader/fetch_submissions_bur.py
+++ b/code/UNFCCC_downloader/fetch_submissions_bur.py
@@ -5,7 +5,8 @@ import re
 
				 
			
 
				 from pathlib import Path
			
 
				 from bs4 import BeautifulSoup
			
 
				-from selenium import webdriver
			
 
				+from selenium.webdriver import Firefox
			
 
				+from selenium.webdriver.firefox.options import Options
			
 
				 from random import randrange
			
 
				 
			
 
				 root = Path(__file__).parents[2]
			
@@ -17,11 +18,6 @@ Based on `process_bur` from national-inventory-submissions
 
				 (https://github.com/openclimatedata/national-inventory-submissions)
			
 
				 """
			
 
				 
			
 
				-# TODO for NC
			
 
				-## link is just /documents/XXXXX (but already dealt with in code below)
			
 
				-## url is https://unfccc.int/non-annex-I-NCs
			
 
				-## pattern needs NC instead of BUR
			
 
				-
			
 
				 print("Fetching BUR submissions ...")
			
 
				 
			
 
				 url = "https://unfccc.int/BURs"
			
@@ -29,17 +25,17 @@ url = "https://unfccc.int/BURs"
 
				 #print(url)
			
 
				 
			
 
				 # set options for headless mode
			
 
				-options = webdriver.firefox.options.Options()
			
 
				+profile_path = ".firefox"
			
 
				+options = Options()
			
 
				 options.add_argument('-headless')
			
 
				 
			
 
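				 # create profile for headless mode and automatic downloading; NOTE(review): set_preference('profile', ...) stores a pref named 'profile' and presumably does not select the profile directory (usually done via the '-profile' argument) - confirm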
			
 
				-profile = webdriver.FirefoxProfile()
			
 
				+options.set_preference('profile', profile_path)
			
 
				 
			
 
				 # set up selenium driver
			
 
				-driver = webdriver.Firefox(options=options, firefox_profile=profile)
			
 
				+driver = Firefox(options=options)
			
 
				 driver.get(url)
			
 
				 
			
 
				-
			
 
				 html = BeautifulSoup(driver.page_source, "html.parser")
			
 
				 table = html.find_all("table")[1]
			
 
				 links = table.findAll("a")
			
--- a/code/UNFCCC_downloader/fetch_submissions_nc.py
+++ b/code/UNFCCC_downloader/fetch_submissions_nc.py
@@ -5,7 +5,8 @@ import re
 
				 
			
 
				 from pathlib import Path
			
 
				 from bs4 import BeautifulSoup
			
 
				-from selenium import webdriver
			
 
				+from selenium.webdriver import Firefox
			
 
				+from selenium.webdriver.firefox.options import Options
			
 
				 from random import randrange
			
 
				 
			
 
				 root = Path(__file__).parents[2]
			
@@ -17,11 +18,6 @@ Based on `process_bur` from national-inventory-submissions
 
				 (https://github.com/openclimatedata/national-inventory-submissions)
			
 
				 """
			
 
				 
			
 
				-# TODO for NC
			
 
				-## link is just /documents/XXXXX (but already dealt with in code below)
			
 
				-## url is https://unfccc.int/non-annex-I-NCs
			
 
				-## pattern needs NC instead of BUR
			
 
				-
			
 
				 print("Fetching NC submissions ...")
			
 
				 
			
 
				 url = "https://unfccc.int/non-annex-I-NCs"
			
@@ -29,17 +25,17 @@ url = "https://unfccc.int/non-annex-I-NCs"
 
				 #print(url)
			
 
				 
			
 
				 # set options for headless mode
			
 
				-options = webdriver.firefox.options.Options()
			
 
				+profile_path = ".firefox"
			
 
				+options = Options()
			
 
				 options.add_argument('-headless')
			
 
				 
			
 
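				 # create profile for headless mode and automatic downloading; NOTE(review): set_preference('profile', ...) stores a pref named 'profile' and presumably does not select the profile directory (usually done via the '-profile' argument) - confirm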
			
 
				-profile = webdriver.FirefoxProfile()
			
 
				+options.set_preference('profile', profile_path)
			
 
				 
			
 
				 # set up selenium driver
			
 
				-driver = webdriver.Firefox(options=options, firefox_profile=profile)
			
 
				+driver = Firefox(options=options)
			
 
				 driver.get(url)
			
 
				 
			
 
				-
			
 
				 html = BeautifulSoup(driver.page_source, "html.parser")
			
 
				 table = html.find_all("table")[1]
			
 
				 links = table.findAll("a")