
Include error handling routines from the BTR downloader in the annexI downloader as well

Johannes Gütschow · commit 5ba65977ba · 3 months ago
1 changed file with 37 additions and 12 deletions

+ 37 - 12
src/unfccc_ghg_data/unfccc_downloader/download_annexI.py
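
The ported pattern has two parts: paths that exceed the filesystem's name limit raise OSError with errno 36 (ENAMETOOLONG) and are skipped instead of aborting the run, and transient network failures raise requests' ConnectionError and are logged so the loop can continue. A minimal standalone sketch of the file-check and download hunks, assuming a hypothetical fetch_file helper in place of the script's inline download loop:

    import errno
    import shutil
    from pathlib import Path

    import requests

    ERROR_FILE_SIZES = (210, 212)  # byte sizes of the server's error page


    def fetch_file(url: str, local_filename: Path) -> None:
        # hypothetical helper; the real script does this inline in __main__
        try:
            # Path.exists() re-raises ENAMETOOLONG instead of returning
            # False, so an over-long path lands in the except branch
            if local_filename.exists():
                if local_filename.stat().st_size in ERROR_FILE_SIZES:
                    local_filename.unlink()  # stale error page, re-fetch
        except OSError as ex:
            if ex.errno == errno.ENAMETOOLONG:  # errno 36 on Linux
                print(f"Filename is too long: {local_filename}. Message: {ex}")
                return
            raise

        if not local_filename.exists():
            try:
                r = requests.get(url, stream=True, timeout=120)
                with open(local_filename, "wb") as f:
                    shutil.copyfileobj(r.raw, f)
            except requests.ConnectionError as ex:
                # log and continue; a later run can retry this file
                print(f"ConnectionError occurred: {ex}")

Using errno.ENAMETOOLONG instead of the literal 36 would also make the noqa: PLR2004 (magic value) markers in the committed code unnecessary.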

@@ -16,6 +16,7 @@ from random import randrange
 
 import pandas as pd
 import requests
+from requests import ConnectionError
 from selenium.webdriver import Firefox
 from selenium.webdriver.firefox.options import Options
 
@@ -133,11 +134,22 @@ if __name__ == "__main__":
         if not local_filename.parent.exists():
             local_filename.parent.mkdir()
 
-        if local_filename.exists():
-            # check file size. if 210 or 212 bytes it's the error page
-            if Path(local_filename).stat().st_size in error_file_sizes:
-                # found the error page. delete file
-                os.remove(local_filename)
+        try:
+            if local_filename.exists():
+                # check file size. if 210 or 212 bytes it's the error page
+                if Path(local_filename).stat().st_size in error_file_sizes:
+                    # found the error page. delete file
+                    os.remove(local_filename)
+        except OSError as ex:
+            if ex.errno == 36:  # noqa: PLR2004
+                print(
+                    f"Filename is too long: "
+                    f"{local_filename.relative_to(root_path)}. "
+                    f"Message: {ex}"
+                )
+                continue
+            else:
+                raise
 
         # now we have removed error pages, so a present file should not be overwritten
         if (not local_filename.exists()) and (not local_filename.is_symlink()):
@@ -157,14 +169,17 @@ if __name__ == "__main__":
                     for cookie in cookies_selenium:
                         cookies[cookie["name"]] = cookie["value"]
 
-                r = requests.get(url, stream=True, cookies=cookies, timeout=120)
-                with open(str(local_filename), "wb") as f:
-                    shutil.copyfileobj(r.raw, f)
+                try:
+                    r = requests.get(url, stream=True, cookies=cookies, timeout=120)
+                    with open(str(local_filename), "wb") as f:
+                        shutil.copyfileobj(r.raw, f)
 
-                # check file size. if 210 or 212 bytes it's the error page
-                if Path(local_filename).stat().st_size in error_file_sizes:
-                    # found the error page. delete file
-                    os.remove(local_filename)
+                    # check file size. if 210 or 212 bytes it's the error page
+                    if Path(local_filename).stat().st_size in error_file_sizes:
+                        # found the error page. delete file
+                        os.remove(local_filename)
+                except ConnectionError as ex:
+                    print(f"ConnectionError occurred: {ex}")
 
                 # sleep a bit to avoid running into captchas
                 time.sleep(randrange(5, 15))  # noqa: S311
@@ -190,6 +205,16 @@ if __name__ == "__main__":
                             "Zip format not supported, "
                             "please unzip on the command line."
                         )
+                    except OSError as ex:
+                        if ex.errno == 36:  # noqa: PLR2004
+                            print(
+                                f"A filename is too long in file: "
+                                f"{local_filename.relative_to(root_path)}. "
+                                "Unzip manually if any other files needed."
+                                f" Message: {ex}"
+                            )
+                        else:
+                            raise
                 else:
                     print(
                         f"Not attempting to extract "