Bladeren bron

Adapt NDC downloader to new UNFCCC metadata (the "Number" field is no longer present, so the NDC number is now parsed from the submission title)

Johannes Gütschow 1 jaar geleden
bovenliggende
commit
786a9806dd
1 gewijzigde bestanden met toevoegingen van 13 en 1 verwijderingen
  1. 13 1
      UNFCCC_GHG_data/UNFCCC_downloader/download_ndc.py

+ 13 - 1
UNFCCC_GHG_data/UNFCCC_downloader/download_ndc.py

@@ -3,6 +3,7 @@ import requests
 import shutil
 import time
 import os
+import re
 from datetime import date
 from random import randrange
 from UNFCCC_GHG_data.helper import downloaded_data_path_UNFCCC
@@ -33,6 +34,15 @@ url = "https://www4.unfccc.int/sites/NDCStaging/Pages/All.aspx"
 # TODO error page sizes are from BUR and NC and might differ for NDCs
 # if an error page is found instead of a pdf adjust sizes here
 error_file_sizes = [212, 210]
+ndc_regex = r".*\s([A-Za-z]*)\sNDC"
+ndc_to_number = {
+    "First": 1,
+    "Second": 2,
+    "Third": 3,
+    "Fourth": 4,
+    "Fifth": 5,
+}
+
 
 # Ensure download path and subfolders exist
 if not downloaded_data_path_UNFCCC.exists():
@@ -42,8 +52,10 @@ new_downloaded = []
 
 for idx, submission in submissions.iterrows():
     print("=" * 60)
-    ndc = submission.Number
+    #ndc = submission.Number
     title = submission.Title
+    temp = re.findall(ndc_regex, title)
+    ndc = ndc_to_number[temp[0]]
     url = submission.EncodedAbsUrl
     submission_date = submission.SubmissionDate
     country = submission.Party