Ver código fonte

Merge branch 'CRF_reading' of jguetschow/UNFCCC_non-AnnexI_data into main

Johannes Gütschow 10 meses atrás
pai
commit
12e2cbd418
89 arquivos alterados com 1178 adições e 68 exclusões
  1. 157 0
      UNFCCC_GHG_data/UNFCCC_downloader/download_btr.py
  2. 2 2
      UNFCCC_GHG_data/UNFCCC_downloader/fetch_submissions_annexI.py
  3. 97 0
      UNFCCC_GHG_data/UNFCCC_downloader/fetch_submissions_btr.py
  4. 31 8
      UNFCCC_GHG_data/UNFCCC_downloader/unfccc_submission_info.py
  5. 1 0
      UNFCCC_GHG_data/UNFCCC_reader/Argentina/read_ARG_BUR5_from_csv.py
  6. 0 5
      UNFCCC_GHG_data/UNFCCC_reader/Israel/config_ISR_BUR2.py
  7. 0 32
      UNFCCC_GHG_data/UNFCCC_reader/Taiwan/config_TWN_NIR2022.py
  8. 447 0
      UNFCCC_GHG_data/UNFCCC_reader/Taiwan/config_TWN_NIR2023.py
  9. 228 0
      UNFCCC_GHG_data/UNFCCC_reader/Taiwan/read_TWN_2023-Inventory_from_pdf.py
  10. 2 1
      UNFCCC_GHG_data/helper/__init__.py
  11. 40 1
      UNFCCC_GHG_data/helper/functions.py
  12. 43 0
      dodo.py
  13. 1 0
      downloaded_data/UNFCCC/00_new_downloads_BTR1-2024-05-03.csv
  14. 1 0
      downloaded_data/UNFCCC/00_new_downloads_BUR-2024-04-26.csv
  15. 1 0
      downloaded_data/UNFCCC/00_new_downloads_NC-2024-04-26.csv
  16. 1 0
      downloaded_data/UNFCCC/Andorra/BTR1/1st_BTR_ANDORRA_%281%29.pdf
  17. 1 0
      downloaded_data/UNFCCC/Argentina/BUR5/argentina-bur5.pdf
  18. 1 0
      downloaded_data/UNFCCC/Australia/BTR1/AUS_2024_1990_12042024.xlsx
  19. 1 0
      downloaded_data/UNFCCC/Australia/BTR1/AUS_2024_1991_12042024.xlsx
  20. 1 0
      downloaded_data/UNFCCC/Australia/BTR1/AUS_2024_1992_12042024.xlsx
  21. 1 0
      downloaded_data/UNFCCC/Australia/BTR1/AUS_2024_1993_12042024.xlsx
  22. 1 0
      downloaded_data/UNFCCC/Australia/BTR1/AUS_2024_1994_12042024.xlsx
  23. 1 0
      downloaded_data/UNFCCC/Australia/BTR1/AUS_2024_1995_12042024.xlsx
  24. 1 0
      downloaded_data/UNFCCC/Australia/BTR1/AUS_2024_1996_12042024.xlsx
  25. 1 0
      downloaded_data/UNFCCC/Australia/BTR1/AUS_2024_1997_12042024.xlsx
  26. 1 0
      downloaded_data/UNFCCC/Australia/BTR1/AUS_2024_1998_12042024.xlsx
  27. 1 0
      downloaded_data/UNFCCC/Australia/BTR1/AUS_2024_1999_12042024.xlsx
  28. 1 0
      downloaded_data/UNFCCC/Australia/BTR1/AUS_2024_2000_12042024.xlsx
  29. 1 0
      downloaded_data/UNFCCC/Australia/BTR1/AUS_2024_2001_12042024.xlsx
  30. 1 0
      downloaded_data/UNFCCC/Australia/BTR1/AUS_2024_2002_12042024.xlsx
  31. 1 0
      downloaded_data/UNFCCC/Australia/BTR1/AUS_2024_2003_12042024.xlsx
  32. 1 0
      downloaded_data/UNFCCC/Australia/BTR1/AUS_2024_2004_12042024.xlsx
  33. 1 0
      downloaded_data/UNFCCC/Australia/BTR1/AUS_2024_2005_12042024.xlsx
  34. 1 0
      downloaded_data/UNFCCC/Australia/BTR1/AUS_2024_2006_12042024.xlsx
  35. 1 0
      downloaded_data/UNFCCC/Australia/BTR1/AUS_2024_2007_12042024.xlsx
  36. 1 0
      downloaded_data/UNFCCC/Australia/BTR1/AUS_2024_2008_12042024.xlsx
  37. 1 0
      downloaded_data/UNFCCC/Australia/BTR1/AUS_2024_2009_12042024.xlsx
  38. 1 0
      downloaded_data/UNFCCC/Australia/BTR1/AUS_2024_2010_12042024.xlsx
  39. 1 0
      downloaded_data/UNFCCC/Australia/BTR1/AUS_2024_2011_12042024.xlsx
  40. 1 0
      downloaded_data/UNFCCC/Australia/BTR1/AUS_2024_2012_12042024.xlsx
  41. 1 0
      downloaded_data/UNFCCC/Australia/BTR1/AUS_2024_2013_12042024.xlsx
  42. 1 0
      downloaded_data/UNFCCC/Australia/BTR1/AUS_2024_2014_12042024.xlsx
  43. 1 0
      downloaded_data/UNFCCC/Australia/BTR1/AUS_2024_2015_12042024.xlsx
  44. 1 0
      downloaded_data/UNFCCC/Australia/BTR1/AUS_2024_2016_12042024.xlsx
  45. 1 0
      downloaded_data/UNFCCC/Australia/BTR1/AUS_2024_2017_12042024.xlsx
  46. 1 0
      downloaded_data/UNFCCC/Australia/BTR1/AUS_2024_2018_12042024.xlsx
  47. 1 0
      downloaded_data/UNFCCC/Australia/BTR1/AUS_2024_2019_12042024.xlsx
  48. 1 0
      downloaded_data/UNFCCC/Australia/BTR1/AUS_2024_2020_12042024.xlsx
  49. 1 0
      downloaded_data/UNFCCC/Australia/BTR1/AUS_2024_2021_12042024.xlsx
  50. 1 0
      downloaded_data/UNFCCC/Australia/BTR1/AUS_2024_2022_12042024.xlsx
  51. 1 0
      downloaded_data/UNFCCC/Australia/BTR1/National_Inventory_Report_2022_-_Volume_1.pdf
  52. 1 0
      downloaded_data/UNFCCC/Australia/BTR1/National_Inventory_Report_2022_-_Volume_2.pdf
  53. 1 0
      downloaded_data/UNFCCC/Australia/BTR1/aus-2024-crt-15apr24.zip
  54. 1 0
      downloaded_data/UNFCCC/Germany/BTR1/2024-04-15_DE_NID_2024_UNFCCC_english.pdf
  55. 1 0
      downloaded_data/UNFCCC/Guyana/BTR1/Guyana_First_Biennal_Transparency_Report_February_2024_-_Final.pdf
  56. 1 0
      downloaded_data/UNFCCC/Honduras/BUR2/ANEXO_TECNICOVF40324_comp_%281%29.pdf
  57. 1 0
      downloaded_data/UNFCCC/Honduras/BUR2/Document_BUR_Hn_2024.pdf
  58. 1 0
      downloaded_data/UNFCCC/Honduras/BUR2/Document_NIR_Hn_2024.pdf
  59. 1 0
      downloaded_data/UNFCCC/Japan/BTR1/NID-JPN-2024-v3.0.pdf
  60. 1 0
      downloaded_data/UNFCCC/Liechtenstein/BTR1/lie-2024-apr-nid.pdf
  61. 1 0
      downloaded_data/UNFCCC/Paraguay/NC4/IIN_INGEI1990-2019_PARAGUAY_vf%5B1%5D.pdf
  62. 1 0
      downloaded_data/UNFCCC/Saudi_Arabia/BUR2/Kingdom_of_Saudi_Arabia%C2%A0Bur2.pdf
  63. 1 0
      downloaded_data/UNFCCC/Serbia/NC3/3NC_Serbia.pdf
  64. 1 0
      downloaded_data/UNFCCC/Sierra_Leone/BUR1/Sierra_Leone_Updated_BUR.pdf
  65. 1 0
      downloaded_data/UNFCCC/Solomon_Islands/NC3/Solomon_Islands_TNC_Report.pdf
  66. 1 0
      downloaded_data/UNFCCC/Switzerland/BTR1/NID-CHE-2024.pdf
  67. 1 0
      downloaded_data/UNFCCC/United_States_of_America/BTR1/US-GHG-Inventory-2024-Annexes.pdf
  68. 1 0
      downloaded_data/UNFCCC/United_States_of_America/BTR1/US-GHG-Inventory-2024-ERRATA.pdf
  69. 1 0
      downloaded_data/UNFCCC/United_States_of_America/BTR1/US-GHG-Inventory-2024-ERRATA_%282%29.zip
  70. 1 0
      downloaded_data/UNFCCC/United_States_of_America/BTR1/US-GHG-Inventory-2024-Main-Text.pdf
  71. 1 0
      downloaded_data/UNFCCC/United_States_of_America/BTR1/US-GHG-Inventory-2024-Main-Text_%282%29.zip
  72. 5 8
      downloaded_data/UNFCCC/folder_mapping.json
  73. 1 0
      downloaded_data/UNFCCC/submissions-BTR1.csv
  74. 1 0
      downloaded_data/UNFCCC/submissions-annexI_2024.csv
  75. 1 1
      downloaded_data/UNFCCC/submissions-bur.csv
  76. 1 1
      downloaded_data/UNFCCC/submissions-nc.csv
  77. 1 0
      downloaded_data/non-UNFCCC/Taiwan/2023_NIR/2023_NIR_executive_summary_english.pdf
  78. 1 0
      downloaded_data/non-UNFCCC/Taiwan/2023_NIR/2023_NIR_full_text.pdf
  79. 1 1
      extracted_data/UNFCCC/Argentina/ARG_BUR5_2023_IPCC2006_PRIMAP.csv
  80. 1 1
      extracted_data/UNFCCC/Argentina/ARG_BUR5_2023_IPCC2006_PRIMAP.nc
  81. 4 4
      extracted_data/UNFCCC/Argentina/ARG_BUR5_2023_IPCC2006_PRIMAP.yaml
  82. 1 1
      extracted_data/UNFCCC/Argentina/ARG_BUR5_2023_IPCC2006_PRIMAP_raw.nc
  83. 2 2
      extracted_data/UNFCCC/Argentina/ARG_BUR5_2023_IPCC2006_PRIMAP_raw.yaml
  84. 1 0
      extracted_data/non-UNFCCC/Taiwan/TWN_inventory_2023_IPCC2006_1996_Taiwan_Inv.csv
  85. 1 0
      extracted_data/non-UNFCCC/Taiwan/TWN_inventory_2023_IPCC2006_1996_Taiwan_Inv.nc
  86. 23 0
      extracted_data/non-UNFCCC/Taiwan/TWN_inventory_2023_IPCC2006_1996_Taiwan_Inv.yaml
  87. 1 0
      extracted_data/non-UNFCCC/Taiwan/TWN_inventory_2023_IPCC2006_PRIMAP.csv
  88. 1 0
      extracted_data/non-UNFCCC/Taiwan/TWN_inventory_2023_IPCC2006_PRIMAP.nc
  89. 24 0
      extracted_data/non-UNFCCC/Taiwan/TWN_inventory_2023_IPCC2006_PRIMAP.yaml

+ 157 - 0
UNFCCC_GHG_data/UNFCCC_downloader/download_btr.py

@@ -0,0 +1,157 @@
+import argparse
+import pandas as pd
+import requests
+import shutil
+import time
+import os
+import zipfile
+from datetime import date
+from selenium.webdriver import Firefox
+from selenium.webdriver.firefox.options import Options
+from random import randrange
+from pathlib import Path
+
+from UNFCCC_GHG_data.helper import root_path, downloaded_data_path_UNFCCC
+from unfccc_submission_info import get_BTR_name_and_URL
+
+###############
+#
+# TODO
+# download directly via selenium see link below
+# https://sqa.stackexchange.com/questions/2197/
+# how-to-download-a-file-using-seleniums-webdriver
+# for automatic downloading see https://stackoverflow.com/questions/70740163/
+# python-selenium-firefox-driver-dismiss-open-save-file-popup
+###############
+
+# Help text shown by --help. This script is derived from download.py of the
+# national-inventory-submissions project.
+descr = 'Download and unzip data from UNFCCC Biennial Transparency Reports Submissions. ' \
+        'Based on download.py from national-inventory-submissions ' \
+        '(https://github.com/openclimatedata/national-inventory-submissions)'
+parser = argparse.ArgumentParser(description=descr)
+
+# --round is mandatory. Letting argparse convert and validate it yields a
+# usage message instead of a traceback from int(None) / int("abc").
+parser.add_argument(
+    '--round',
+    type=int,
+    required=True,
+    help='Submission round to download, e.g. 1'
+)
+
+args = parser.parse_args()
+submission_round = args.round
+
+# name of the round (e.g. "first") and URL of its submission page;
+# raises ValueError for rounds that are not defined
+round_name, url = get_BTR_name_and_URL(submission_round)
+# dataset name, used for the per-country sub-folder and the csv file names
+dataset = f"BTR{submission_round}"
+
+print(f"Downloading data for {round_name} BTRs")
+
+# file sizes (in bytes) which identify the error page that is served instead
+# of the requested file
+error_file_sizes = [212, 210]
+
+# Load the list of submissions written by the fetch script
+submissions_file = downloaded_data_path_UNFCCC / f"submissions-{dataset}.csv"
+submissions = pd.read_csv(submissions_file)
+
+# Firefox options; headless mode is currently switched off
+profile_path = ".firefox"
+options = Options()
+#options.add_argument('-headless')
+
+# preferences for the profile and for automatic downloading
+options.set_preference('profile', profile_path)
+options.set_preference('browser.download.folderList', 2)
+
+# start the browser and open the main data page once so that the site
+# creates its cookies
+driver = Firefox(options=options)
+driver.get(url)
+
+# give the website some time to load before the cookies are read
+time.sleep(20)
+
+# collect the cookies (including the session id) as name -> value mapping
+cookies_selenium = driver.get_cookies()
+cookies = {entry['name']: entry['value'] for entry in cookies_selenium}
+
+# submissions which are downloaded during this run
+new_downloaded = []
+
+# Keep the URL of the main data page: it is needed again whenever a new
+# browser session has to be created to obtain fresh cookies.
+main_page_url = url
+# maximal number of download attempts per file
+max_download_attempts = 10
+
+for idx, submission in submissions.iterrows():
+    print("=" * 60)
+    title = submission.Title
+    file_url = submission.URL
+    country = submission.Country
+    country = country.replace(' ', '_')
+    print(f"Downloading {title} from {file_url}")
+
+    # files are stored as <download folder>/<country>/<dataset>/<file name>
+    local_filename = \
+        downloaded_data_path_UNFCCC / country / dataset / \
+        file_url.split('/')[-1].replace("%20", "_").replace(" ", "_")
+    # creates the country folder and the dataset folder if they are missing
+    local_filename.parent.mkdir(parents=True, exist_ok=True)
+
+    if local_filename.exists():
+        # check file size. if 210 or 212 bytes it's the error page
+        if local_filename.stat().st_size in error_file_sizes:
+            # found the error page. delete file
+            os.remove(local_filename)
+
+    # now we have removed error pages, so a present file should not be overwritten
+    # (a symlink counts as present even if its target does not exist)
+    if (not local_filename.exists()) and (not local_filename.is_symlink()):
+        # bounded retry loop: the attempt counter advances on every pass, so
+        # a permanently failing download ends after max_download_attempts
+        for attempt in range(max_download_attempts):
+            # on the second and the sixth attempt try to get a new session ID
+            if attempt in (1, 5):
+                # end the previous browser session first so that no Firefox
+                # instances are left running
+                driver.quit()
+                driver = Firefox(options=options)
+
+                # visit the main data page once to create cookies
+                driver.get(main_page_url)
+                time.sleep(20)
+
+                # get the session id cookie
+                cookies = {
+                    cookie['name']: cookie['value']
+                    for cookie in driver.get_cookies()
+                }
+
+            # the response is used as context manager so the connection is
+            # released after the content has been written
+            with requests.get(file_url, stream=True, cookies=cookies) as r:
+                with open(str(local_filename), 'wb') as f:
+                    shutil.copyfileobj(r.raw, f)
+
+            # check file size. if 210 or 212 bytes it's the error page
+            if local_filename.stat().st_size in error_file_sizes:
+                # found the error page. delete file
+                os.remove(local_filename)
+
+            # sleep a bit to avoid running into captchas
+            time.sleep(randrange(5, 15))
+
+            if local_filename.exists():
+                # download succeeded, no further attempts needed
+                break
+
+        if local_filename.exists():
+            new_downloaded.append(submission)
+            print(f"Download => {local_filename.relative_to(root_path)}")
+            # unzip data (only for new downloads)
+            if local_filename.suffix == ".zip":
+                try:
+                    # the with-statement closes the archive even if the
+                    # extraction fails
+                    with zipfile.ZipFile(str(local_filename), 'r') as zipped_file:
+                        zipped_file.extractall(str(local_filename.parent))
+                        print(f"Extracted {len(zipped_file.namelist())} files.")
+                # TODO Better error logging/visibilty
+                except zipfile.BadZipFile:
+                    print(f"Error while trying to extract "
+                          f"{local_filename.relative_to(root_path)}")
+                except NotImplementedError:
+                    print("Zip format not supported, please unzip on the command line.")
+            else:
+                print(f"Not attempting to extract "
+                      f"{local_filename.relative_to(root_path)}.")
+        else:
+            print(f"Failed to download {local_filename.relative_to(root_path)}")
+
+    else:
+        print(f"=> Already downloaded {local_filename.relative_to(root_path)}")
+
+driver.close()
+
+# store the submissions downloaded in this run in a csv file named after the
+# dataset and today's date
+new_downloads_file = (downloaded_data_path_UNFCCC
+                      / f"00_new_downloads_{dataset}-{date.today()}.csv")
+df = pd.DataFrame(new_downloaded)
+df.to_csv(new_downloads_file, index=False)

+ 2 - 2
UNFCCC_GHG_data/UNFCCC_downloader/fetch_submissions_annexI.py

@@ -34,11 +34,11 @@ if int(year) == 2019:
         "greenhouse-gas-inventories-annex-i-parties/"
         "national-inventory-submissions-{}".format(year)
     )
-elif int(year) in range(2020,2023):
+elif int(year) in range(2020,2025):
     url = (
         "https://unfccc.int/ghg-inventories-annex-i-parties/{}".format(year)
     )
-elif int(year) >= 2023:
+elif int(year) >= 2025:
     url = (
         "https://unfccc.int/process-and-meetings/transparency-and-reporting/"
         "reporting-and-review-under-the-convention/"

+ 97 - 0
UNFCCC_GHG_data/UNFCCC_downloader/fetch_submissions_btr.py

@@ -0,0 +1,97 @@
+import argparse
+import time
+import pandas as pd
+
+from pathlib import Path
+from bs4 import BeautifulSoup
+from selenium.webdriver import Firefox
+from selenium.webdriver.firefox.options import Options
+from random import randrange
+from unfccc_submission_info import (get_unfccc_submission_info,
+                                    get_BTR_name_and_URL)
+from UNFCCC_GHG_data.helper import downloaded_data_path_UNFCCC
+
+# maximal number of attempts when reading a submission sub-page
+max_tries = 10
+
+# Help text shown by --help. This script is derived from process.py of the
+# national-inventory-submissions project.
+descr = ("Download UNFCCC Biennial Transparency Reports Submissions lists "
+         "and create list of submissions as CSV file. Based on "
+         "process.py from national-inventory-submissions "
+         "(https://github.com/openclimatedata/national-inventory-submissions)")
+parser = argparse.ArgumentParser(description=descr)
+# --round is mandatory. Letting argparse convert and validate it yields a
+# usage message instead of a traceback from int(None) / int("abc").
+parser.add_argument(
+    '--round',
+    type=int,
+    required=True,
+    help='1 for first BTRs, 2 for second BTRs etc.'
+)
+
+args = parser.parse_args()
+submission_round = args.round
+
+# name of the round (e.g. "first") and URL of its submission page;
+# raises ValueError for rounds that are not defined
+round_name, url = get_BTR_name_and_URL(submission_round)
+
+print(f"Fetching submissions for {round_name} BTRs")
+print(f"Using {url} to get submissions list")
+
+# set options for headless mode
+profile_path = ".firefox"
+options = Options()
+options.add_argument('-headless')
+
+# preference for the browser profile
+options.set_preference('profile', profile_path)
+
+# set up selenium driver and load the page listing the submissions
+driver = Firefox(options=options)
+driver.get(url)
+
+html = BeautifulSoup(driver.page_source, "html.parser")
+
+# the submissions are listed in the first table of the page
+table = html.find("table")
+
+# check if table found. if not the get command didn't work, likely because of
+# a captcha on the site
+if table is None:
+    # end the browser session before aborting so that no headless Firefox
+    # is left running
+    driver.quit()
+    raise RuntimeError('No table found on URL. Possibly due to a captcha.')
+
+links = table.findAll('a')
+
+targets = []  # sub-pages to visit
+downloads = []  # submission info collected from the sub-pages
+no_downloads = []  # sub-pages without downloadable files
+
+# Sort the links of the table: links to document pages become targets,
+# everything else is reported and ignored
+for link in links:
+    if "href" not in link.attrs:
+        continue
+    href = link.attrs["href"]
+
+    if "/documents/" not in href:
+        print(f"Ignored link: {href}: not in the right format.")
+        continue
+
+    # prefer the title attribute, fall back to the first child of the link
+    title = link.attrs["title"] if "title" in link.attrs else link.contents[0]
+
+    # make relative links absolute
+    if href.startswith("/documents"):
+        href = "https://unfccc.int" + href
+
+    # Only add pages in the format https://unfccc.int/documents/65587
+    # to further downloads
+    parent_path = str(Path(href).parent)
+    if parent_path.endswith("documents"):
+        targets.append({"title": title, "url": href})
+
+# Go through sub-pages.
+for target in targets:
+    # random pause between requests
+    time.sleep(randrange(5, 15))
+    url = target["url"]
+
+    # max_tries is the module level constant (10), passed instead of
+    # repeating the literal here
+    submission_info = get_unfccc_submission_info(url, driver, max_tries)
+
+    if submission_info:
+        downloads.extend(submission_info)
+    else:
+        # keep title and URL under explicit keys; a set would lose the
+        # information which entry is the title and which the URL
+        no_downloads.append({"title": target["title"], "url": url})
+
+if len(no_downloads) > 0:
+    print("No downloads for ", no_downloads)
+
+driver.close()
+# write the collected submission info to the csv file of this BTR round
+df = pd.DataFrame(downloads)
+df.to_csv(downloaded_data_path_UNFCCC / f"submissions-BTR{submission_round}.csv", index=False)

+ 31 - 8
UNFCCC_GHG_data/UNFCCC_downloader/unfccc_submission_info.py

@@ -82,16 +82,14 @@ def get_unfccc_submission_info(
                         if match:
                             kind = match.group(0).replace(" ", "")
                         else:
-                            if ("CRF" in doctype) or ("CRF" in title):
-                                kind = "CRF"
-                            elif ("SEF" in doctype) or ("SEF" in title):
-                                kind = "SEF"
+                            if ("CRT" in doctype) or ("CRT" in title):
+                                kind = "CRT"
+                            elif ("NID" in doctype) or ("NID" in title):
+                                kind = "NID"
                             elif ("NIR" in doctype) or ("NIR" in title):
                                 kind = "NIR"
-                            elif "NC" in title:
-                                kind = "NC"
-                            elif "Status report" in title:
-                                kind = "CRF"
+                            # "BTR" must be looked for in both fields (the
+                            # doctype check previously tested for "BRT")
+                            elif ("BTR" in doctype) or ("BTR" in title):
+                                kind = "BTR"
                             else:
                                 kind = "other"
                 info.append({
@@ -106,3 +104,28 @@ def get_unfccc_submission_info(
             print(f"No files found for {url}")
 
     return info
+
+
+def get_BTR_name_and_URL(submission_round: int) -> (str, str):
+    """
+    Look up the name and the submission page URL of a BTR round.
+
+    Parameters
+    ----------
+    submission_round (int)
+        number of the BTR submission round, e.g. 1
+
+    Returns
+    -------
+    name (str): name of the BTR submission round, e.g. 'first'
+    URL (str): URL of the submission page on the UNFCCC website
+
+    Raises
+    ------
+    ValueError
+        if no name and URL are defined for the given round
+
+    """
+
+    # only the first round is defined; reject everything else up front
+    if submission_round != 1:
+        raise ValueError(f"Submission round {submission_round} is not defined")
+
+    return "first", "https://unfccc.int/first-biennial-transparency-reports"

+ 1 - 0
UNFCCC_GHG_data/UNFCCC_reader/Argentina/read_ARG_BUR5_from_csv.py

@@ -99,6 +99,7 @@ data_proc_pm2 = process_data_for_country(
 current_source = data_proc_pm2.coords["source"].values[0]
 data_temp = data_proc_pm2.pr.loc[{"source": current_source}]
 data_proc_pm2 = data_proc_pm2.pr.set("source", 'BUR_NIR', data_temp)
+data_proc_pm2 = data_proc_pm2.pr.loc[{"source": ["BUR_NIR"]}]
 
 # ###
 # save data to IF and native format

+ 0 - 5
UNFCCC_GHG_data/UNFCCC_reader/Israel/config_ISR_BUR2.py

@@ -388,11 +388,6 @@ cat_conversion = {
         'M.0.EL': {'sources': ['1', '2', 'M.AG', '4', '5'], 'name': 'National total '
                                                                     'excluding LULUCF'},
     },
-    'basket_copy': {
-        'GWPs_to_add': ["AR4GWP100", "AR5GWP100", "AR6GWP100"],
-        'entities': ["HFCS", "PFCS"],
-        'source_GWP': 'SARGWP100',
-    },
 }
 
 sectors_to_save = [

+ 0 - 32
UNFCCC_GHG_data/UNFCCC_reader/Taiwan/config_TWN_NIR2022.py

@@ -55,38 +55,6 @@ def fix_rows(data: pd.DataFrame, rows_to_fix: list, col_to_use: str, n_rows: int
         data = data.reset_index(drop=True)
     return data
 
-def make_wide_table(data: pd.DataFrame, keyword: str, col: Union[int, str], index_cols: List[Union[int, str]])->pd.DataFrame:
-    index = data.loc[data[col] == keyword].index
-    if not list(index):
-        print("Keyword for table transformation not found")
-        return data
-    elif len(index)==1:
-        print("Keyword for table transformation found only once")
-        return data
-    else:
-        df_all = None
-        for i, item in enumerate(index):
-            loc = data.index.get_loc(item)
-            if i < len(index) - 1:
-                next_loc = data.index.get_loc(index[i + 1])
-            else:
-                next_loc = data.index[-1] + 1
-            df_to_add = data.loc[list(range(loc, next_loc))]
-            # select only cols which don't have NaN, Null, or '' as header
-            filter_nan = ((~df_to_add.iloc[0].isnull()) & (df_to_add.iloc[0] != 'NaN')& (df_to_add.iloc[0] != ''))
-            df_to_add = df_to_add.loc[: , filter_nan]
-            df_to_add.columns = df_to_add.iloc[0]
-            #print(df_to_add.columns)
-            df_to_add = df_to_add.drop(loc)
-            df_to_add = df_to_add.set_index(index_cols)
-            
-            if df_all is None:
-                df_all = df_to_add
-            else:
-                df_all = pd.concat([df_all, df_to_add], axis=1, join='outer')
-        return df_all
-        
-
 # page defs tp hold information on reading the table
 page_defs = {
     '5': { 

+ 447 - 0
UNFCCC_GHG_data/UNFCCC_reader/Taiwan/config_TWN_NIR2023.py

@@ -0,0 +1,447 @@
+# config and functions for Taiwan NIR 2023
+
+from typing import Union, List
+import pandas as pd
+import xarray as xr
+from typing import Optional, Any
+
+gwp_to_use = "AR4GWP100"
+terminology_proc = 'IPCC2006_PRIMAP'
+
+##### Table definitions
+# page defs to hold information on reading the table
+page_defs = {
+    '5': { 
+        "table_areas": ['36,523,563,68'],
+        "split_text": False,
+        "flavor": "stream",
+    },
+    '6': {
+        "table_areas": ['34,562,563,53'],
+        #"columns": ['195,228,263,295,328,363,395,428,462,495,529'], # works without
+        "split_text": True,
+        "flavor": "stream",
+    },
+    '7': {
+        "table_areas": ['36,743,531,482', '36,425,564,54'],
+        "split_text": True,
+        "flavor": "stream",
+    },
+    '8': {
+        "table_areas": ['35,748,534,567'],
+        "split_text": True,
+        "flavor": "stream",
+    },
+    '9': {
+        "table_areas": ['34,753,565,286', '34,235,565,63'],
+        "split_text": False,
+        "flavor": "stream",
+    },
+    '10': {
+        "table_areas": ['34,753,565,449'],
+        "split_text": False,
+        "flavor": "stream",
+    },
+    '11': {
+        "table_areas": ['32,522,566,208'],
+        "split_text": True,
+        "flavor": "stream",
+    },
+    '12': {
+        "table_areas": ['33,549,562,64'],
+        "split_text": True,
+        "flavor": "stream",
+    },
+    '13': {
+        "table_areas": ['31,761,532,517'],
+        "split_text": True,
+        "flavor": "stream",
+    },
+    '14': {
+        "table_areas": ['32,751,563,70'],
+        "columns": ['217,250,282,313,344,374,406,437,468,501,531'],
+        "split_text": True,
+        "flavor": "stream",
+    },
+    '15': {
+        "table_areas": ['32,345,565,53'],
+        "split_text": True,
+        "flavor": "stream",
+    },
+    '16': {
+        "table_areas": ['32,745,532,597'],
+        "split_text": True,
+        "flavor": "stream",
+    },
+    '18': {
+        "table_areas": ['30,747,564,260'],
+        "columns": ['188,232,263,298,331,362,398,432,464,497,530'],
+        "split_text": True,
+        "flavor": "stream",
+    }, # correct mistakes later
+}
+
+# table defs to hold information on how to process the tables
+table_defs = {
+    'ES2.2': { # 1990-2021 Carbon Dioxide Emissions and Sequestration in Taiwan
+        "tables": [1, 2],
+        "rows_to_fix": {
+            0: { 
+                3: ['1.A.4.c Agriculture, Forestry, Fishery, and',
+                    '2.D Non-Energy Products from Fuels and', 
+                    '4. Land Use, Land Use Change and Forestry'],
+            },
+        },
+        "index_cols": ['GHG Emission Source and Sinks'],
+        "wide_keyword": 'GHG Emission Source and Sinks',
+        "col_wide_kwd": 0, 
+        "entity": "CO2",
+        "unit": "kt",
+        "cat_codes_manual": {
+            'Net GHG Emission (including LULUCF)': '0',
+            'Total GHG Emission (excluding LULUCF)': 'M.0.EL',
+        },            
+    },
+    'ES2.3': { # 1990-2021 Methane Emissions in Taiwan
+        "tables": [3, 4],
+        "rows_to_fix": {},
+        "index_cols": ['GHG Emission Sources and Sinks'],
+        "wide_keyword": 'GHG Emission Sources and Sinks',
+        "col_wide_kwd": 0, 
+        "entity": f"CH4 ({gwp_to_use})",
+        "unit": "ktCO2eq",
+        "cat_codes_manual": {
+            'Total Methane Emissions': '0',
+        },
+        "drop_rows": [
+            "5.B Garbage Biological Treatment", # has lower significant digits than in table ES3.6
+            "2. Industrial Process and Product Use Sector",  # inconsistent with subsector sum (rounding)
+        ],
+    },
+    'ES2.4': { # 1990-2021 Nitrous Oxide Emissions in Taiwan
+        "tables": [5],
+        "fix_cats": {
+            0: {
+                "Total Nitrous Oxide Emissionsl": "Total Nitrous Oxide Emissions",
+            },
+        },            
+        "rows_to_fix": {},
+        "index_cols": ['GHG Emission Sources and Sinks'],
+        "wide_keyword": 'GHG Emission Sources and Sinks',
+        "col_wide_kwd": 0, 
+        "entity": f"N2O ({gwp_to_use})",
+        "unit": "ktCO2eq",
+        "cat_codes_manual": {
+            'Total Nitrous Oxide Emissions': '0',
+        },
+        "drop_rows": [
+            "3.F Field Burning of Agricultural Residues", # has lower significant digits than in table ES3.4
+            "5. Waste Sector", # error in 1996 data
+        ],
+    },
+    'ES2.5': { # 1990-2021 Fluoride-Containing Gas Emissions in Taiwan
+        "tables": [6,7],
+        "fix_cats": {},
+        "rows_to_fix": {
+            0: {
+                -2: ['Total PFCs Emissions (2.E Electronics Industry)',
+                    'Total SF6 Emissions',
+                    'Total NF3 Emissions (2.E Electronics Industry)'],
+            },
+        },
+        "index_cols": ['GHG Emission Sources and Sinks'],
+        "wide_keyword": 'GHG Emission Sources and Sinks',
+        "col_wide_kwd": 0,
+        "gas_splitting": {
+            "Total HFCs Emissions": f"HFCS ({gwp_to_use})",
+            "Total PFCs Emissions (2.E Electronics Industry)": f"PFCS ({gwp_to_use})",
+            "Total SF6 Emissions": f"SF6 ({gwp_to_use})",
+            "Total NF3 Emissions (2.E Electronics Industry)": f"NF3 ({gwp_to_use})",
+            "Total Fluoride-Containing Gas Emissions": f"FGASES ({gwp_to_use})",
+            "GHG Emission Sources and Sinks": "entity",
+        },
+        "unit": "ktCO2eq",
+        "cat_codes_manual": {
+            "Total HFCs Emissions": "2",
+            "Total PFCs Emissions (2.E Electronics Industry)": "2.E",
+            "Total SF6 Emissions": "2",
+            "Total NF3 Emissions (2.E Electronics Industry)": "2.E",
+            "Total Fluoride-Containing Gas Emissions": "2",
+        },
+    },
+    'ES3.1': { # 1990-2021 Greenhouse Gas Emission in Taiwan by Sector
+        "tables": [8],
+        "rows_to_fix": {},
+        "index_cols": ['GHG Emission Sources and Sinks'],
+        "wide_keyword": 'GHG Emission Sources and Sinks',
+        "col_wide_kwd": 0, 
+        "entity": f"KYOTOGHG ({gwp_to_use})",
+        "unit": "ktCO2eq",
+        "cat_codes_manual": {
+            'Net GHG Emission (including LULUCF)': '0',
+            'Total GHG Emission (excluding LULUCF)': 'M.0.EL',
+        },
+    },
+    'ES3.2': { # 1990-2021 Greenhouse Gas Emissions Produced by Energy Sector in Taiwan
+        "tables": [9,10],
+        "rows_to_fix": {},
+        "index_cols": ['GHG Emission Sources and Sinks'],
+        "wide_keyword": 'GHG Emission Sources and Sinks',
+        "col_wide_kwd": 0, 
+        "gas_splitting": {
+            "Total CO2 Emission": "CO2",
+            "Total CH4 Emission": f"CH4 ({gwp_to_use})",
+            "Total N2O Emission": f"N2O ({gwp_to_use})",
+            "Total Emission from Energy Sector": f"KYOTOGHG ({gwp_to_use})",
+            "GHG Emission Sources and Sinks": "entity",
+        },
+        "unit": "ktCO2eq",
+        "cat_codes_manual": {
+            'Total CO2 Emission': '1',
+            'Total CH4 Emission': '1',
+            'Total N2O Emission': '1',
+            'Total Emission from Energy Sector': '1',
+        },
+    },
+    'ES3.3': { # 1990-2021 Greenhouse Gas Emissions Produced by Industrial Process and Product Use Sector (IPPU) in Taiwan
+        "tables": [11],
+        "rows_to_fix": {},
+        "index_cols": ['GHG Emission Sources and Sinks'],
+        "wide_keyword": 'GHG Emission Sources and Sinks',
+        "col_wide_kwd": 0, 
+        "gas_splitting": {
+            "Total CO2 Emission": "CO2",
+            "Total CH4 Emission": f"CH4 ({gwp_to_use})",
+            "Total N2O Emission": f"N2O ({gwp_to_use})",
+            "Total HFCs Emission": f"HFCS ({gwp_to_use})",
+            "Total PFCs Emission (2.E Electronics Industry)": f"PFCS ({gwp_to_use})",
+            "Total SF6 Emission": f"SF6 ({gwp_to_use})",
+            "Total NF3 Emission (2.E Electronics Industry)": f"NF3 ({gwp_to_use})",
+            "Total Emission from IPPU Sector": f"KYOTOGHG ({gwp_to_use})",
+            "GHG Emission Sources and Sinks": "entity",
+        },
+        "unit": "ktCO2eq",
+        "cat_codes_manual": {
+            'Total CO2 Emission': '2',
+            'Total CH4 Emission': '2',
+            'Total N2O Emission': '2',
+            'Total HFCs Emission': '2',
+            'Total PFCs Emission (2.E Electronics Industry)': '2.E',
+            'Total SF6 Emission': '2',
+            'Total NF3 Emission (2.E Electronics Industry)': '2.E',
+            'Total Emission from IPPU Sector': '2',
+        },
+        "drop_rows": [
+        #     ("2.D Non-Energy Products from Fuels and Solvent Use", "CO2"), # has lower significant digits than in table ES2.2
+            "Total CH4 Emission",  # inconsistent with subsectors (rounding)
+        ]
+    }, 
+    'ES3.4': { # 1990-2021 Greenhouse Gas Emissions Produced by Agriculture Sector in Taiwan
+        "tables": [12,13],
+        "rows_to_fix": {},
+        "index_cols": ['GHG Emission Sources and Sinks'],
+        "wide_keyword": 'GHG Emission Sources and Sinks',
+        "col_wide_kwd": 0, 
+        "gas_splitting": {
+            "Total CO2 Emission (3.H Urea applied)": "CO2",
+            "Total CH4 Emission": f"CH4 ({gwp_to_use})",
+            "Total N2O Emission": f"N2O ({gwp_to_use})",
+            "Total Emission From Agriculture Sector": f"KYOTOGHG ({gwp_to_use})",
+            "GHG Emission Sources and Sinks": "entity",
+        },
+        "unit": "ktCO2eq",
+        "cat_codes_manual": {
+            'Total CO2 Emission (3.H Urea applied)': '3.H',
+            'Total CH4 Emission': '3',
+            'Total N2O Emission': '3',
+            'Total Emission From Agriculture Sector': '3',
+        },
+    }, 
+    'ES3.6': { # 1990-2020 Greenhouse Gas Emissions in Taiwan by Waste Sector
+        "tables": [14],
+        "rows_to_fix": {
+            0: {
+                3: ["Total CO2 Emission"],
+            },
+        }, 
+        "index_cols": ['GHG Emission Sources and Sinks'], 
+        "wide_keyword": 'GHG Emission Sources and Sinks',
+        "col_wide_kwd": 0, # two column header
+        "gas_splitting": {
+            "Total CO2 Emission (5.C Incineration and Open Burning of Waste)": "CO2",
+            "Total CH4 Emission": f"CH4 ({gwp_to_use})",
+            "Total N2O Emission": f"N2O ({gwp_to_use})",
+            "Total Emission from Waste Sector": f"KYOTOGHG ({gwp_to_use})",
+            "GHG Emission Sources and Sinks": "entity",
+        },
+        "unit": "ktCO2eq",
+        "cat_codes_manual": {
+            'Total CO2 Emission (5.C Incineration and Open Burning of Waste)': '5.C',
+            'Total CH4 Emission': '5',
+            'Total N2O Emission': '5',
+            'Total Emission from Waste Sector': '5',
+        },
+    }, 
+}
+
+# Definitions of tables that are currently skipped.
+# Each entry uses the same keys as the entries of `table_defs`:
+#   "tables":       indices into the list of raw tables read from the pdf
+#   "rows_to_fix":  {column: {n_rows: [row labels]}} passed to `fix_rows`
+#   "index_cols":   header labels used as index columns of the wide table
+#   "wide_keyword": header keyword marking the start of each table part
+#   "col_wide_kwd": column in which `wide_keyword` is searched
+#   "unit" / "entity": values filled in for the whole table
+# NOTE(review): the 2023 reader script only imports `table_defs`, so this dict
+# looks unused there - confirm before relying on it.
+table_defs_skip = {
+    'ES2.1': { # 1990-2020 Greenhouse Gas Emissions and Sequestration in Taiwan by Type
+        "tables": [0],
+        "rows_to_fix": {
+            0: { 
+                3: ['CO2'],
+            },
+            1: {  # where col 0 is empty
+                3: ['Net GHG Emission', 'Total GHG Emission'],
+            },
+        },
+        "index_cols": ['GHG', 'GWP'],
+        "wide_keyword": 'GHG',
+        "col_wide_kwd": 0, 
+        "unit": "ktCO2eq",
+    },
+    'ES2.5': { # 1990-2020 Fluoride-Containing Gas Emissions in Taiwan
+        "tables": [6],
+        "rows_to_fix": {
+            0: {
+                -2: ['Total SF6 Emissions', 
+                     'Total NF3 Emissions'],
+            },
+        },
+        "index_cols": ['GHG Emission Sources and Sinks'],
+        "wide_keyword": 'GHG Emission Sources and Sinks',
+        "col_wide_kwd": 0, 
+        #"entity": "CO2",
+        "unit": "ktCO2eq",
+    },
+    'ES3.5': { # skip for now: 1990-2020 Changes in Carbon Sequestration by LULUCF Sector in Taiwan
+        "tables": [12],
+        "rows_to_fix": {}, 
+        "index_cols": ['GHG Emission Sources and Sinks'], # header is a merged column :-(
+        "wide_keyword": 'GHG Emission Sources and Sinks',
+        "col_wide_kwd": 0, # two column header
+        "unit": "kt",
+        "entity": "CO2",
+    }, # need to consider the two columns specially (merge?)
+}
+
+
+##### primap2 metadata
+# Regular expression extracting the category code from a category name:
+# the named group captures 1 to 7 characters (letters, digits, dots) at the
+# start of the string which are followed by whitespace and the rest of the name.
+cat_code_regexp = r'(?P<UNFCCC_GHG_data>^[a-zA-Z0-9\.]{1,7})\s.*'
+
+# format of the time columns (four digit years)
+time_format = "%Y"
+
+# mapping of primap2 coordinates to the column names of the prepared tables
+coords_cols = {
+    "category": "category",
+    "entity": "entity",
+    "unit": "unit",
+    # "area": "Geo_code",
+}
+
+# no additional coordinates are used
+add_coords_cols = {
+    #    "orig_cat_name": ["orig_cat_name", "category"],
+}
+
+# terminologies of the coordinates
+coords_terminologies = {
+    "area": "ISO3",
+    "category": "IPCC2006_1996_Taiwan_Inv",
+    "scenario": "PRIMAP",
+}
+
+# values for coordinates which are the same for all data
+coords_defaults = {
+    "source": "TWN-GHG-Inventory",
+    "provenance": "measured",
+    "scenario": "2023NIR",
+    "area": "TWN",
+    # the unit is filled per table
+}
+
+# value mappings to apply to the coordinates
+coords_value_mapping = {
+    "unit": "PRIMAP1",
+    "category": "PRIMAP1",
+}
+
+# no value filling is defined
+coords_value_filling = {}
+
+# no filters are defined
+filter_remove = {}
+
+filter_keep = {}
+
+# Dataset level metadata; passed as `meta_data` to the primap2 interchange
+# format conversion by the reader script.
+meta_data = {
+    "references": "https://www.cca.gov.tw/information-service/publications/national-ghg-inventory-report/1851.html",
+    "rights": "",
+    "contact": "mail@johannes-guetschow.de",
+    "title": "2023 Republic of China - National Greenhouse Gas Report",
+    # typo fixed ("fom" -> "from"): this text ends up in the published metadata
+    "comment": "Read from pdf file and converted to PRIMAP2 format by Johannes Gütschow",
+    "institution": "Republic of China - Environmental Protection Administration",
+}
+
+##### processing information
+# Category conversion passed as `category_conversion` to
+# `process_data_for_country` by the reader script.
+#   'mapping':   category code as read from the inventory -> output category code
+#   'aggregate': output category code -> list of source categories to sum up
+#                (plus an optional human readable name)
+cat_conversion = {
+    'mapping': {
+        '0': '0',
+        'M.0.EL': 'M.0.EL',
+        '1': '1',
+        '1.A.1': '1.A.1',
+        '1.A.2': '1.A.2',
+        '1.A.3': '1.A.3',
+        '1.A.4': '1.A.4',
+        '1.A.4.a': '1.A.4.a',
+        '1.A.4.b': '1.A.4.b',
+        '1.A.4.c': '1.A.4.c',
+        '1.B.1': '1.B.1',
+        '1.B.2': '1.B.2',
+        '2': '2',
+        '2.A': '2.A',
+        '2.B': '2.B',
+        '2.C': '2.C',
+        '2.D': '2.D',
+        '2.E': '2.E',
+        '2.F': '2.F',
+        '2.G': '2.G',
+        '2.H': '2.H',
+        # agriculture: sector 3 of the inventory is mapped to M.AG and its subcategories
+        '3': 'M.AG',
+        '3.A': '3.A.1',
+        '3.B': '3.A.2',
+        '3.C': '3.C.7',
+        '3.D': 'M.3.AS',
+        '3.F': '3.C.1.b',
+        '3.H': '3.C.3',
+        # LULUCF
+        '4': 'M.LULUCF',
+        # waste: sector 5 of the inventory becomes sector 4
+        '5': '4',
+        '5.A': '4.A',
+        '5.B': '4.B',
+        '5.C': '4.C',
+        '5.D': '4.D',
+        '5.D.1': '4.D.1',
+        '5.D.2': '4.D.2',
+    },
+    'aggregate': {
+        '1.A': {'sources': ['1.A.1', '1.A.2', '1.A.3', '1.A.4'],
+                'name': 'Fuel Combustion Activities'},
+        '1.B': {'sources': ['1.B.1', '1.B.2'], 'name': 'Fugitive Emissions from Fuels'},
+        '2': {'sources': ['2.A', '2.B', '2.C', '2.D', '2.E', '2.F', '2.G', '2.H'],
+              'name': 'Industrial Process and Product Use Sector'},
+        '3.A': {'sources': ['3.A.1', '3.A.2'], 'name': 'Livestock'},
+        '3.B': {'sources': ['M.LULUCF'], 'name': 'Land'},
+        '3.C.1': {'sources': ['3.C.1.b'], 'name': 'Emissions from Biomass Burning'},
+        # NOTE(review): '3.C.5.a' and '3.C.5.b' are not a target of the mapping
+        # above and '3.C.5' is not a source of '3.C' below - confirm this is intended
+        '3.C.5': {'sources': ['3.C.5.a', '3.C.5.b'],
+                  'name': 'Indirect N2O Emissions from Managed Soils'},
+        '3.C': {'sources': ['3.C.1', '3.C.3', 'M.3.AS', '3.C.7'],
+                'name': 'Aggregate sources and non-CO2 emissions sources on land'},
+        'M.AG.ELV': {'sources': ['3.C'],
+                     'name': 'Agriculture excluding livestock emissions'},
+        'M.AG': {'sources': ['3.A', '3.C'], 'name': 'Agriculture'},
+        '3': {'sources': ['M.AG', 'M.LULUCF'], 'name': 'AFOLU'},  # consistency check
+        'M.0.EL': {'sources': ['1', '2', 'M.AG', '4']}, # consistency check
+        '0': {'sources': ['1', '2', '3', '4']},  # consistency check
+    },
+}
+
+# Basket copy configuration; handed to `process_data_for_country` as the
+# 'basket_copy' entry of the country processing information.
+basket_copy = {
+    # GWP specifications for which copies are added
+    'GWPs_to_add': ["SARGWP100", "AR5GWP100", "AR6GWP100"],
+    # gas baskets which are copied
+    'entities': ["HFCS", "PFCS"],
+    # GWP specification of the data as read
+    'source_GWP': gwp_to_use,
+}
+

+ 228 - 0
UNFCCC_GHG_data/UNFCCC_reader/Taiwan/read_TWN_2023-Inventory_from_pdf.py

@@ -0,0 +1,228 @@
+# This script reads data from Taiwan's 2023 national greenhouse gas inventory.
+# Data is read from the English executive summary pdf.
+# TODO: add further GWPs and gas baskets
+
+import pandas as pd
+import primap2 as pm2
+import camelot
+import copy
+
+from UNFCCC_GHG_data.helper import downloaded_data_path, extracted_data_path
+from UNFCCC_GHG_data.helper import compression, make_wide_table
+from UNFCCC_GHG_data.helper import process_data_for_country, gas_baskets
+from primap2.pm2io._data_reading import matches_time_format
+
+from config_TWN_NIR2022 import fix_rows
+from config_TWN_NIR2023 import table_defs, page_defs, cat_code_regexp
+from config_TWN_NIR2023 import terminology_proc
+from config_TWN_NIR2023 import gwp_to_use, basket_copy
+from config_TWN_NIR2023 import coords_cols, add_coords_cols, coords_defaults
+from config_TWN_NIR2023 import coords_terminologies, coords_value_mapping
+from config_TWN_NIR2023 import meta_data, cat_conversion
+
+
+# ###
+# configuration
+# ###
+# folder containing the downloaded inventory pdf
+input_folder = downloaded_data_path / 'non-UNFCCC' / 'Taiwan' / '2023_NIR'
+# folder for the extracted data; also create missing parent folders and
+# don't fail if the folder already exists
+output_folder = extracted_data_path / 'non-UNFCCC' / 'Taiwan'
+output_folder.mkdir(parents=True, exist_ok=True)
+
+# prefix of all output files and name of the pdf file to read
+output_filename = 'TWN_inventory_2023_'
+inventory_file = '2023_NIR_executive_summary_english.pdf'
+
+# ###
+# read the tables from pdf
+# ###
+
+# collect the raw DataFrames of all tables found on the configured pages,
+# in the order of the pages in `page_defs`
+all_tables = []
+for page, read_kwargs in page_defs.items():
+    print(f"Reading from page {page}")
+    tables_on_page = camelot.read_pdf(
+        str(input_folder / inventory_file),
+        pages=page,
+        **read_kwargs,
+    )
+    all_tables.extend(table.df for table in tables_on_page)
+
+
+# ###
+# convert tables to primap2 format
+# ###
+# Loop over all table definitions: build one wide table per definition from
+# the raw pdf tables, convert it to primap2 format and merge everything into
+# `data_pm2`.
+data_pm2 = None
+for table_name in table_defs.keys():
+    print(f"Working on table: {table_name}")
+
+    # deep copy as "index_cols" is extended below for tables with gas splitting
+    table_def = copy.deepcopy(table_defs[table_name])
+    # combine all raw tables
+    df_this_table = all_tables[table_def["tables"][0]].copy(deep=True)
+    if len(table_def["tables"]) > 1:
+        for table in table_def["tables"][1:]:
+            df_this_table = pd.concat(
+                [df_this_table, all_tables[table]],
+                axis=0,
+                join='outer')
+
+    # fix for table ES3.6: rows labelled "Total CO Emission" get all further
+    # columns emptied and the label replaced by 'Total CO2 Emission'
+    # (presumably the "2" is lost when reading the pdf - TODO confirm)
+    if table_name == 'ES3.6':
+        col_idx = df_this_table[0] == "Total CO Emission"
+        df_this_table.loc[col_idx, 1:] = ''
+        df_this_table.loc[col_idx, 0] = 'Total CO2 Emission'
+
+    # consecutive row labels after the concatenation
+    df_this_table = df_this_table.reset_index(drop=True)
+
+    # fix categories if necessary
+    if "fix_cats" in table_def.keys():
+        for col in table_def["fix_cats"]:
+            df_this_table[col] = df_this_table[col].replace(table_def["fix_cats"][col])
+
+    # fix rows
+    # NOTE: the cleaning of the first column below only runs for tables with
+    # a non-empty "rows_to_fix" definition
+    for col in table_def["rows_to_fix"].keys():
+        for n_rows in table_def["rows_to_fix"][col].keys():
+            print(f"Fixing {col}, {n_rows}")
+            # replace line breaks, triple and double spaces, and long hyphens in category names
+            df_this_table.iloc[:, 0] = df_this_table.iloc[:, 0].str.replace("\n", " ")
+            df_this_table.iloc[:, 0] = df_this_table.iloc[:, 0].str.replace("   ", " ")
+            df_this_table.iloc[:, 0] = df_this_table.iloc[:, 0].str.replace("  ", " ")
+            df_this_table.iloc[:, 0] = df_this_table.iloc[:, 0].str.replace("–", "-")
+            df_this_table = fix_rows(df_this_table,
+                                     table_def["rows_to_fix"][col][n_rows], col, n_rows)
+
+    # split by entity: rows whose header is a key of "gas_splitting" start a
+    # new entity which is used for all following rows until the next such header
+    if "gas_splitting" in table_def.keys():
+        col_entity = [''] * len(df_this_table)
+        last_entity = ''
+        for i in range(0, len(df_this_table)):
+            current_header = df_this_table[table_def["col_wide_kwd"]].iloc[i]
+            if current_header in table_def["gas_splitting"].keys():
+                last_entity = table_def["gas_splitting"][current_header]
+            col_entity[i] = last_entity
+
+        df_this_table["entity"] = col_entity
+        table_def["index_cols"].append("entity")
+
+    # make a wide table
+    df_this_table = make_wide_table(df_this_table, table_def["wide_keyword"],
+                                    table_def["col_wide_kwd"], table_def["index_cols"])
+
+    # remove rows which should not be read (by index label)
+    if "drop_rows" in table_def.keys():
+        df_this_table = df_this_table.drop(table_def["drop_rows"], axis=0)
+
+    # reset row index
+    df_this_table = df_this_table.reset_index(drop=False)
+
+    # add entity
+    if "entity" in table_def.keys():
+        df_this_table["entity"] = table_def["entity"]
+
+    # add unit
+    df_this_table["unit"] = table_def["unit"]
+
+    # the first index column holds the original category names
+    df_this_table = df_this_table.rename({table_def["index_cols"][0]: "orig_cat_name"},
+                                         axis=1)
+
+    # print(table_def["index_cols"][0])
+    # print(df_this_table.columns.values)
+
+    # make a copy of the categories row
+    df_this_table["category"] = df_this_table["orig_cat_name"]
+
+    # replace cat names by codes in col "category"
+    # first the manual replacements
+    df_this_table["category"] = df_this_table["category"].replace(
+        table_def["cat_codes_manual"])
+    # then the regex replacements: keep only the code matched by the named group
+    repl = lambda m: m.group('UNFCCC_GHG_data')
+    df_this_table["category"] = df_this_table["category"].str.replace(cat_code_regexp,
+                                                                      repl, regex=True)
+
+    ### convert to PRIMAP2 IF
+    # find the time columns (column names matching the year format)
+    time_format = '%Y'
+    time_columns = [
+        col
+        for col in df_this_table.columns.values
+        if matches_time_format(col, time_format)
+    ]
+
+    # remove ',' from the values in the time columns
+    for col in time_columns:
+        df_this_table.loc[:, col] = df_this_table.loc[:, col].str.replace(',', '',
+                                                                          regex=False)
+
+    # drop orig_cat_name as it's not unique per category
+    df_this_table = df_this_table.drop(columns="orig_cat_name")
+
+    # coords_defaults_this_table = coords_defaults.copy()
+    # coords_defaults_this_table["unit"] = table_def["unit"]
+    df_this_table_if = pm2.pm2io.convert_wide_dataframe_if(
+        df_this_table,
+        coords_cols=coords_cols,
+        add_coords_cols=add_coords_cols,
+        coords_defaults=coords_defaults,
+        coords_terminologies=coords_terminologies,
+        coords_value_mapping=coords_value_mapping,
+        # coords_value_filling=coords_value_filling,
+        # filter_remove=filter_remove,
+        # filter_keep=filter_keep,
+        meta_data=meta_data
+    )
+
+    this_table_pm2 = pm2.pm2io.from_interchange_format(df_this_table_if)
+
+    # the first table initializes the dataset, all further tables are merged into it
+    if data_pm2 is None:
+        data_pm2 = this_table_pm2
+    else:
+        data_pm2 = data_pm2.pr.merge(this_table_pm2)
+
+# interchange format version of the merged data (units in the fixed format)
+data_if = data_pm2.pr.to_interchange_format()
+
+# ###
+# save data
+# ###
+# data in original categories: the file name is the output prefix followed
+# by the category terminology
+orig_cat_basename = output_folder / (
+    output_filename + coords_terminologies["category"])
+pm2.pm2io.write_interchange_format(orig_cat_basename, data_if)
+encoding = {var: compression for var in data_pm2.data_vars}
+data_pm2.pr.to_netcdf(orig_cat_basename.with_suffix(".nc"), encoding=encoding)
+
+
+# ###
+# convert to IPCC2006 categories
+# ###
+# process a deep copy so the data in original categories is not modified
+data_proc_pm2 = data_pm2.copy(deep=True)
+
+# country specific processing information: only the basket copy is configured
+country_processing = {'basket_copy': basket_copy}
+
+data_proc_pm2 = process_data_for_country(
+    data_proc_pm2,
+    entities_to_ignore=[],
+    gas_baskets=gas_baskets,
+    processing_info_country=country_processing,
+    cat_terminology_out=terminology_proc,
+    category_conversion=cat_conversion,
+)
+
+# interchange format version of the processed data
+data_proc_if = data_proc_pm2.pr.to_interchange_format()
+
+# ###
+# save data
+# ###
+# data in 2006 categories
+proc_basename = output_folder / (output_filename + "IPCC2006_PRIMAP")
+pm2.pm2io.write_interchange_format(proc_basename, data_proc_if)
+encoding = {var: compression for var in data_proc_pm2.data_vars}
+data_proc_pm2.pr.to_netcdf(proc_basename.with_suffix(".nc"), encoding=encoding)

+ 2 - 1
UNFCCC_GHG_data/helper/__init__.py

@@ -8,7 +8,7 @@ from .definitions import GWP_factors, gas_baskets
 from .definitions import compression
 from .functions import get_country_code, get_country_name, convert_categories
 from .functions import create_folder_mapping, process_data_for_country, get_code_file
-from .functions import fix_rows
+from .functions import fix_rows, make_wide_table
 
 __all__ = [
     "root_path",
@@ -31,5 +31,6 @@ __all__ = [
     "create_folder_mapping",
     "process_data_for_country",
     "fix_rows",
+    "make_wide_table",
     "compression",
 ]

+ 40 - 1
UNFCCC_GHG_data/helper/functions.py

@@ -8,7 +8,7 @@ import pandas as pd
 import numpy as np
 from datetime import date
 from copy import deepcopy
-from typing import Dict, List, Optional
+from typing import Dict, List, Optional, Union
 from pathlib import Path
 from .definitions import custom_country_mapping, custom_folders
 from .definitions import root_path, downloaded_data_path, extracted_data_path
@@ -221,6 +221,7 @@ def process_data_for_country(
                     )
 
         # aggregate categories
+        # TODO replace by primap2 function once it is in primap2 stable
         if "aggregate_cats" in processing_info_country:
             data_country = data_country.pr.dequantify()
             if "agg_tolerance" in processing_info_country:
@@ -377,6 +378,7 @@ def convert_categories(
 ) -> xr.Dataset:
     """
     convert data from one category terminology to another
+    # TODO rewrite to use aggregate_coordinates functions
     """
     print(f"converting categories to {terminology_to}")
 
@@ -980,3 +982,40 @@ def fix_rows(
         data.loc[indices_to_merge[0]] = new_row
         data = data.drop(indices_to_merge[1:])
     return data
+
+
+def make_wide_table(
+        data: pd.DataFrame,
+        keyword: str,
+        col: Union[int, str],
+        index_cols: List[Union[int, str]]
+) -> pd.DataFrame:
+    """
+    Combine a table that is split into several vertically stacked parts into
+    one wide table.
+
+    Each part starts with a header row, which is identified by `keyword` in
+    column `col`. The header row of each part is used as its column names
+    (columns with a NaN, 'NaN', or '' header are dropped), `index_cols` are
+    set as index and the parts are concatenated along the columns.
+
+    Rows are addressed by position, so the function works for any row index
+    of `data`, not only for a default RangeIndex.
+
+    Parameters
+    ----------
+    data
+        table containing the stacked parts including their header rows
+    keyword
+        value marking a header row
+    col
+        label of the column in which `keyword` is searched
+    index_cols
+        header labels of the columns to use as index of the wide table
+
+    Returns
+    -------
+    The wide table. If the keyword is not found or found only once a message
+    is printed and `data` is returned unchanged.
+    """
+    # positions (not labels) of all header rows
+    header_positions = np.flatnonzero((data[col] == keyword).to_numpy())
+    if len(header_positions) == 0:
+        print("Keyword for table transformation not found")
+        return data
+    if len(header_positions) == 1:
+        print("Keyword for table transformation found only once")
+        return data
+
+    # a part ends where the next part starts, the last part at the end of the table
+    end_positions = list(header_positions[1:]) + [len(data)]
+
+    df_all = None
+    for start, end in zip(header_positions, end_positions):
+        df_to_add = data.iloc[start:end]
+        # select only cols which don't have NaN, Null, or '' as header
+        header = df_to_add.iloc[0]
+        filter_nan = (~header.isnull()) & (header != 'NaN') & (header != '')
+        df_to_add = df_to_add.loc[:, filter_nan]
+        # use the header row as column names and remove it from the data
+        df_to_add.columns = df_to_add.iloc[0]
+        df_to_add = df_to_add.iloc[1:]
+        df_to_add = df_to_add.set_index(index_cols)
+
+        if df_all is None:
+            df_all = df_to_add
+        else:
+            df_all = pd.concat([df_all, df_to_add], axis=1, join='outer')
+    return df_all

+ 43 - 0
dodo.py

@@ -156,6 +156,49 @@ def task_download_annexi():
     }
 
 
+# BTR data: configuration shared by the update and download tasks below.
+# The reporting round is read via get_var from the variable 'round'
+# (None is passed as the default value).
+update_btr_config = {
+    "round": get_var('round', None),
+}
+
+def task_update_btr():
+    """ Update list of BTR submissions """
+    btr_round = update_btr_config['round']
+    # csv file with the list of submissions; target of the task and output
+    # of the datalad run
+    submissions_csv = f"downloaded_data/UNFCCC/submissions-BTR{btr_round}.csv"
+    return {
+        'targets': [submissions_csv],
+        # spelling of "Biennial" fixed in the datalad commit message
+        'actions': [f"datalad run -m 'Fetch Biennial Transparency Report submissions for BTR{btr_round}' "
+                    "--explicit "
+                    f"-o {submissions_csv} "
+                    f"./venv/bin/python UNFCCC_GHG_data/UNFCCC_downloader/fetch_submissions_btr.py "
+                    f"--round={btr_round}"],
+        'task_dep': ['set_env'],
+        'verbosity': 2,
+        'setup': ['setup_venv'],
+    }
+
+
+def task_download_btr():
+    """ Download BTR submissions """
+    btr_round = update_btr_config['round']
+    # download all submissions listed in the submissions csv of this round
+    download_command = " ".join([
+        f"datalad run -m 'Download BTR submissions for BTR{btr_round}'",
+        f"-i downloaded_data/UNFCCC/submissions-BTR{btr_round}.csv",
+        "./venv/bin/python UNFCCC_GHG_data/UNFCCC_downloader/download_btr.py",
+        f"--round={btr_round}",
+    ])
+    # update the folder mapping for the downloaded data afterwards
+    folder_mapping_command = (
+        "./venv/bin/python UNFCCC_GHG_data/helper/folder_mapping.py "
+        "--folder=downloaded_data/UNFCCC"
+    )
+    return {
+        # no 'file_dep' on the submissions csv for now, as it would always
+        # run the fetch of the submissions before the download
+        'actions': [download_command, folder_mapping_command],
+        'task_dep': ['set_env'],
+        'verbosity': 2,
+        'setup': ['setup_venv'],
+    }
+
+
+
 def task_download_ndc():
     """ Download NDC submissions """
     return {

+ 1 - 0
downloaded_data/UNFCCC/00_new_downloads_BTR1-2024-05-03.csv

@@ -0,0 +1 @@
+../../.git/annex/objects/Z9/Qz/MD5E-s1734--60373097e7d45acd9783c02ebee355b4.csv/MD5E-s1734--60373097e7d45acd9783c02ebee355b4.csv

+ 1 - 0
downloaded_data/UNFCCC/00_new_downloads_BUR-2024-04-26.csv

@@ -0,0 +1 @@
+../../.git/annex/objects/7Z/pF/MD5E-s945--56064b2703393ee1e7c1fca66ddda899.csv/MD5E-s945--56064b2703393ee1e7c1fca66ddda899.csv

+ 1 - 0
downloaded_data/UNFCCC/00_new_downloads_NC-2024-04-26.csv

@@ -0,0 +1 @@
+../../.git/annex/objects/Pz/G9/MD5E-s472--88649e65316bd12e0c37072e2a5490fa.csv/MD5E-s472--88649e65316bd12e0c37072e2a5490fa.csv

+ 1 - 0
downloaded_data/UNFCCC/Andorra/BTR1/1st_BTR_ANDORRA_%281%29.pdf

@@ -0,0 +1 @@
+../../../../.git/annex/objects/vg/Z0/MD5E-s8782145--72752b5ec530537e567ee8e58091c4dd.pdf/MD5E-s8782145--72752b5ec530537e567ee8e58091c4dd.pdf

+ 1 - 0
downloaded_data/UNFCCC/Argentina/BUR5/argentina-bur5.pdf

@@ -0,0 +1 @@
+../../../../.git/annex/objects/2X/1Z/MD5E-s30786129--10656ac553d95029a288a3154769f261.pdf/MD5E-s30786129--10656ac553d95029a288a3154769f261.pdf

+ 1 - 0
downloaded_data/UNFCCC/Australia/BTR1/AUS_2024_1990_12042024.xlsx

@@ -0,0 +1 @@
+../../../../.git/annex/objects/5x/qV/MD5E-s985305--607eb2169c4972ccdc12c32836f45074.xlsx/MD5E-s985305--607eb2169c4972ccdc12c32836f45074.xlsx

+ 1 - 0
downloaded_data/UNFCCC/Australia/BTR1/AUS_2024_1991_12042024.xlsx

@@ -0,0 +1 @@
+../../../../.git/annex/objects/JW/56/MD5E-s986139--61e3c1cfd887fa88b4c2b18f0fcb87fe.xlsx/MD5E-s986139--61e3c1cfd887fa88b4c2b18f0fcb87fe.xlsx

+ 1 - 0
downloaded_data/UNFCCC/Australia/BTR1/AUS_2024_1992_12042024.xlsx

@@ -0,0 +1 @@
+../../../../.git/annex/objects/WF/wG/MD5E-s985749--85cd486cf36f25bac4784359f9e9f1e2.xlsx/MD5E-s985749--85cd486cf36f25bac4784359f9e9f1e2.xlsx

+ 1 - 0
downloaded_data/UNFCCC/Australia/BTR1/AUS_2024_1993_12042024.xlsx

@@ -0,0 +1 @@
+../../../../.git/annex/objects/W0/4F/MD5E-s985357--5ab58f3397aa980a830cd14ccabb34d5.xlsx/MD5E-s985357--5ab58f3397aa980a830cd14ccabb34d5.xlsx

+ 1 - 0
downloaded_data/UNFCCC/Australia/BTR1/AUS_2024_1994_12042024.xlsx

@@ -0,0 +1 @@
+../../../../.git/annex/objects/QK/VM/MD5E-s986932--8c9746a1267f56268f7d337e3c6a744f.xlsx/MD5E-s986932--8c9746a1267f56268f7d337e3c6a744f.xlsx

+ 1 - 0
downloaded_data/UNFCCC/Australia/BTR1/AUS_2024_1995_12042024.xlsx

@@ -0,0 +1 @@
+../../../../.git/annex/objects/K3/1x/MD5E-s993743--971e5e415e41989f1211e0c5c9093772.xlsx/MD5E-s993743--971e5e415e41989f1211e0c5c9093772.xlsx

+ 1 - 0
downloaded_data/UNFCCC/Australia/BTR1/AUS_2024_1996_12042024.xlsx

@@ -0,0 +1 @@
+../../../../.git/annex/objects/0k/6k/MD5E-s994593--69f492a85666d475ab873f5e8ab009dc.xlsx/MD5E-s994593--69f492a85666d475ab873f5e8ab009dc.xlsx

+ 1 - 0
downloaded_data/UNFCCC/Australia/BTR1/AUS_2024_1997_12042024.xlsx

@@ -0,0 +1 @@
+../../../../.git/annex/objects/g1/MK/MD5E-s994084--4c8af55742b583925a7e35bd987787c8.xlsx/MD5E-s994084--4c8af55742b583925a7e35bd987787c8.xlsx

+ 1 - 0
downloaded_data/UNFCCC/Australia/BTR1/AUS_2024_1998_12042024.xlsx

@@ -0,0 +1 @@
+../../../../.git/annex/objects/Jx/J2/MD5E-s993981--495a75487f6ae7709be65179b7a56441.xlsx/MD5E-s993981--495a75487f6ae7709be65179b7a56441.xlsx

+ 1 - 0
downloaded_data/UNFCCC/Australia/BTR1/AUS_2024_1999_12042024.xlsx

@@ -0,0 +1 @@
+../../../../.git/annex/objects/82/4X/MD5E-s994735--fd0b55191c160ad2a3b9db7714004542.xlsx/MD5E-s994735--fd0b55191c160ad2a3b9db7714004542.xlsx

+ 1 - 0
downloaded_data/UNFCCC/Australia/BTR1/AUS_2024_2000_12042024.xlsx

@@ -0,0 +1 @@
+../../../../.git/annex/objects/zv/Zv/MD5E-s995688--3b3287d1766937665b46441ea49ac81c.xlsx/MD5E-s995688--3b3287d1766937665b46441ea49ac81c.xlsx

+ 1 - 0
downloaded_data/UNFCCC/Australia/BTR1/AUS_2024_2001_12042024.xlsx

@@ -0,0 +1 @@
+../../../../.git/annex/objects/Z0/2z/MD5E-s996415--bb93e3156ddaee5346d70a4e4144ff2e.xlsx/MD5E-s996415--bb93e3156ddaee5346d70a4e4144ff2e.xlsx

+ 1 - 0
downloaded_data/UNFCCC/Australia/BTR1/AUS_2024_2002_12042024.xlsx

@@ -0,0 +1 @@
+../../../../.git/annex/objects/Mz/Vm/MD5E-s996190--a05b17f3db7dd37eb96427c380186fdc.xlsx/MD5E-s996190--a05b17f3db7dd37eb96427c380186fdc.xlsx

+ 1 - 0
downloaded_data/UNFCCC/Australia/BTR1/AUS_2024_2003_12042024.xlsx

@@ -0,0 +1 @@
+../../../../.git/annex/objects/98/9M/MD5E-s998117--4ec6793bf2676e126e24a96690f8fcd8.xlsx/MD5E-s998117--4ec6793bf2676e126e24a96690f8fcd8.xlsx

+ 1 - 0
downloaded_data/UNFCCC/Australia/BTR1/AUS_2024_2004_12042024.xlsx

@@ -0,0 +1 @@
+../../../../.git/annex/objects/zX/4W/MD5E-s998929--3ed8646fdde04c915f7c900031c7d37e.xlsx/MD5E-s998929--3ed8646fdde04c915f7c900031c7d37e.xlsx

+ 1 - 0
downloaded_data/UNFCCC/Australia/BTR1/AUS_2024_2005_12042024.xlsx

@@ -0,0 +1 @@
+../../../../.git/annex/objects/13/g8/MD5E-s999124--ca3a44d852375bb8020652d286fa674a.xlsx/MD5E-s999124--ca3a44d852375bb8020652d286fa674a.xlsx

+ 1 - 0
downloaded_data/UNFCCC/Australia/BTR1/AUS_2024_2006_12042024.xlsx

@@ -0,0 +1 @@
+../../../../.git/annex/objects/Q3/P6/MD5E-s999463--507f8a286d2c8080dc69fcec4890251a.xlsx/MD5E-s999463--507f8a286d2c8080dc69fcec4890251a.xlsx

+ 1 - 0
downloaded_data/UNFCCC/Australia/BTR1/AUS_2024_2007_12042024.xlsx

@@ -0,0 +1 @@
+../../../../.git/annex/objects/Jm/Jz/MD5E-s1001518--a99dcb79db925b7b913a4e129e103be7.xlsx/MD5E-s1001518--a99dcb79db925b7b913a4e129e103be7.xlsx

+ 1 - 0
downloaded_data/UNFCCC/Australia/BTR1/AUS_2024_2008_12042024.xlsx

@@ -0,0 +1 @@
+../../../../.git/annex/objects/1m/j8/MD5E-s1001484--236138fa0e17b269861f69639c840d21.xlsx/MD5E-s1001484--236138fa0e17b269861f69639c840d21.xlsx

+ 1 - 0
downloaded_data/UNFCCC/Australia/BTR1/AUS_2024_2009_12042024.xlsx

@@ -0,0 +1 @@
+../../../../.git/annex/objects/p4/48/MD5E-s1001538--4b50cfab04c11ff2ef4bc91c61bd9eb1.xlsx/MD5E-s1001538--4b50cfab04c11ff2ef4bc91c61bd9eb1.xlsx

+ 1 - 0
downloaded_data/UNFCCC/Australia/BTR1/AUS_2024_2010_12042024.xlsx

@@ -0,0 +1 @@
+../../../../.git/annex/objects/60/WZ/MD5E-s1001164--54fed5c38e4edded43dd0fc515bd15b3.xlsx/MD5E-s1001164--54fed5c38e4edded43dd0fc515bd15b3.xlsx

+ 1 - 0
downloaded_data/UNFCCC/Australia/BTR1/AUS_2024_2011_12042024.xlsx

@@ -0,0 +1 @@
+../../../../.git/annex/objects/Z1/3J/MD5E-s1001115--1f39eaebc8014eddf2807be7120ae565.xlsx/MD5E-s1001115--1f39eaebc8014eddf2807be7120ae565.xlsx

+ 1 - 0
downloaded_data/UNFCCC/Australia/BTR1/AUS_2024_2012_12042024.xlsx

@@ -0,0 +1 @@
+../../../../.git/annex/objects/K6/8z/MD5E-s1001511--e43f9d3b28fa4a2d64c92ba442e04f6e.xlsx/MD5E-s1001511--e43f9d3b28fa4a2d64c92ba442e04f6e.xlsx

+ 1 - 0
downloaded_data/UNFCCC/Australia/BTR1/AUS_2024_2013_12042024.xlsx

@@ -0,0 +1 @@
+../../../../.git/annex/objects/fJ/75/MD5E-s1001135--7937383b69adf99fde7e4d3cf384fb48.xlsx/MD5E-s1001135--7937383b69adf99fde7e4d3cf384fb48.xlsx

+ 1 - 0
downloaded_data/UNFCCC/Australia/BTR1/AUS_2024_2014_12042024.xlsx

@@ -0,0 +1 @@
+../../../../.git/annex/objects/p5/P7/MD5E-s1001457--643547a10d339fc2f24c8170de6dd810.xlsx/MD5E-s1001457--643547a10d339fc2f24c8170de6dd810.xlsx

+ 1 - 0
downloaded_data/UNFCCC/Australia/BTR1/AUS_2024_2015_12042024.xlsx

@@ -0,0 +1 @@
+../../../../.git/annex/objects/Gx/gg/MD5E-s1001762--3c3d2d2dd54199f282b9e513bb3e3c65.xlsx/MD5E-s1001762--3c3d2d2dd54199f282b9e513bb3e3c65.xlsx

+ 1 - 0
downloaded_data/UNFCCC/Australia/BTR1/AUS_2024_2016_12042024.xlsx

@@ -0,0 +1 @@
+../../../../.git/annex/objects/kX/wm/MD5E-s1001601--63644628745f2d1584c29ec8a4b6abf3.xlsx/MD5E-s1001601--63644628745f2d1584c29ec8a4b6abf3.xlsx

+ 1 - 0
downloaded_data/UNFCCC/Australia/BTR1/AUS_2024_2017_12042024.xlsx

@@ -0,0 +1 @@
+../../../../.git/annex/objects/m0/gJ/MD5E-s1001359--3f4ab5f36007970b813c0456fa3d72d7.xlsx/MD5E-s1001359--3f4ab5f36007970b813c0456fa3d72d7.xlsx

+ 1 - 0
downloaded_data/UNFCCC/Australia/BTR1/AUS_2024_2018_12042024.xlsx

@@ -0,0 +1 @@
+../../../../.git/annex/objects/3V/pj/MD5E-s1001848--807c981f67086f0fb9a118e84ed62a42.xlsx/MD5E-s1001848--807c981f67086f0fb9a118e84ed62a42.xlsx

+ 1 - 0
downloaded_data/UNFCCC/Australia/BTR1/AUS_2024_2019_12042024.xlsx

@@ -0,0 +1 @@
+../../../../.git/annex/objects/Z2/Kj/MD5E-s1001620--c20158d4e034c5862d3d6a9dfac5588b.xlsx/MD5E-s1001620--c20158d4e034c5862d3d6a9dfac5588b.xlsx

+ 1 - 0
downloaded_data/UNFCCC/Australia/BTR1/AUS_2024_2020_12042024.xlsx

@@ -0,0 +1 @@
+../../../../.git/annex/objects/Mq/x2/MD5E-s1001963--e1ffe5c56333c22c06262d3a4c2a788b.xlsx/MD5E-s1001963--e1ffe5c56333c22c06262d3a4c2a788b.xlsx

+ 1 - 0
downloaded_data/UNFCCC/Australia/BTR1/AUS_2024_2021_12042024.xlsx

@@ -0,0 +1 @@
+../../../../.git/annex/objects/WG/Jw/MD5E-s1001819--78b14e672315af516bd1750c4fb766d5.xlsx/MD5E-s1001819--78b14e672315af516bd1750c4fb766d5.xlsx

+ 1 - 0
downloaded_data/UNFCCC/Australia/BTR1/AUS_2024_2022_12042024.xlsx

@@ -0,0 +1 @@
+../../../../.git/annex/objects/9Q/fK/MD5E-s984374--4a3da046202140422139f5e82c281f19.xlsx/MD5E-s984374--4a3da046202140422139f5e82c281f19.xlsx

+ 1 - 0
downloaded_data/UNFCCC/Australia/BTR1/National_Inventory_Report_2022_-_Volume_1.pdf

@@ -0,0 +1 @@
+../../../../.git/annex/objects/X1/jj/MD5E-s18514405--089cff2acd412d1ecf4bf194ea70e61a.pdf/MD5E-s18514405--089cff2acd412d1ecf4bf194ea70e61a.pdf

+ 1 - 0
downloaded_data/UNFCCC/Australia/BTR1/National_Inventory_Report_2022_-_Volume_2.pdf

@@ -0,0 +1 @@
+../../../../.git/annex/objects/Jj/00/MD5E-s27844627--1a7d4bf800cf1ff5784b5514dcccff63.pdf/MD5E-s27844627--1a7d4bf800cf1ff5784b5514dcccff63.pdf

+ 1 - 0
downloaded_data/UNFCCC/Australia/BTR1/aus-2024-crt-15apr24.zip

@@ -0,0 +1 @@
+../../../../.git/annex/objects/V3/jv/MD5E-s30530455--916279be79c72becda66d2816bf67ce3.zip/MD5E-s30530455--916279be79c72becda66d2816bf67ce3.zip

+ 1 - 0
downloaded_data/UNFCCC/Germany/BTR1/2024-04-15_DE_NID_2024_UNFCCC_english.pdf

@@ -0,0 +1 @@
+../../../../.git/annex/objects/5q/Ww/MD5E-s30709021--0f914e713509db0a42662f1a8187d444.pdf/MD5E-s30709021--0f914e713509db0a42662f1a8187d444.pdf

+ 1 - 0
downloaded_data/UNFCCC/Guyana/BTR1/Guyana_First_Biennal_Transparency_Report_February_2024_-_Final.pdf

@@ -0,0 +1 @@
+../../../../.git/annex/objects/mz/jV/MD5E-s12027122--94d972e28b098771e545f196344b35b5.pdf/MD5E-s12027122--94d972e28b098771e545f196344b35b5.pdf

+ 1 - 0
downloaded_data/UNFCCC/Honduras/BUR2/ANEXO_TECNICOVF40324_comp_%281%29.pdf

@@ -0,0 +1 @@
+../../../../.git/annex/objects/Jp/Ww/MD5E-s2969388--a6028b383165f91f726d860889563af6.pdf/MD5E-s2969388--a6028b383165f91f726d860889563af6.pdf

+ 1 - 0
downloaded_data/UNFCCC/Honduras/BUR2/Document_BUR_Hn_2024.pdf

@@ -0,0 +1 @@
+../../../../.git/annex/objects/wf/M2/MD5E-s8485333--a9b1d78546cba5be11825ceaf8b332c5.pdf/MD5E-s8485333--a9b1d78546cba5be11825ceaf8b332c5.pdf

+ 1 - 0
downloaded_data/UNFCCC/Honduras/BUR2/Document_NIR_Hn_2024.pdf

@@ -0,0 +1 @@
+../../../../.git/annex/objects/19/0Z/MD5E-s16456486--bf18779e3221a19d9faac10e62e61963.pdf/MD5E-s16456486--bf18779e3221a19d9faac10e62e61963.pdf

+ 1 - 0
downloaded_data/UNFCCC/Japan/BTR1/NID-JPN-2024-v3.0.pdf

@@ -0,0 +1 @@
+../../../../.git/annex/objects/jQ/6V/MD5E-s10848256--c6cbcf7fda2683b035d32e93ae1a2ece.0.pdf/MD5E-s10848256--c6cbcf7fda2683b035d32e93ae1a2ece.0.pdf

+ 1 - 0
downloaded_data/UNFCCC/Liechtenstein/BTR1/lie-2024-apr-nid.pdf

@@ -0,0 +1 @@
+../../../../.git/annex/objects/Jz/g2/MD5E-s7617033--2fcff0015fa8ed20323996a524ecbd01.pdf/MD5E-s7617033--2fcff0015fa8ed20323996a524ecbd01.pdf

+ 1 - 0
downloaded_data/UNFCCC/Paraguay/NC4/IIN_INGEI1990-2019_PARAGUAY_vf%5B1%5D.pdf

@@ -0,0 +1 @@
+../../../../.git/annex/objects/xJ/9w/MD5E-s9519381--bf5890d4063490a469a49c09673b5605.pdf/MD5E-s9519381--bf5890d4063490a469a49c09673b5605.pdf

+ 1 - 0
downloaded_data/UNFCCC/Saudi_Arabia/BUR2/Kingdom_of_Saudi_Arabia%C2%A0Bur2.pdf

@@ -0,0 +1 @@
+../../../../.git/annex/objects/k8/03/MD5E-s2928912--761b72fc007a25debed7ee09723030df.pdf/MD5E-s2928912--761b72fc007a25debed7ee09723030df.pdf

+ 1 - 0
downloaded_data/UNFCCC/Serbia/NC3/3NC_Serbia.pdf

@@ -0,0 +1 @@
+../../../../.git/annex/objects/vz/GF/MD5E-s4428583--653b6f06430056883e72620f623b617f.pdf/MD5E-s4428583--653b6f06430056883e72620f623b617f.pdf

+ 1 - 0
downloaded_data/UNFCCC/Sierra_Leone/BUR1/Sierra_Leone_Updated_BUR.pdf

@@ -0,0 +1 @@
+../../../../.git/annex/objects/Mx/vX/MD5E-s7186175--4d66c2be07aa26c9e6b75523c3d4e9ef.pdf/MD5E-s7186175--4d66c2be07aa26c9e6b75523c3d4e9ef.pdf

+ 1 - 0
downloaded_data/UNFCCC/Solomon_Islands/NC3/Solomon_Islands_TNC_Report.pdf

@@ -0,0 +1 @@
+../../../../.git/annex/objects/g6/Kf/MD5E-s120543040--79ea5840c1fbc078732390cd4b5e03e3.pdf/MD5E-s120543040--79ea5840c1fbc078732390cd4b5e03e3.pdf

+ 1 - 0
downloaded_data/UNFCCC/Switzerland/BTR1/NID-CHE-2024.pdf

@@ -0,0 +1 @@
+../../../../.git/annex/objects/7f/50/MD5E-s13148051--158e91859761a5c4fd7fae5472fecc57.pdf/MD5E-s13148051--158e91859761a5c4fd7fae5472fecc57.pdf

+ 1 - 0
downloaded_data/UNFCCC/United_States_of_America/BTR1/US-GHG-Inventory-2024-Annexes.pdf

@@ -0,0 +1 @@
+../../../../.git/annex/objects/K5/ZJ/MD5E-s14203290--55ebfa5e7ecb517c058ab8b91f263e42.pdf/MD5E-s14203290--55ebfa5e7ecb517c058ab8b91f263e42.pdf

+ 1 - 0
downloaded_data/UNFCCC/United_States_of_America/BTR1/US-GHG-Inventory-2024-ERRATA.pdf

@@ -0,0 +1 @@
+../../../../.git/annex/objects/70/x9/MD5E-s354533--155469ffece489a0efa07dbbebd6f82d.pdf/MD5E-s354533--155469ffece489a0efa07dbbebd6f82d.pdf

+ 1 - 0
downloaded_data/UNFCCC/United_States_of_America/BTR1/US-GHG-Inventory-2024-ERRATA_%282%29.zip

@@ -0,0 +1 @@
+../../../../.git/annex/objects/m7/Px/MD5E-s349008--7f419d2250c482d1ad3eeda0ca0e78e5.zip/MD5E-s349008--7f419d2250c482d1ad3eeda0ca0e78e5.zip

+ 1 - 0
downloaded_data/UNFCCC/United_States_of_America/BTR1/US-GHG-Inventory-2024-Main-Text.pdf

@@ -0,0 +1 @@
+../../../../.git/annex/objects/fk/FK/MD5E-s16660244--ecc354b04069a5afca6ccc54cd0f9005.pdf/MD5E-s16660244--ecc354b04069a5afca6ccc54cd0f9005.pdf

+ 1 - 0
downloaded_data/UNFCCC/United_States_of_America/BTR1/US-GHG-Inventory-2024-Main-Text_%282%29.zip

@@ -0,0 +1 @@
+../../../../.git/annex/objects/2Q/5M/MD5E-s26381473--f0b14a9e8a5ba8d6064877b787fe53ff.zip/MD5E-s26381473--f0b14a9e8a5ba8d6064877b787fe53ff.zip

+ 5 - 8
downloaded_data/UNFCCC/folder_mapping.json

@@ -64,8 +64,8 @@
     "FJI": "Fiji",
     "FRA": "France",
     "FSM": [
-        "Micronesia_(Federated_State_of)",
-        "Micronesia_(Federated_States_of)"
+        "Micronesia_(Federated_States_of)",
+        "Micronesia_(Federated_State_of)"
     ],
     "GAB": "Gabon",
     "GBR": "United_Kingdom_of_Great_Britain_and_Northern_Ireland",
@@ -120,8 +120,8 @@
     "MEX": "Mexico",
     "MHL": "Marshall_Islands",
     "MKD": [
-        "The_Republic_of_North_Macedonia",
-        "North_Macedonia"
+        "North_Macedonia",
+        "The_Republic_of_North_Macedonia"
     ],
     "MLI": "Mali",
     "MLT": "Malta",
@@ -187,10 +187,7 @@
     "TON": "Tonga",
     "TTO": "Trinidad_and_Tobago",
     "TUN": "Tunisia",
-    "TUR": [
-        "T\u00fcrkiye",
-        "Turkey"
-    ],
+    "TUR": "T\u00fcrkiye",
     "TUV": "Tuvalu",
     "TZA": "United_Republic_of_Tanzania",
     "UGA": "Uganda",

+ 1 - 0
downloaded_data/UNFCCC/submissions-BTR1.csv

@@ -0,0 +1 @@
+../../.git/annex/objects/Z9/Qz/MD5E-s1734--60373097e7d45acd9783c02ebee355b4.csv/MD5E-s1734--60373097e7d45acd9783c02ebee355b4.csv

+ 1 - 0
downloaded_data/UNFCCC/submissions-annexI_2024.csv

@@ -0,0 +1 @@
+../../.git/annex/objects/1M/mg/MD5E-s1631--d0865bf5b90631f31cf4d41c7381cf1a.csv/MD5E-s1631--d0865bf5b90631f31cf4d41c7381cf1a.csv

+ 1 - 1
downloaded_data/UNFCCC/submissions-bur.csv

@@ -1 +1 @@
-../../.git/annex/objects/x2/Z1/MD5E-s53357--ddb37f1f863bbd677d582d73c13e5b81.csv/MD5E-s53357--ddb37f1f863bbd677d582d73c13e5b81.csv
+../../.git/annex/objects/qj/XZ/MD5E-s54279--b2771d5ad902ce77fd39778f6eb2a6f0.csv/MD5E-s54279--b2771d5ad902ce77fd39778f6eb2a6f0.csv

+ 1 - 1
downloaded_data/UNFCCC/submissions-nc.csv

@@ -1 +1 @@
-../../.git/annex/objects/MP/0Q/MD5E-s83336--33e0e6dbedf59b157efd23650cf88ff5.csv/MD5E-s83336--33e0e6dbedf59b157efd23650cf88ff5.csv
+../../.git/annex/objects/qg/KM/MD5E-s84266--5b3532af589257a7f3b86387633e58cf.csv/MD5E-s84266--5b3532af589257a7f3b86387633e58cf.csv

+ 1 - 0
downloaded_data/non-UNFCCC/Taiwan/2023_NIR/2023_NIR_executive_summary_english.pdf

@@ -0,0 +1 @@
+../../../../.git/annex/objects/77/93/MD5E-s1721154--6ae366831b31be85bf2dd5b66d0a69ab.pdf/MD5E-s1721154--6ae366831b31be85bf2dd5b66d0a69ab.pdf

+ 1 - 0
downloaded_data/non-UNFCCC/Taiwan/2023_NIR/2023_NIR_full_text.pdf

@@ -0,0 +1 @@
+../../../../.git/annex/objects/Z2/p4/MD5E-s11949109--fc81798ece743acba960d6c6fd6f22ef.pdf/MD5E-s11949109--fc81798ece743acba960d6c6fd6f22ef.pdf

+ 1 - 1
extracted_data/UNFCCC/Argentina/ARG_BUR5_2023_IPCC2006_PRIMAP.csv

@@ -1 +1 @@
-../../../.git/annex/objects/pv/9g/MD5E-s1660050--3787beb422ef3af173dfb158da104660.csv/MD5E-s1660050--3787beb422ef3af173dfb158da104660.csv
+../../../.git/annex/objects/WP/9W/MD5E-s821964--d8dba8b10c47494323a6483fdf62f5ae.csv/MD5E-s821964--d8dba8b10c47494323a6483fdf62f5ae.csv

+ 1 - 1
extracted_data/UNFCCC/Argentina/ARG_BUR5_2023_IPCC2006_PRIMAP.nc

@@ -1 +1 @@
-../../../.git/annex/objects/35/qV/MD5E-s788143--edf517ddb0106df8a8edeb255c24550b.nc/MD5E-s788143--edf517ddb0106df8a8edeb255c24550b.nc
+../../../.git/annex/objects/xq/Qm/MD5E-s501061--c2270bb260a8bdc960e75423eab59540.nc/MD5E-s501061--c2270bb260a8bdc960e75423eab59540.nc

+ 4 - 4
extracted_data/UNFCCC/Argentina/ARG_BUR5_2023_IPCC2006_PRIMAP.yaml

@@ -3,8 +3,8 @@ attrs:
   ref2: https://ciam.ambiente.gob.ar/repositorio.php?tid=9&stid=36&did=394#
   rights: ''
   contact: mail@johannes-guetschow.de
-  title: ' Processed on 2024-04-05'
-  comment: Read fom pcsv file by Johannes Gütschow Processed on 2024-04-05
+  title: ' Processed on 2024-04-22'
+  comment: Read fom pcsv file by Johannes Gütschow Processed on 2024-04-22
   institution: United Nations Framework Convention on Climate Change (UNFCCC)
   cat: category (IPCC2006_PRIMAP)
   area: area (ISO3)
@@ -13,10 +13,10 @@ time_format: '%Y'
 dimensions:
   '*':
   - time
-  - scenario (PRIMAP)
-  - provenance
   - category (IPCC2006_PRIMAP)
   - source
+  - provenance
+  - scenario (PRIMAP)
   - area (ISO3)
   - entity
   - unit

+ 1 - 1
extracted_data/UNFCCC/Argentina/ARG_BUR5_2023_IPCC2006_PRIMAP_raw.nc

@@ -1 +1 @@
-../../../.git/annex/objects/w4/3q/MD5E-s149640--ae27906848d822d8584154d1092c73ae.nc/MD5E-s149640--ae27906848d822d8584154d1092c73ae.nc
+../../../.git/annex/objects/44/PK/MD5E-s149640--4b969bbf6d50d71b5c5bf53666e7a197.nc/MD5E-s149640--4b969bbf6d50d71b5c5bf53666e7a197.nc

+ 2 - 2
extracted_data/UNFCCC/Argentina/ARG_BUR5_2023_IPCC2006_PRIMAP_raw.yaml

@@ -13,10 +13,10 @@ time_format: '%Y'
 dimensions:
   '*':
   - time
-  - scenario (PRIMAP)
-  - provenance
   - category (IPCC2006_PRIMAP)
   - source
+  - provenance
+  - scenario (PRIMAP)
   - area (ISO3)
   - entity
   - unit

+ 1 - 0
extracted_data/non-UNFCCC/Taiwan/TWN_inventory_2023_IPCC2006_1996_Taiwan_Inv.csv

@@ -0,0 +1 @@
+../../../.git/annex/objects/Pm/pg/MD5E-s39163--4247f7e0de577e8b31fdf42c4af3c485.csv/MD5E-s39163--4247f7e0de577e8b31fdf42c4af3c485.csv

+ 1 - 0
extracted_data/non-UNFCCC/Taiwan/TWN_inventory_2023_IPCC2006_1996_Taiwan_Inv.nc

@@ -0,0 +1 @@
+../../../.git/annex/objects/g5/Z7/MD5E-s76217--3aa33a10d66204b2a5b2a3b72299f35f.nc/MD5E-s76217--3aa33a10d66204b2a5b2a3b72299f35f.nc

+ 23 - 0
extracted_data/non-UNFCCC/Taiwan/TWN_inventory_2023_IPCC2006_1996_Taiwan_Inv.yaml

@@ -0,0 +1,23 @@
+attrs:
+  references: 
+    https://www.cca.gov.tw/information-service/publications/national-ghg-inventory-report/1851.html
+  rights: ''
+  contact: mail@johannes-guetschow.de
+  title: 2023 Republic of China - National Greenhouse Gas Report
+  comment: Read fom pdf file and converted to PRIMAP2 format by Johannes Gütschow
+  institution: Republic of China - Environmental Protection Administration
+  cat: category (IPCC2006_1996_Taiwan_Inv)
+  scen: scenario (PRIMAP)
+  area: area (ISO3)
+time_format: '%Y'
+dimensions:
+  '*':
+  - time
+  - category (IPCC2006_1996_Taiwan_Inv)
+  - scenario (PRIMAP)
+  - provenance
+  - area (ISO3)
+  - source
+  - entity
+  - unit
+data_file: TWN_inventory_2023_IPCC2006_1996_Taiwan_Inv.csv

+ 1 - 0
extracted_data/non-UNFCCC/Taiwan/TWN_inventory_2023_IPCC2006_PRIMAP.csv

@@ -0,0 +1 @@
+../../../.git/annex/objects/q2/1Q/MD5E-s170316--c7742074254da1854d8b40e475bbb0d2.csv/MD5E-s170316--c7742074254da1854d8b40e475bbb0d2.csv

+ 1 - 0
extracted_data/non-UNFCCC/Taiwan/TWN_inventory_2023_IPCC2006_PRIMAP.nc

@@ -0,0 +1 @@
+../../../.git/annex/objects/XX/x7/MD5E-s189701--28e9d468b8c43a574d60a0d60505a37d.nc/MD5E-s189701--28e9d468b8c43a574d60a0d60505a37d.nc

+ 24 - 0
extracted_data/non-UNFCCC/Taiwan/TWN_inventory_2023_IPCC2006_PRIMAP.yaml

@@ -0,0 +1,24 @@
+attrs:
+  references: 
+    https://www.cca.gov.tw/information-service/publications/national-ghg-inventory-report/1851.html
+  rights: ''
+  contact: mail@johannes-guetschow.de
+  title: 2023 Republic of China - National Greenhouse Gas Report Processed on 2024-04-25
+  comment: Read fom pdf file and converted to PRIMAP2 format by Johannes Gütschow
+    Processed on 2024-04-25
+  institution: Republic of China - Environmental Protection Administration
+  cat: category (IPCC2006_PRIMAP)
+  scen: scenario (PRIMAP)
+  area: area (ISO3)
+time_format: '%Y'
+dimensions:
+  '*':
+  - time
+  - category (IPCC2006_PRIMAP)
+  - scenario (PRIMAP)
+  - provenance
+  - area (ISO3)
+  - source
+  - entity
+  - unit
+data_file: TWN_inventory_2023_IPCC2006_PRIMAP.csv