
Adapt UNFCCC-downloader to the new UNFCCC website. Some fixes for the Morocco, Montenegro, and Thailand BUR reading code

Johannes Gütschow, 2 years ago, commit e80dcb0482

+ 34 - 26
code/UNFCCC_downloader/unfccc_submission_info.py

@@ -17,13 +17,16 @@ def get_unfccc_submission_info(
     info = []
     pattern = re.compile(r"BUR ?\d")
     i = 0
+    last_excep = None
     while i < max_tries:
         try:
             driver.get(url)
             html = BeautifulSoup(driver.page_source, "html.parser")
-            title = html.find("h1").contents[0]
+            subtree = html.find(class_="document-title")
+            title = subtree.find("span").contents[0]
             break
-        except (AttributeError, WebDriverException):
+        except (AttributeError, WebDriverException) as excep:
+            last_excep = excep
             print(f"Error fetching {url}")
             print("Retrying ...")
             time.sleep(randrange(5, 15))
@@ -31,7 +34,8 @@ def get_unfccc_submission_info(
             continue
 
     if i == max_tries:
-        print(f"Aborting after {max_tries} tries")
+        print(f"Aborting after {max_tries} tries.")
+        print(last_excep)
     else:
         match = pattern.search(title)
         if match:
@@ -39,31 +43,35 @@ def get_unfccc_submission_info(
         else:
             kind = None
 
-        h2 = html.find("h2", text="Versions")
-        if h2:
-            div = h2.findNext("div")
-            links = div.findAll("a")
-            try:
-                country = (
-                    html.find("h2", text="Countries").findNext("div").findNext("div").text
-                )
-            except AttributeError:
-                country = (
-                    html.find("h2", text="Corporate Author")
-                    .findNext("div")
-                    .findNext("div")
-                    .text
-                )
-            doctype = (
-                html.find("h2", text="Document Type").findNext("div").findNext("div").text
-            )
-            for link in links:
-                url = link.attrs["href"]
+        # TODO: might improve speed by first searching for class="document-line" and then operating on the resulting subtree for the info
+        try:
+            subtree = html.find_all(
+                class_="field field--name-field-document-country field--type-termstore-entity-reference field--label-inline")
+            country = subtree[0].find(class_="field--item").contents[0]
+        except AttributeError:
+            # author as backup for country
+            subtree = html.find_all(class_="field--name-field-document-ca")
+            country = subtree[0].find(class_="field--item").contents[0]
+        # document type
+        subtree = html.find_all(
+            class_="field field--name-field-document-type field--type-termstore-entity-reference field--label-hidden field--items")
+        doctype = subtree[0].find(class_="field--item").contents[0]
+
+        # get files
+        sub_files = html.find(
+            class_=["form-select form-control", "form-select form-control download"])
+        files = sub_files.find_all("option", value=True)
+        files = [file.attrs['value'] for file in files]
+
+        if len(files) > 0:
+            for file in files:
                 if not kind:
-                    match = pattern.search(url.upper())
+                    match = pattern.search(file.upper())
                     if match:
                         kind = match.group(0)
                     else:
+                        # TODO: check why search in filename makes sense (compared to
+                        #  directly using doctype)
                         if ("CRF" in doctype) or ("CRF" in title):
                             kind = "CRF"
                         elif ("SEF" in doctype) or ("SEF" in title):
@@ -80,10 +88,10 @@ def get_unfccc_submission_info(
                     "Kind": kind,
                     "Country": country,
                     "Title": title,
-                    "URL": url,
+                    "URL": file,
                 })
 
-            print("\t".join([kind, country, title, url]))
+                print("\t".join([kind, country, title, file]))
         else:
             print(f"No files found for {url}")
 

+ 1 - 3
code/UNFCCC_reader/Montenegro/config_MNE_BUR3.py

@@ -56,10 +56,8 @@ aggregate_cats = {
     '3.C.1': {'sources': ['3.C.1.c', '3.C.1.b'], 'name': 'Emissions from Biomass Burning'},
     '3.C': {'sources': ['3.C.1', '3.C.3', 'M.3.C.45AG', '3.C.7'],
             'name': 'Aggregate sources and non-CO2 emissions sources on land'},
-    'M.3.C.AG': {'sources': ['3.C.1.b', '3.C.3', 'M.3.C.45AG', '3.C.7'],
+    'M.3.C.AG': {'sources': ['3.C.1', '3.C.3', 'M.3.C.45AG', '3.C.7'],
             'name': 'Aggregate sources and non-CO2 emissions sources on land (Agriculture)'},
-    'M.3.C.LU': {'sources': ['3.C.1.c'],
-            'name': 'Aggregate sources and non-CO2 emissions sources on land (Land use)'},
     '3': {'sources': ['M.AG', 'M.LULUCF'], 'name': 'AFOLU'},
     'M.AG.ELV': {'sources': ['M.3.C.AG'], 'name': 'Agriculture excluding livestock emissions'},
 }

+ 5 - 10
code/UNFCCC_reader/Montenegro/read_MNE_BUR3_from_pdf.py

@@ -45,13 +45,6 @@ regex_entity = r"^(.*)\s\("
 gwp_to_use = 'AR4GWP100'
 
 # conversion to PRIMAP2 format
-# manual category codes
-cat_codes_manual = { # transform to PRIMAP1 form. PRIMAP2 form in next step with other codes
-    'International bunkers': 'MBK',
-    'Marine': 'MBKM',
-    'Aviation': 'MBKA',
-    'Multilateral operations': 'MMULTIOP',
-}
 
 coords_terminologies = {
     "area": "ISO3",
@@ -69,9 +62,9 @@ coords_defaults = {
 coords_value_mapping = {
     'unit': 'PRIMAP1',
     'entity': {
-        f"GHG {gwp_to_use}": f"KYOTOGHG {gwp_to_use}",
-        f"HFC {gwp_to_use}": f"HFCS {gwp_to_use}",
-        f"PFC {gwp_to_use}": f"PFCS {gwp_to_use}",
+        f"GHG ({gwp_to_use})": f"KYOTOGHG ({gwp_to_use})",
+        f"HFC ({gwp_to_use})": f"HFCS ({gwp_to_use})",
+        f"PFC ({gwp_to_use})": f"PFCS ({gwp_to_use})",
     },
     'category': {
         'Total national GHG emissions (with LULUCF)': '0',
@@ -80,6 +73,8 @@ coords_value_mapping = {
         '1.A.3.a.i': 'M.BK.A',
         '1.A.3.d.i': 'M.BK.M',
         'CO2 from Biomass Combustion for Energy Production': 'M.BIO',
+        '6 Other': '6',
+        '2 H': '2.H',
     },
 }
 

+ 137 - 0
code/UNFCCC_reader/Morocco/config_MAR_BUR3.py

@@ -0,0 +1,137 @@
+# define which raw tables to combine
+table_defs = {
+    2010: {
+        'Energy': [0, 1],
+        'Agriculture': [10],
+        'IPPU': [15, 16, 17],
+        'LULUCF': [30],
+        'Waste': [35],
+    },
+    2012: {
+        'Energy': [2, 3],
+        'Agriculture': [11],
+        'IPPU': [18, 19, 20],
+        'LULUCF': [31],
+        'Waste': [36],
+    },
+    2014: {
+        'Energy': [4, 5],
+        'Agriculture': [10],
+        'IPPU': [21, 22, 23],
+        'LULUCF': [32],
+        'Waste': [37],
+    },
+    2016: {
+        'Energy': [6, 7],
+        'Agriculture': [10],
+        'IPPU': [24, 25, 26],
+        'LULUCF': [33],
+        'Waste': [38],
+    },
+    2018: {
+        'Energy': [8, 9],
+        'Agriculture': [14],
+        'IPPU': [27, 28, 29],
+        'LULUCF': [34],
+        'Waste': [39],
+    },
+}
+
+header_defs = {
+    'Energy': [['Catégories', 'CO2', 'CH4', 'N2O', 'NOx', 'CO', 'COVNM', 'SO2'],
+        ['', 'Gg', 'Gg', 'Gg', 'Gg', 'Gg', 'Gg', 'Gg']],
+    'Agriculture': [['Catégories', 'CO2', 'CH4', 'N2O', 'NOx', 'CO', 'COVNM', 'SO2'],
+        ['', 'Gg', 'Gg', 'Gg', 'Gg', 'Gg', 'Gg', 'Gg']],
+    'IPPU': [['Catégories', 'CO2', 'CH4', 'N2O', 'HFCs', 'PFCs', 'SF6', 'NOx', 'CO', 'COVNM', 'SO2'],
+        ['', 'GgCO2eq', 'GgCO2eq', 'GgCO2eq', 'GgCO2eq', 'GgCO2eq', 'GgCO2eq', 'Gg', 'Gg', 'Gg', 'Gg']],
+    'LULUCF': [['Catégories', 'CO2', 'CH4', 'N2O', 'NOx', 'CO', 'COVNM', 'SO2'],
+        ['', 'GgCO2eq', 'GgCO2eq', 'GgCO2eq', 'Gg', 'Gg', 'Gg', 'Gg']],
+    'Waste': [['Catégories', 'CO2', 'CH4', 'N2O', 'NOx', 'CO', 'COVNM', 'SO2'],
+        ['', 'GgCO2eq', 'GgCO2eq', 'GgCO2eq', 'Gg', 'Gg', 'Gg', 'Gg']],
+}
+
+remove_cats = ['3.A.4', '3.B', '3.B.4', '1.B.2.a', '1.B.2.b', '1.B.2.c']
+
+cat_mapping = {
+    "1.B.2.a.4": "1.B.2.a.iii.4",
+    "1.B.2.a.5": "1.B.2.a.iii.5",
+    "1.B.2.a.6": "1.B.2.a.iii.6",
+    "1.B.2.b.2": "1.B.2.b.iii.2",
+    "1.B.2.b.4": "1.B.2.b.iii.4",
+    "1.B.2.b.5": "1.B.2.b.iii.5",
+    "1.B.2.b.6": "1.B.2.b.iii.6",
+    "1.B.2.c.1": "1.B.2.b.i", # simplification, split to oil and gas ("1.B.2.X.i")
+    "1.B.2.c.2": "1.B.2.b.ii", # simplification, split to oil and gas ("1.B.2.X.ii")
+    '1.A.2.g': '1.A.2.m', # other industry
+    '3.A': '3.A.1', # enteric fermentation
+    '3.A.1': '3.A.1.a', # cattle
+    '3.A.1.a': '3.A.1.a.i',
+    '3.A.1.b': '3.A.1.a.ii',
+    '3.A.2': '3.A.1.c',
+    '3.A.3': '3.A.1.h', # Swine
+    '3.A.4.a': '3.A.1.d', # goats
+    '3.A.4.b': '3.A.1.e', # camels
+    '3.A.4.c': '3.A.1.f', # horses
+    '3.A.4.d': '3.A.1.g', # Mules and asses
+    '3.A.4.e': '3.A.1.i', # poultry
+#    '3.B': '3.A.2', # Manure Management
+    '3.B.1': '3.A.2.a', # cattle
+    '3.B.1.a': '3.A.2.a.i',
+    '3.B.1.b': '3.A.2.a.ii',
+    '3.B.2': '3.A.2.c', # Sheep
+    '3.B.3': '3.A.2.h', # Swine
+    '3.B.4.a': '3.A.2.d', # Goats
+    '3.B.4.b': '3.A.2.e', # Camels
+    '3.B.4.c': '3.A.2.f', # Horses
+    '3.B.4.d': '3.A.2.g', # Mules and Asses
+    '3.B.4.e': '3.A.2.i', # Poultry
+    '3.B.5': '3.C.6', # indirect N2O from manure management
+    '3.C': '3.C.7', # rice
+    '3.D': 'M.3.C.45AG', # Agricultural soils
+    '3.D.a': '3.C.4', # direct N2O from agri soils
+    '3.D.a.1': '3.C.4.a', # inorganic fertilizers
+    '3.D.a.2': '3.C.4.b', # organic fertilizers
+    '3.D.a.3': '3.C.4.c', # urine and dung by grazing animals
+    '3.D.a.4': '3.C.4.d', # N in crop residues
+    '3.D.b': '3.C.5', # indirect N2O from managed soils
+    '3.D.b.1': '3.C.5.a', # Atmospheric deposition
+    '3.D.b.2': '3.C.5.b', # nitrogen leaching and runoff
+    '3.H': '3.C.3', # urea application
+    'LU.3.B.1': '3.B.1', # forest
+    'LU.3.B.2': '3.B.2', # cropland
+    'LU.3.B.3': '3.B.3', # grassland
+    'LU.3.B.4': '3.B.4', # wetland
+    'LU.3.B.5': '3.B.5', # Settlements
+    'LU.3.B.6': '3.B.6', # other land
+}
+
+aggregate_cats = {
+    '1.B.2.a.iii': {'sources': ['1.B.2.a.iii.4', '1.B.2.a.iii.5', '1.B.2.a.iii.6'],
+                    'name': 'All Other'},
+    '1.B.2.b.iii': {'sources': ['1.B.2.b.iii.2', '1.B.2.b.iii.4', '1.B.2.b.iii.5',
+                                '1.B.2.b.iii.6',],
+                    'name': 'All Other'},
+    '1.B.2.a': {'sources': ['1.B.2.a.iii'], 'name': 'Oil'},
+    '1.B.2.b': {'sources': ['1.B.2.b.i', '1.B.2.b.ii', '1.B.2.b.iii'],
+                'name': 'Natural Gas'},
+    '2.D':  {'sources': ['2.D.4'], 'name': 'Non-Energy Products from Fuels and Solvent Use'},
+    '2.F.1':  {'sources': ['2.F.1.a', '2.F.1.b'], 'name': 'Refrigeration and Air Conditioning'},
+    '2.F':  {'sources': ["2.F.1", "2.F.2", "2.F.3", "2.F.4", "2.F.5", "2.F.6"],
+             'name': 'Product uses as Substitutes for Ozone Depleting Substances'},
+    '2.H':  {'sources': ["2.H.1", "2.H.2", "2.H.3"], 'name': 'Other'},
+    '3.A.2': {'sources': ['3.A.2.a', '3.A.2.c', '3.A.2.d', '3.A.2.e', '3.A.2.f',
+                          '3.A.2.g', '3.A.2.h', '3.A.2.i'],
+              'name': 'Manure Management'},
+    '3.A': {'sources': ['3.A.1', '3.A.2'], 'name': 'Livestock'},
+    '3.B': {'sources': ['3.B.1', '3.B.2', '3.B.3', '3.B.4', '3.B.5', '3.B.6'], 'name': 'Land'},
+    '3.C': {'sources': ['3.C.3', '3.C.4', '3.C.5', '3.C.6', '3.C.7'],
+            'name': 'Aggregate sources and non-CO2 emissions sources on land'},
+    'M.3.C.AG': {'sources': ['3.C.3', '3.C.4', '3.C.5', '3.C.6', '3.C.7'],
+            'name': 'Aggregate sources and non-CO2 emissions sources on land (Agriculture)'},
+    'M.AG': {'sources': ['3.A', 'M.3.C.AG'], 'name': 'Agriculture'},
+    '3': {'sources': ['M.AG', 'M.LULUCF'], 'name': 'AFOLU'},
+    'M.AG.ELV': {'sources': ['M.3.C.AG'], 'name': 'Agriculture excluding livestock emissions'},
+}
+
+zero_cats = ['1.B.2.a.i', '1.B.2.a.ii']  # venting and flaring are zero for oil
+# as all emissions are mapped to natural gas
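
The "LU." prefix on the land-use codes exists because "3.B" is used twice in the source tables: manure management in the agriculture tables and land categories in the LULUCF table. The reader below therefore tags the LULUCF rows as "LU.3.B.x" in cat_codes_manual first, so a plain replace with cat_mapping can move both meanings without collisions. A minimal pandas sketch of that idea:

import pandas as pd

cats = pd.Series(["3.B.1", "LU.3.B.1"])
# same two entries as in cat_mapping above
print(cats.replace({"3.B.1": "3.A.2.a", "LU.3.B.1": "3.B.1"}))
# 0    3.A.2.a   (manure management, cattle)
# 1      3.B.1   (land, forest)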

+ 122 - 71
code/UNFCCC_reader/Morocco/read_MAR_BUR3_from_pdf.py

@@ -4,8 +4,11 @@
 import camelot
 import primap2 as pm2
 import pandas as pd
-import numpy as np
+import copy
 from pathlib import Path
+from config_MAR_BUR3 import zero_cats, cat_mapping, aggregate_cats, remove_cats, \
+    table_defs, header_defs
+from primap2.pm2io._data_reading import matches_time_format, filter_data
 
 # ###
 # configuration
@@ -15,12 +18,8 @@ root_path = root_path.resolve()
 downloaded_data_path = root_path / "downloaded_data"
 extracted_data_path = root_path / "extracted_data"
 
-
 input_folder = downloaded_data_path / 'UNFCCC' / 'Morocco' / 'BUR3'
 output_folder = extracted_data_path / 'UNFCCC' / 'Morocco'
-if not output_folder.exists():
-    output_folder.mkdir()
-
 output_filename = 'MAR_BUR3_2022_'
 
 inventory_file = 'Morocco_BUR3_Fr.pdf'
@@ -33,79 +32,29 @@ pages_to_read = range(104, 138)
 
 compression = dict(zlib=True, complevel=9)
 
-header_defs = {
-    'Energy': [['Catégories', 'CO2', 'CH4', 'N2O', 'NOx', 'CO', 'COVNM', 'SO2'],
-        ['', 'Gg', 'Gg', 'Gg', 'Gg', 'Gg', 'Gg', 'Gg']],
-    'Agriculture': [['Catégories', 'CO2', 'CH4', 'N2O', 'NOx', 'CO', 'COVNM', 'SO2'],
-        ['', 'Gg', 'Gg', 'Gg', 'Gg', 'Gg', 'Gg', 'Gg']],
-    'IPPU': [['Catégories', 'CO2', 'CH4', 'N2O', 'HFCs', 'PFCs', 'SF6', 'NOx', 'CO', 'COVNM', 'SO2'],
-        ['', 'GgCO2eq', 'GgCO2eq', 'GgCO2eq', 'GgCO2eq', 'GgCO2eq', 'GgCO2eq', 'Gg', 'Gg', 'Gg', 'Gg']],
-    'LULUCF': [['Catégories', 'CO2', 'CH4', 'N2O', 'NOx', 'CO', 'COVNM', 'SO2'],
-        ['', 'GgCO2eq', 'GgCO2eq', 'GgCO2eq', 'Gg', 'Gg', 'Gg', 'Gg']],
-    'Waste': [['Catégories', 'CO2', 'CH4', 'N2O', 'NOx', 'CO', 'COVNM', 'SO2'],
-        ['', 'GgCO2eq', 'GgCO2eq', 'GgCO2eq', 'Gg', 'Gg', 'Gg', 'Gg']],
-}
-
-# define which raw tables to combine
-table_defs = {
-    2010: {
-        'Energy': [0, 1],
-        'Agriculture': [10],
-        'IPPU': [15, 16, 17],
-        'LULUCF': [30],
-        'Waste': [35],
-    },
-    2012: {
-        'Energy': [2, 3],
-        'Agriculture': [11],
-        'IPPU': [18, 19, 20],
-        'LULUCF': [31],
-        'Waste': [36],
-    },
-    2014: {
-        'Energy': [4, 5],
-        'Agriculture': [10],
-        'IPPU': [21, 22, 23],
-        'LULUCF': [32],
-        'Waste': [37],
-    },
-    2016: {
-        'Energy': [6, 7],
-        'Agriculture': [10],
-        'IPPU': [24, 25, 26],
-        'LULUCF': [33],
-        'Waste': [38],
-    },
-    2018: {
-        'Energy': [8, 9],
-        'Agriculture': [14],
-        'IPPU': [27, 28, 29],
-        'LULUCF': [34],
-        'Waste': [39],
-    },
-}
-
 # special header as category code and name in one column
 header_long = ["orig_cat_name", "entity", "unit", "time", "data"]
 
 index_cols = ['Catégories']
 
 # rows to remove
-cats_remove = []
+cats_remove = [
+    'Agriculture' # always empty
+]
 
 # manual category codes
 cat_codes_manual = {
     '1.A.2.e -Industries agro-alimentaires et du tabac': '1.A.2.e',
     '1.A.2.f -Industries des minéraux non- métalliques': '1.A.2.f',
-    'Agriculture': 'M.AG',
+    #'Agriculture': 'M.AG',
     '2. PIUP': '2',
     'UTCATF': 'M.LULUCF',
-    '3.B.1 Terres forestières': '3.B.1',
-    '3.B.2 Terres cultivées': '3.B.2',
-    '3.B.3 Prairies': '3.B.3',
-    '3.B.4 Terres humides': '3.B.4',
-    '3.B.5 Etablissements': '3.B.5',
-    '3.B.6 Autres terres': '3.B.6',
+    '3.B.1 Terres forestières': 'LU.3.B.1',
+    '3.B.2 Terres cultivées': 'LU.3.B.2',
+    '3.B.3 Prairies': 'LU.3.B.3',
+    '3.B.4 Terres humides': 'LU.3.B.4',
+    '3.B.5 Etablissements': 'LU.3.B.5',
+    '3.B.6 Autres terres': 'LU.3.B.6',
     '1.B.1.a.i.1 -Exploitation minière': '1.A.1.a.i.1',
 }
 
@@ -113,7 +62,7 @@ cat_code_regexp = r'(?P<code>^[a-zA-Z0-9\.]{1,14})\s-\s.*'
 
 coords_terminologies = {
     "area": "ISO3",
-    "category": "IPCC2006_PRIMAP",
+    "category": "IPCC1996_2006_MAR_Inv",
     "scenario": "PRIMAP",
 }
 
@@ -140,9 +89,9 @@ coords_cols = {
     "unit": "unit"
 }
 
-add_coords_cols = {
-    "orig_cat_name": ["orig_cat_name", "category"],
-}
+#add_coords_cols = {
+#    "orig_cat_name": ["orig_cat_name", "category"],
+#}
 
 filter_remove = {
     "f1": {
@@ -184,6 +133,13 @@ for year in table_defs.keys():
         df_this_table = df_this_table.drop(df_this_table.iloc[0:2].index)
         df_this_table.columns = header_defs[sector]
 
+        # fix 2018 agri table
+        if (year == 2018) & (sector == "Agriculture"):
+            last_shift_row = 25
+            df_temp = df_this_table.iloc[0: last_shift_row, 1:].copy()
+            df_this_table.iloc[0, 1:] = ''
+            df_this_table.iloc[1: last_shift_row + 1, 1:] = df_temp
+
         # replace line breaks, long hyphens, double, and triple spaces in category names
         df_this_table.iloc[:, 0] = df_this_table.iloc[:, 0].str.replace("\n", " ")
         df_this_table.iloc[:, 0] = df_this_table.iloc[:, 0].str.replace("   ", " ")
@@ -221,7 +177,7 @@ df_all = df_all.reset_index(drop=True)
 
 # prepare numbers for pd.to_numeric
 df_all.loc[:, "data"] = df_all.loc[:, "data"].str.replace(' ', '')
-repl = lambda m: m.group('part1') + m.group('part2')
+repl = lambda m: m.group('part1') + '.' + m.group('part2')
 df_all.loc[:, 'data'] = df_all.loc[:, 'data'].str.replace(
     '(?P<part1>[0-9]+),(?P<part2>[0-9\.]+)$', repl, regex=True)
 df_all['data'][df_all['data'].isnull()] = 'NaN'
@@ -231,6 +187,9 @@ for entity in df_all["entity"].unique():
     df_all["entity"][(df_all["entity"] == entity) & (
                 df_all["unit"] == "GgCO2eq")] = f"{entity} ({gwp_to_use})"
 
+# drop "original_cat_name" as it has non-unique values per category
+df_all = df_all.drop(columns="orig_cat_name")
+
 data_if = pm2.pm2io.convert_long_dataframe_if(
     df_all,
     coords_cols=coords_cols,
@@ -265,9 +224,94 @@ data_pm2 = data_pm2.drop_vars(entities_to_convert)
 # convert back to IF to have units in the fixed format
 data_if = data_pm2.pr.to_interchange_format()
 
-##### save data to IF and native format ####
+# ###
+# convert to IPCC2006 categories
+# ###
+data_if_2006 = copy.deepcopy(data_if)
+data_if_2006.attrs = copy.deepcopy(data_if.attrs)
+
+filter_remove_cats = {
+    "cat": {
+        f"category ({coords_terminologies['category']})":
+    remove_cats
+    },
+}
+
+filter_data(data_if_2006, filter_remove=filter_remove_cats)
+
+# map categories
+data_if_2006 = data_if_2006.replace(
+    {f"category ({coords_terminologies['category']})": cat_mapping})
+data_if_2006[f"category ({coords_terminologies['category']})"].unique()
+
+# rename the category col
+data_if_2006.rename(columns={
+    f"category ({coords_terminologies['category']})": 'category (IPCC2006_PRIMAP)'},
+                    inplace=True)
+data_if_2006.attrs['attrs']['cat'] = 'category (IPCC2006_PRIMAP)'
+data_if_2006.attrs['dimensions']['*'] = [
+    'category (IPCC2006_PRIMAP)' if item == f"category ({coords_terminologies['category']})"
+    else item for item in data_if_2006.attrs['dimensions']['*']]
+# aggregate categories
+time_format = '%Y'
+time_columns = [
+    col
+    for col in data_if_2006.columns.values
+    if matches_time_format(col, time_format)
+]
+
+for cat_to_agg in aggregate_cats:
+    mask = data_if_2006["category (IPCC2006_PRIMAP)"].isin(
+        aggregate_cats[cat_to_agg]["sources"])
+    df_test = data_if_2006[mask]
+    # print(df_test)
+
+    if len(df_test) > 0:
+        print(f"Aggregating category {cat_to_agg}")
+        df_combine = df_test.copy(deep=True)
+
+        for col in time_columns:
+            df_combine[col] = pd.to_numeric(df_combine[col], errors="coerce")
+
+        df_combine = df_combine.groupby(
+            by=['source', 'scenario (PRIMAP)', 'provenance', 'area (ISO3)', 'entity',
+                'unit']).sum(min_count=1)
+
+        df_combine.insert(0, "category (IPCC2006_PRIMAP)", cat_to_agg)
+        # df_combine.insert(1, "cat_name_translation", aggregate_cats[cat_to_agg]["name"])
+        # df_combine.insert(2, "orig_cat_name", "computed")
+
+        df_combine = df_combine.reset_index()
+
+        data_if_2006 = pd.concat([data_if_2006, df_combine], axis=0, join='outer')
+        data_if_2006 = data_if_2006.reset_index(drop=True)
+    else:
+        print(f"no data to aggregate category {cat_to_agg}")
+
+for cat in zero_cats:
+    entities = data_if_2006["entity"].unique()
+    data_zero = data_if_2006[data_if_2006["category (IPCC2006_PRIMAP)"]=="1"].copy(
+        deep=True)
+    data_zero["category (IPCC2006_PRIMAP)"] = cat
+    for col in time_columns:
+        data_zero[col] = 0
+
+    data_if_2006 = pd.concat([data_if_2006, data_zero])
+
+# conversion to PRIMAP2 native format
+data_pm2_2006 = pm2.pm2io.from_interchange_format(data_if_2006)
+
+# convert back to IF to have units in the fixed format
+data_if_2006 = data_pm2_2006.pr.to_interchange_format()
+
+
+# ###
+# save data to IF and native format
+# ###
 if not output_folder.exists():
     output_folder.mkdir()
+
+# data in original categories
 pm2.pm2io.write_interchange_format(
     output_folder / (output_filename + coords_terminologies["category"]), data_if)
 
@@ -276,3 +320,10 @@ data_pm2.pr.to_netcdf(
     output_folder / (output_filename + coords_terminologies["category"] + ".nc"),
     encoding=encoding)
 
+# data in 2006 categories
+pm2.pm2io.write_interchange_format(
+    output_folder / (output_filename + "IPCC2006_PRIMAP"), data_if_2006)
+
+encoding = {var: compression for var in data_pm2_2006.data_vars}
+data_pm2_2006.pr.to_netcdf(
+    output_folder / (output_filename + "IPCC2006_PRIMAP" + ".nc"), encoding=encoding)
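
The category aggregation added above boils down to a groupby-sum per target category. A toy illustration with made-up values, grouping only by three columns for brevity (the script additionally groups by source, scenario (PRIMAP) and provenance):

import pandas as pd

df = pd.DataFrame({
    "area (ISO3)": ["MAR", "MAR"],
    "entity": ["CH4", "CH4"],
    "unit": ["Gg", "Gg"],
    "category (IPCC2006_PRIMAP)": ["3.A.1", "3.A.2"],
    "2018": [10.0, 5.0],
})
# sources of '3.A' (Livestock) as defined in aggregate_cats
mask = df["category (IPCC2006_PRIMAP)"].isin(["3.A.1", "3.A.2"])
agg = df[mask].groupby(["area (ISO3)", "entity", "unit"])[["2018"]].sum(min_count=1)
agg.insert(0, "category (IPCC2006_PRIMAP)", "3.A")
df = pd.concat([df, agg.reset_index()], ignore_index=True)
print(df)  # three rows; the new '3.A' row carries 15.0 for 2018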

+ 4 - 0
code/UNFCCC_reader/Thailand/read_THA_BUR3_from_pdf.py

@@ -304,6 +304,10 @@ data_indirect_pm2 = pm2.pm2io.from_interchange_format(data_indirect_IF)
 
 data_all = data_inventory_pm2.pr.merge(data_main_sector_ts_pm2)
 data_all = data_all.pr.merge(data_indirect_pm2)
+
+# combine CO2 emissions and absorptions
+data_all["CO2"] = data_all['CO2 removals'] + data_all['CO2 emissions']
+
 data_all_if = data_all.pr.to_interchange_format()
 
 

+ 1 - 1
downloaded_data/UNFCCC/submissions-annexI_2022.csv

@@ -1 +1 @@
-../../.git/annex/objects/wm/95/MD5E-s1--68b329da9893e34099c7d8ad5cb9c940.csv/MD5E-s1--68b329da9893e34099c7d8ad5cb9c940.csv
+../../.git/annex/objects/Vm/X3/MD5E-s28534--a0b2bc09b840b25b6c24806403087be8.csv/MD5E-s28534--a0b2bc09b840b25b6c24806403087be8.csv