1 year ago · 2586dc91b8
--- a/UNFCCC_GHG_data/UNFCCC_reader/Thailand/config_THA_BUR3.py
+++ b/UNFCCC_GHG_data/UNFCCC_reader/Thailand/config_THA_BUR3.py
@@ -0,0 +1,373 @@
 
				+# configuration for Thailand, BUR3
			
 
				+# ###
			
 
				+# for reading
			
 
				+# ###
			
 
				+
			
 
				+# general
			
 
				+gwp_to_use = "AR4GWP100"
			
 
				+terminology_proc = 'IPCC2006_PRIMAP'
			
 
				+
			
 
				+header_inventory = ['Greenhouse gas source and sink categories',
			
 
				+                   'CO2 emissions', 'CO2 removals',
			
 
				+                   'CH4', 'N2O', 'NOx', 'CO', 'NMVOCs',
			
 
				+                   'SO2', 'HFCs', 'PFCs', 'SF6']
			
 
				+unit_inventory = ['Gg'] * len(header_inventory)
			
 
				+unit_inventory[9] = "GgCO2eq"
			
 
				+unit_inventory[10] = "GgCO2eq"
			
 
				+
			
 
				+# 2016 inventory
			
 
				+inv_conf = {
			
 
				+    'year': 2016,
			
 
				+    'entity_row': 0,
			
 
				+    'unit_row': 1,
			
 
				+    'index_cols': "Greenhouse gas source and sink categories",
			
 
				+    'header': header_inventory,
			
 
				+    'unit': unit_inventory,
			
 
				+    # special header as category code and name in one column
			
 
				+    'header_long': ["orig_cat_name", "entity", "unit", "time", "data"],
			
 
				+    # manual category codes (manual mapping to primap1, will be mapped to primap2
			
 
				+    # automatically with the other codes)
			
 
				+    'cat_codes_manual': {
			
 
				+        '6. Other Memo Items (not accounted in Total Emissions)': 'MEMO',
			
 
				+        'International Bunkers': 'MBK',
			
 
				+        'CO2 from Biomass': 'MBIO',
			
 
				+    },
			
 
				+    'cat_code_regexp': r'^(?P<code>[a-zA-Z0-9]{1,4})[\s\.].*',
			
 
				+}
			
 
				+
			
 
				+# primap2 format conversion
			
 
				+coords_cols = {
			
 
				+    "category": "category",
			
 
				+    "entity": "entity",
			
 
				+    "unit": "unit",
			
 
				+}
			
 
				+
			
 
				+coords_terminologies = {
			
 
				+    "area": "ISO3",
			
 
				+    "category": "IPCC1996_2006_THA_Inv",
			
 
				+    "scenario": "PRIMAP",
			
 
				+}
			
 
				+
			
 
				+coords_defaults = {
			
 
				+    "source": "THA-GHG-Inventory",
			
 
				+    "provenance": "measured",
			
 
				+    "area": "THA",
			
 
				+    "scenario": "BUR3",
			
 
				+}
			
 
				+
			
 
				+coords_value_mapping = {
			
 
				+    "unit": "PRIMAP1",
			
 
				+    "category": "PRIMAP1",
			
 
				+    "entity": {
			
 
				+        'HFCs': f"HFCS ({gwp_to_use})",
			
 
				+        'PFCs': f"PFCS ({gwp_to_use})",
			
 
				+        'NMVOCs': 'NMVOC',
			
 
				+    },
			
 
				+}
			
 
				+
			
 
				+filter_remove = {
			
 
				+    'f_memo': {"category": "MEMO"},
			
 
				+}
			
 
				+filter_keep = {}
			
 
				+
			
 
				+meta_data = {
			
 
				+    "references": "https://unfccc.int/documents/267629",
			
 
				+    "rights": "",
			
 
				+    "contact": "mail@johannes-guetschow.de",
			
 
				+    "title": "Thailand. Biennial update report (BUR). BUR3",
			
 
				+    "comment": "Read fom pdf by Johannes Gütschow",
			
 
				+    "institution": "UNFCCC",
			
 
				+}
			
 
				+
			
 
				+# main sector time series
			
 
				+header_main_sector_ts = [
			
 
				+    'Year', 'Energy', 'IPPU',
			
 
				+    'Agriculture', 'LULUCF', 'Waste',
			
 
				+    'Net emissions (Including LULUCF)',
			
 
				+    'Net emissions (Excluding LULUCF)']
			
 
				+unit_main_sector_ts = ['GgCO2eq'] * len(header_main_sector_ts)
			
 
				+unit_main_sector_ts[0] = ''
			
 
				+
			
 
				+trend_conf = {
			
 
				+    'header': header_main_sector_ts,
			
 
				+    'unit': unit_main_sector_ts,
			
 
				+    # manual category codes (manual mapping to primap1, will be mapped to primap2
			
 
				+    # automatically with the other codes)
			
 
				+    'cat_codes_manual': {
			
 
				+        'Energy': "1",
			
 
				+        'IPPU': "2",
			
 
				+        'Agriculture': "3",
			
 
				+        'LULUCF': "4",
			
 
				+        'Waste': "5",
			
 
				+        'Net emissions (Including LULUCF)': "0",
			
 
				+        'Net emissions (Excluding LULUCF)': "M0EL",
			
 
				+    },
			
 
				+}
			
 
				+
			
 
				+coords_cols_main_sector_ts = {
			
 
				+    "category": "category",
			
 
				+    "unit": "unit",
			
 
				+}
			
 
				+
			
 
				+coords_defaults_main_sector_ts = {
			
 
				+    "source": "THA-GHG-Inventory",
			
 
				+    "provenance": "measured",
			
 
				+    "area": "THA",
			
 
				+    "scenario": "BUR3",
			
 
				+    "entity": f"KYOTOGHG ({gwp_to_use})",
			
 
				+}
			
 
				+
			
 
				+# indirect gases time series
			
 
				+header_indirect = ['Year', 'NOx', 'CO',
			
 
				+                    'NMVOCs', 'SO2']
			
 
				+unit_indirect = ['Gg'] * len(header_indirect)
			
 
				+unit_indirect[0] = ''
			
 
				+ind_conf = {
			
 
				+    'header': header_indirect,
			
 
				+    'unit': unit_indirect,
			
 
				+    'cols_to_remove': ['Average Annual Growth Rate'],
			
 
				+}
			
 
				+
			
 
				+coords_cols_indirect = {
			
 
				+    "entity": "entity",
			
 
				+    "unit": "unit",
			
 
				+}
			
 
				+
			
 
				+coords_defaults_indirect = {
			
 
				+    "source": "THA-GHG-Inventory",
			
 
				+    "provenance": "measured",
			
 
				+    "area": "THA",
			
 
				+    "scenario": "BUR3",
			
 
				+    "category": "0",
			
 
				+}
			
 
				+
			
 
				+# ###
			
 
				+# for processing
			
 
				+# ###
			
 
				+# aggregate categories
			
 
				+country_processing_step1 = {
			
 
				+    'aggregate_cats': {
			
 
				+        '2.A.4': {'sources': ['2.A.4.b', '2.A.4.d'],
			
 
				+                  'name': 'Other Process uses of Carbonates'},
			
 
				+    },
			
 
				+    'aggregate_gases': {
			
 
				+        'KYOTOGHG': {
			
 
				+            'basket': 'KYOTOGHG (AR4GWP100)',
			
 
				+            'basket_contents': ['CO2', 'CH4', 'N2O', 'SF6',
			
 
				+                                'HFCS (AR4GWP100)', 'PFCS (AR4GWP100)'],
			
 
				+            'skipna': True,
			
 
				+            'min_count': 1,
			
 
				+            'sel': {f'category ({coords_terminologies["category"]})':
			
 
				+                [
			
 
				+                    '0', '1', '1.A', '1.A.1', '1.A.2', '1.A.3',
			
 
				+                    '1.A.4', '1.B', '1.B.1', '1.B.2',
			
 
				+                    '1.C',
			
 
				+                    '2', '2.A', '2.A.1', '2.A.2', '2.A.3', '2.A.4',
			
 
				+                    '2.B', '2.C', '2.D', '2.H',
			
 
				+                    '3', '3.A', '3.B', '3.C', '3.D', '3.E', '3.F', '3.G',
			
 
				+                    '3.H', '3.I',
			
 
				+                    '4', '4.A', '4.B', '4.C', '4.D', '4.E',
			
 
				+                    '5', '5.A', '5.B', '5.C', '5.D'
			
 
				+                ]
			
 
				+            }, # not tested
			
 
				+        },
			
 
				+    },
			
 
				+}
			
 
				+
			
 
				+country_processing_step2 = {
			
 
				+    'downscale': {
			
 
				+        # main sectors present as KYOTOGHG sum. subsectors need to be downscaled
			
 
				+        # TODO: downscale CO, NOx, NMVOC, SO2 (national total present)
			
 
				+        'sectors': {
			
 
				+            '1': {
			
 
				+                'basket': '1',
			
 
				+                'basket_contents': ['1.A', '1.B', '1.C'],
			
 
				+                'entities': ['KYOTOGHG (AR4GWP100)'],
			
 
				+                'dim': f'category ({coords_terminologies["category"]})',
			
 
				+            },
			
 
				+            '1.A': {
			
 
				+                'basket': '1.A',
			
 
				+                'basket_contents': ['1.A.1', '1.A.2', '1.A.3', '1.A.4'],
			
 
				+                'entities': ['KYOTOGHG (AR4GWP100)'],
			
 
				+                'dim': f'category ({coords_terminologies["category"]})',
			
 
				+            },
			
 
				+            '1.B': {
			
 
				+                'basket': '1.B',
			
 
				+                'basket_contents': ['1.B.1', '1.B.2'],
			
 
				+                'entities': ['KYOTOGHG (AR4GWP100)'],
			
 
				+                'dim': f'category ({coords_terminologies["category"]})',
			
 
				+            },
			
 
				+            '2': {
			
 
				+                'basket': '2',
			
 
				+                'basket_contents': ['2.A', '2.B', '2.C', '2.D', '2.H'],
			
 
				+                'entities': ['KYOTOGHG (AR4GWP100)'],
			
 
				+                'dim': f'category ({coords_terminologies["category"]})',
			
 
				+            },
			
 
				+            '2.A': {
			
 
				+                'basket': '2.A',
			
 
				+                'basket_contents': ['2.A.1', '2.A.2', '2.A.3', '2.A.4'],
			
 
				+                'entities': ['KYOTOGHG (AR4GWP100)'],
			
 
				+                'dim': f'category ({coords_terminologies["category"]})',
			
 
				+            },
			
 
				+            '3': {
			
 
				+                'basket': '3',
			
 
				+                'basket_contents': ['3.A', '3.B', '3.C', '3.D', '3.E', '3.F', '3.G',
			
 
				+                                    '3.H', '3.I'],
			
 
				+                'entities': ['KYOTOGHG (AR4GWP100)'],
			
 
				+                'dim': f'category ({coords_terminologies["category"]})',
			
 
				+            },
			
 
				+            '4': {
			
 
				+                'basket': '4',
			
 
				+                'basket_contents': ['4.A', '4.B', '4.C', '4.D', '4.E'],
			
 
				+                'entities': ['KYOTOGHG (AR4GWP100)'],
			
 
				+                'dim': f'category ({coords_terminologies["category"]})',
			
 
				+            },
			
 
				+            '5': {
			
 
				+                'basket': '5',
			
 
				+                'basket_contents': ['5.A', '5.B', '5.C', '5.D'],
			
 
				+                'entities': ['KYOTOGHG (AR4GWP100)'],
			
 
				+                'dim': f'category ({coords_terminologies["category"]})',
			
 
				+            },
			
 
				+        },
			
 
				+        'entities': {
			
 
				+            'KYOTO': {
			
 
				+                'basket': 'KYOTOGHG (AR4GWP100)',
			
 
				+                'basket_contents': ['CH4', 'CO2', 'N2O', 'HFCS (AR4GWP100)',
			
 
				+                                    'PFCS (AR4GWP100)', 'SF6'],
			
 
				+                'sel': {f'category ({coords_terminologies["category"]})':
			
 
				+                    [
			
 
				+                        '0', '1', '1.A', '1.A.1', '1.A.2', '1.A.3',
			
 
				+                        '1.A.4', '1.B', '1.B.1', '1.B.2', '1.C',
			
 
				+                        '2', '2.A', '2.A.1', '2.A.2', '2.A.3', '2.A.4',
			
 
				+                        '2.B', '2.C', '2.D', '2.H',
			
 
				+                        '3', '3.A', '3.B', '3.C', '3.D', '3.E', '3.F', '3.G',
			
 
				+                        '3.H', '3.I',
			
 
				+                        '4', '4.A', '4.B', '4.C', '4.D', '4.E',
			
 
				+                        '5', '5.A', '5.B', '5.C', '5.D']},
			
 
				+            },
			
 
				+        },
			
 
				+    },
			
 
				+    'basket_copy': {
			
 
				+        'GWPs_to_add': ["SARGWP100", "AR5GWP100", "AR6GWP100"],
			
 
				+        'entities': ["HFCS", "PFCS"],
			
 
				+        'source_GWP': 'AR4GWP100',
			
 
				+    },
			
 
				+}
			
 
				+## not in BUR3: 1.A.1.a, 1.A.1.b, 1.A.3.a, 1.A.3.b, 1.A.3.c, 1.A.3.d, 1.A.5, 1.B.3,
			
 
				+# 2.B.x, 2.F, 2.G
			
 
				+# 4.E.x, 5.X.y, M.BK.A, M.BK.M
			
 
				+
			
 
				+cat_conversion = {
			
 
				+    'mapping': {
			
 
				+        '0': '0',
			
 
				+        'M.0.EL': 'M.0.EL',
			
 
				+        '1': '1',
			
 
				+        '1.A': '1.A',
			
 
				+        '1.A.1': '1.A.1',
			
 
				+        '1.A.2': '1.A.2',
			
 
				+        '1.A.3': '1.A.3',
			
 
				+        '1.A.4': '1.A.4',
			
 
				+        '1.B': '1.B',
			
 
				+        '1.B.1': '1.B.1',
			
 
				+        '1.B.2': '1.B.2',
			
 
				+        '1.C': '1.C',
			
 
				+        '1.C.1': '1.C.1',
			
 
				+        '1.C.2': '1.C.2',
			
 
				+        '1.C.3': '1.C.3',
			
 
				+        '2': '2',
			
 
				+        '2.A': '2.A',
			
 
				+        '2.A.1': '2.A.1',
			
 
				+        '2.A.2': '2.A.2',
			
 
				+        '2.A.3': '2.A.3',
			
 
				+        '2.A.4': '2.A.4',
			
 
				+        '2.A.4.b': '2.A.4.b',
			
 
				+        '2.A.4.d': '2.A.4.d',
			
 
				+        '2.B': '2.B',
			
 
				+        '2.C': '2.C',
			
 
				+        '2.C.1': '2.C.1',
			
 
				+        '2.D': '2.D',
			
 
				+        '2.D.1': '2.D.1',
			
 
				+        '2.H': '2.H',
			
 
				+        '2.H.1': '2.H.1',
			
 
				+        '2.H.2': '2.H.2',
			
 
				+        '3': 'M.AG',
			
 
				+        '3.A': '3.A.1',
			
 
				+        '3.B': '3.A.2',
			
 
				+        '3.C': 'M.3.C.1.AG',  # field burning of agricultural residues
			
 
				+        '3.D': '3.C.2',  # Liming
			
 
				+        '3.E': '3.C.3',  # urea application
			
 
				+        '3.F': '3.C.4',  # direct N2O from agri soils
			
 
				+        '3.G': '3.C.5',  # indirect N2O from agri soils
			
 
				+        '3.H': '3.C.6',  # indirect N2O from manure management
			
 
				+        '3.I': '3.C.7',  # rice
			
 
				+        '4': 'M.LULUCF',
			
 
				+        '4.A': '3.B.1.a',  # forest remaining forest
			
 
				+        '4.B': '3.B.2.a',  # cropland remaining cropland
			
 
				+        '4.C': '3.B.2.b',  # land converted to cropland
			
 
				+        '4.D': '3.B.6.b',  # land converted to other land
			
 
				+        '4.E': 'M.3.C.1.LU',  # biomass burning (LULUCF)
			
 
				+        '5': '4',
			
 
				+        '5.A': '4.A',
			
 
				+        '5.B': '4.B',
			
 
				+        '5.C': '4.C',
			
 
				+        '5.D': '4.D',
			
 
				+        'M.BK': 'M.BK',
			
 
				+        'M.BIO': 'M.BIO',
			
 
				+    },
			
 
				+    'aggregate': {
			
 
				+        '3.A': {'sources': ['3.A.1', '3.A.2'], 'name': 'Livestock'},
			
 
				+        '3.C.1': {'sources': ['M.3.C.1.AG', 'M.3.C.1.LU'],
			
 
				+                  'name': 'Emissions from Biomass Burning'},
			
 
				+        '3.C': {'sources': ['3.C.1', '3.C.2', '3.C.3', '3.C.4', '3.C.5', '3.C.6', '3.C.7'],
			
 
				+                'name': 'Aggregate sources and non-CO2 emissions sources on land'},
			
 
				+        'M.3.C.AG': {
			
 
				+            'sources': ['M.3.C.1.AG', '3.C.2', '3.C.3', '3.C.4', '3.C.5', '3.C.6', '3.C.7'],
			
 
				+            'name': 'Aggregate sources and non-CO2 emissions sources on land (Agriculture)'},
			
 
				+        'M.AG.ELV': {'sources': ['M.3.C.AG'],
			
 
				+                     'name': 'Agriculture excluding livestock emissions'},
			
 
				+        'M.3.C.LU': {'sources': ['M.3.C.1.LU'],
			
 
				+                     'name': 'Aggregate sources and non-CO2 emissions sources on land (Land use)'},
			
 
				+        '3.B.1': {'sources': ['3.B.1.a'], 'name': 'Forest Land'},
			
 
				+        '3.B.2': {'sources': ['3.B.2.a', '3.B.2.b'], 'name': 'Cropland'},
			
 
				+        '3.B.6': {'sources': ['3.B.6.b'], 'name': 'Other Land'},
			
 
				+        '3.B': {'sources': ['3.B.1', '3.B.2', '3.B.6'], 'name': 'Land'},
			
 
				+        'M.LULUCF': {'sources': ['3.B', 'M.3.C.LU'], 'name': 'LULUCF'},
			
 
				+        '3': {'sources': ['M.AG', 'M.LULUCF'], 'name': 'AFOLU'},
			
 
				+    },
			
 
				+}
			
 
				+
			
 
				+sectors_to_save = [
			
 
				+    '1', '1.A', '1.A.1', '1.A.2', '1.A.3', '1.A.4',
			
 
				+    '1.B', '1.B.1', '1.B.2', '1.C', '1.C.1', '1.C.2', '1.C.3',
			
 
				+    '2', '2.A', '2.A.1', '2.A.2', '2.A.3', '2.A.4', '2.A.4.b', '2.A.4.d',
			
 
				+    '2.B', '2.C', '2.C.1', '2.D', '2.D.1', '2.H', '2.H.1', '2.H.2',
			
 
				+    '3', 'M.AG', '3.A', '3.A.1', '3.A.2',
			
 
				+    '3.C', '3.C.1', '3.C.2', '3.C.3', '3.C.4',
			
 
				+    '3.C.5', '3.C.6', '3.C.7', 'M.3.C.1.AG', 'M.3.C.AG', 'M.AG.ELV',
			
 
				+    'M.LULUCF', 'M.3.C.1.LU', 'M.3.C.LU', '3.B', '3.B.1', '3.B.1.a', '3.B.2', '3.B.2.a',
			
 
				+    '3.B.2.b', '3.B.6', '3.B.6.b',
			
 
				+    '4', '4.A', '4.B', '4.C', '4.D',
			
 
				+    '0', 'M.0.EL', 'M.BK', 'M.BIO']
			
 
				+
			
 
				+
			
 
				+# gas baskets
			
 
				+gas_baskets = {
			
 
				+    'FGASES (SARGWP100)': ['HFCS (SARGWP100)', 'PFCS (SARGWP100)', 'SF6', 'NF3',
			
 
				+                           'Unspecified mix of HFCs (SARGWP100)',
			
 
				+                           'Unspecified mix of PFCs (SARGWP100)'],
			
 
				+    'FGASES (AR4GWP100)': ['HFCS (AR4GWP100)', 'PFCS (AR4GWP100)', 'SF6', 'NF3',
			
 
				+                           'Unspecified mix of HFCs (AR4GWP100)',
			
 
				+                           'Unspecified mix of PFCs (AR4GWP100)'],
			
 
				+    'FGASES (AR5GWP100)':['HFCS (AR5GWP100)', 'PFCS (AR5GWP100)', 'SF6', 'NF3',
			
 
				+                          'Unspecified mix of HFCs (AR5GWP100)',
			
 
				+                          'Unspecified mix of PFCs (AR5GWP100)'
			
 
				+                          ],
			
 
				+    'FGASES (AR6GWP100)':['HFCS (AR6GWP100)', 'PFCS (AR6GWP100)', 'SF6', 'NF3',
			
 
				+                          'Unspecified mix of HFCs (AR6GWP100)',
			
 
				+                          'Unspecified mix of PFCs (AR6GWP100)'
			
 
				+                          ],
			
 
				+    'KYOTOGHG (SARGWP100)': ['CO2', 'CH4', 'N2O', 'FGASES (SARGWP100)'],
			
 
				+    'KYOTOGHG (AR4GWP100)': ['CO2', 'CH4', 'N2O', 'FGASES (AR4GWP100)'],
			
 
				+    'KYOTOGHG (AR5GWP100)': ['CO2', 'CH4', 'N2O', 'FGASES (AR5GWP100)'],
			
 
				+    'KYOTOGHG (AR6GWP100)': ['CO2', 'CH4', 'N2O', 'FGASES (AR6GWP100)'],
			
 
				+}
			
--- a/UNFCCC_GHG_data/UNFCCC_reader/Thailand/read_THA_BUR3_from_pdf.py
+++ b/UNFCCC_GHG_data/UNFCCC_reader/Thailand/read_THA_BUR3_from_pdf.py
@@ -1,12 +1,23 @@
 
				 # this script reads data from Thailand's BUR3
			
 
				 # Data is read from the pdf file
			
 
				+
			
 
				+import os
			
 
				+os.environ.setdefault("UNFCCC_GHG_ROOT_PATH",  # keep a caller-provided root
			
 
				+     "/storage/data/data/PRIMAP/primap_2.0/datasets/UNFCCC_non-AnnexI_data/")
			
 
				 import pandas as pd
			
 
				 import primap2 as pm2
			
 
				 import camelot
			
 
				-import copy
			
 
				 
			
 
				+from UNFCCC_GHG_data.helper import process_data_for_country
			
 
				 from UNFCCC_GHG_data.helper import downloaded_data_path, extracted_data_path
			
 
				-from primap2.pm2io._data_reading import matches_time_format
			
 
				+from config_THA_BUR3 import inv_conf, trend_conf, ind_conf
			
 
				+from config_THA_BUR3 import coords_cols, coords_defaults, coords_terminologies, \
			
 
				+    coords_value_mapping, filter_remove, filter_keep, meta_data
			
 
				+from config_THA_BUR3 import coords_cols_main_sector_ts, coords_defaults_main_sector_ts
			
 
				+from config_THA_BUR3 import coords_defaults_indirect, coords_cols_indirect
			
 
				+from config_THA_BUR3 import gas_baskets, cat_conversion, terminology_proc, \
			
 
				+    sectors_to_save
			
 
				+from config_THA_BUR3 import country_processing_step1, country_processing_step2
			
 
				 
			
 
				 # ###
			
 
				 # configuration
			
@@ -23,150 +34,35 @@ compression = dict(zlib=True, complevel=9)
 
				 
			
 
				 # inventory tables
			
 
				 pages_inventory = '68,69'
			
 
				-header_inventory = ['Greenhouse gas source and sink categories',
			
 
				-                   'CO2 emissions', 'CO2 removals',
			
 
				-                   'CH4', 'N2O', 'NOx', 'CO', 'NMVOCs',
			
 
				-                   'SO2', 'HFCs', 'PFCs', 'SF6']
			
 
				-unit_inventory = ['Gg'] * len(header_inventory)
			
 
				-unit_inventory[9] = "GgCO2eq"
			
 
				-unit_inventory[10] = "GgCO2eq"
			
 
				-
			
 
				-year = 2016
			
 
				-entity_row = 0
			
 
				-unit_row = 1
			
 
				-gwp_to_use = "AR4GWP100"
			
 
				-
			
 
				-index_cols = "Greenhouse gas source and sink categories"
			
 
				-# special header as category UNFCCC_GHG_data and name in one column
			
 
				-header_long = ["orig_cat_name", "entity", "unit", "time", "data"]
			
 
				-
			
 
				-# manual category codes
			
 
				-cat_codes_manual = {
			
 
				-    '6. Other Memo Items (not accounted in Total Emissions)': 'MEMO',
			
 
				-    'International Bunkers': 'MBK',
			
 
				-    'CO2 from Biomass': 'MBIO',
			
 
				-}
			
 
				-
			
 
				-cat_code_regexp = r'^(?P<code>[a-zA-Z0-9]{1,4})[\s\.].*'
			
 
				-
			
 
				-coords_cols = {
			
 
				-    "category": "category",
			
 
				-    "entity": "entity",
			
 
				-    "unit": "unit",
			
 
				-}
			
 
				-
			
 
				-
			
 
				-coords_terminologies = {
			
 
				-    "area": "ISO3",
			
 
				-    "category": "IPCC1996_2006_THA_Inv",
			
 
				-    "scenario": "PRIMAP",
			
 
				-}
			
 
				-
			
 
				-coords_defaults = {
			
 
				-    "source": "THA-GHG-Inventory",
			
 
				-    "provenance": "measured",
			
 
				-    "area": "THA",
			
 
				-    "scenario": "BUR3",
			
 
				-}
			
 
				-
			
 
				-coords_value_mapping = {
			
 
				-    "unit": "PRIMAP1",
			
 
				-    "category": "PRIMAP1",
			
 
				-    "entity": {
			
 
				-        'HFCs': f"HFCS ({gwp_to_use})",
			
 
				-        'PFCs': f"PFCS ({gwp_to_use})",
			
 
				-        'NMVOCs': 'NMVOC',
			
 
				-    },
			
 
				-}
			
 
				-
			
 
				-
			
 
				-filter_remove = {
			
 
				-    'f_memo': {"category": "MEMO"},
			
 
				-}
			
 
				-filter_keep = {}
			
 
				-
			
 
				-meta_data = {
			
 
				-    "references": "https://unfccc.int/documents/267629",
			
 
				-    "rights": "",
			
 
				-    "contact": "mail@johannes-guetschow.de",
			
 
				-    "title": "Thailand. Biennial update report (BUR). BUR3",
			
 
				-    "comment": "Read fom pdf by Johannes Gütschow",
			
 
				-    "institution": "UNFCCC",
			
 
				-}
			
 
				 
			
 
				 # main sector time series
			
 
				 page_main_sector_ts = '70'
			
 
				-header_main_sector_ts = ['Year', 'Energy', 'IPPU',
			
 
				-                    'Agriculture', 'LULUCF', 'Waste',
			
 
				-                    'Net emissions (Including LULUCF)',
			
 
				-                    'Net emissions (Excluding LULUCF)']
			
 
				-unit_main_sector_ts = ['GgCO2eq'] * len(header_main_sector_ts)
			
 
				-unit_main_sector_ts[0] = ''
			
 
				-
			
 
				-# manual category codes
			
 
				-cat_codes_manual_main_sector_ts = {
			
 
				-    'Energy': "1",
			
 
				-    'IPPU': "2",
			
 
				-    'Agriculture': "3",
			
 
				-    'LULUCF': "4",
			
 
				-    'Waste': "5",
			
 
				-    'Net emissions (Including LULUCF)': "0",
			
 
				-    'Net emissions (Excluding LULUCF)': "M0EL",
			
 
				-}
			
 
				-
			
 
				-coords_cols_main_sector_ts = {
			
 
				-    "category": "category",
			
 
				-    "unit": "unit",
			
 
				-}
			
 
				-
			
 
				-coords_defaults_main_sector_ts = {
			
 
				-    "source": "THA-GHG-Inventory",
			
 
				-    "provenance": "measured",
			
 
				-    "area": "THA",
			
 
				-    "scenario": "BUR3",
			
 
				-    "entity": f"KYOTOGHG ({gwp_to_use})"
			
 
				-}
			
 
				 
			
 
				 # indirect gases time series
			
 
				 page_indirect = '72'
			
 
				-header_indirect = ['Year', 'NOx', 'CO',
			
 
				-                    'NMVOCs', 'SO2']
			
 
				-unit_indirect = ['Gg'] * len(header_indirect)
			
 
				-unit_indirect[0] = ''
			
 
				-
			
 
				-cols_to_remove = ['Average Annual Growth Rate']
			
 
				-
			
 
				-coords_cols_indirect = {
			
 
				-    "entity": "entity",
			
 
				-    "unit": "unit",
			
 
				-}
			
 
				-
			
 
				-coords_defaults_indirect = {
			
 
				-    "source": "THA-GHG-Inventory",
			
 
				-    "provenance": "measured",
			
 
				-    "area": "THA",
			
 
				-    "scenario": "BUR3",
			
 
				-    "category": "0"
			
 
				-}
			
 
				 
			
 
				 
			
 
				 # ###
			
 
				 # read the inventory data and convert to PM2 IF
			
 
				 # ###
			
 
				-
			
 
				 tables_inventory = camelot.read_pdf(str(input_folder / inventory_file), pages=pages_inventory,
			
 
				                                     split_text=True, flavor="lattice")
			
 
				 
			
 
				 df_inventory = tables_inventory[0].df[1:]
			
 
				-df_header = pd.DataFrame([header_inventory, unit_inventory])
			
 
				+df_header = pd.DataFrame([inv_conf["header"], inv_conf["unit"]])
			
 
				 
			
 
				-df_inventory = pd.concat([df_header, df_inventory, tables_inventory[1].df.iloc[1:]], axis=0, join='outer')
			
 
				+df_inventory = pd.concat([df_header, df_inventory, tables_inventory[1].df.iloc[1:]],
			
 
				+                         axis=0, join='outer')
			
 
				 
			
 
				-df_inventory = pm2.pm2io.nir_add_unit_information(df_inventory, unit_row=unit_row, entity_row=entity_row,
			
 
				-                                                  regexp_entity=".*", regexp_unit=".*", default_unit="Gg")
			
 
				+df_inventory = pm2.pm2io.nir_add_unit_information(df_inventory,
			
 
				+                                                  unit_row=inv_conf["unit_row"],
			
 
				+                                                  entity_row=inv_conf["entity_row"],
			
 
				+                                                  regexp_entity=".*", regexp_unit=".*",
			
 
				+                                                  default_unit="Gg")
			
 
				 # set index and convert to long format
			
 
				-df_inventory = df_inventory.set_index(index_cols)
			
 
				-df_inventory_long = pm2.pm2io.nir_convert_df_to_long(df_inventory, year, header_long)
			
 
				+df_inventory = df_inventory.set_index(inv_conf["index_cols"])
			
 
				+df_inventory_long = pm2.pm2io.nir_convert_df_to_long(df_inventory, inv_conf["year"],
			
 
				+                                                     inv_conf["header_long"])
			
 
				 df_inventory_long["orig_cat_name"] = df_inventory_long["orig_cat_name"].str[0]
			
 
				 
			
 
				 # prep for conversion to PM2 IF and native format
			
@@ -175,16 +71,22 @@ df_inventory_long["category"] = df_inventory_long["orig_cat_name"]
 
				 
			
 
				 # replace cat names by codes in col "category"
			
 
				 # first the manual replacements
			
 
				-df_inventory_long["category"] = df_inventory_long["category"].replace(cat_codes_manual)
			
 
				+df_inventory_long["category"] = \
			
 
				+    df_inventory_long["category"].replace(inv_conf["cat_codes_manual"])
			
 
				 # then the regex replacements
			
 
				-repl = lambda m: m.group('UNFCCC_GHG_data')
			
 
				-df_inventory_long["category"] = df_inventory_long["category"].str.replace(cat_code_regexp, repl, regex=True)
			
 
				+repl = lambda m: m.group('code')
			
 
				+df_inventory_long["category"] = \
			
 
				+    df_inventory_long["category"].str.replace(inv_conf["cat_code_regexp"], repl,
			
 
				+                                              regex=True)
			
 
				 df_inventory_long = df_inventory_long.reset_index(drop=True)
			
 
				 
			
 
				 # replace "," with "" in data
			
 
				 repl = lambda m: m.group('part1') + m.group('part2')
			
 
				-df_inventory_long.loc[:, "data"] = df_inventory_long.loc[:, "data"].str.replace('(?P<part1>[0-9]+),(?P<part2>[0-9\.]+)$', repl, regex=True)
			
 
				-df_inventory_long.loc[:, "data"] = df_inventory_long.loc[:, "data"].str.replace(' ','', regex=False)
			
 
				+df_inventory_long.loc[:, "data"] = \
			
 
				+    df_inventory_long.loc[:, "data"].str.replace(
			
 
				+        r'(?P<part1>[0-9]+),(?P<part2>[0-9\.]+)$', repl, regex=True)
			
 
				+df_inventory_long.loc[:, "data"] = df_inventory_long.loc[:, "data"].str.\
			
 
				+    replace(' ','', regex=False)
			
 
				 
			
 
				 # make sure all col headers are str
			
 
				 df_inventory_long.columns = df_inventory_long.columns.map(str)
			
@@ -202,7 +104,8 @@ data_inventory_IF = pm2.pm2io.convert_long_dataframe_if(
 
				     filter_remove=filter_remove,
			
 
				     #filter_keep=filter_keep,
			
 
				     meta_data=meta_data,
			
 
				-    convert_str=True
			
 
				+    convert_str=True,
			
 
				+    time_format="%Y",
			
 
				     )
			
 
				 
			
 
				 # ###
			
@@ -214,7 +117,7 @@ tables_main_sector_ts = camelot.read_pdf(str(input_folder / inventory_file), pag
 
				 df_main_sector_ts = tables_main_sector_ts[0].df.iloc[2:]
			
 
				 #df_header = pd.DataFrame([header_main_sector_ts, unit_main_sector_ts])
			
 
				 #df_main_sector_ts = pd.concat([df_header, df_main_sector_ts], axis=0, join='outer')
			
 
				-df_main_sector_ts.columns = [header_main_sector_ts, unit_main_sector_ts]
			
 
				+df_main_sector_ts.columns = [trend_conf["header"], trend_conf["unit"]]
			
 
				 
			
 
				 df_main_sector_ts = df_main_sector_ts.transpose()
			
 
				 df_main_sector_ts = df_main_sector_ts.reset_index(drop=False)
			
@@ -225,13 +128,16 @@ df_main_sector_ts.columns = cols
 
				 df_main_sector_ts = df_main_sector_ts.drop(0)
			
 
				 
			
 
				 # replace cat names by codes in col "category"
			
 
				-df_main_sector_ts["category"] = df_main_sector_ts["category"].replace(cat_codes_manual_main_sector_ts)
			
 
				+df_main_sector_ts["category"] = df_main_sector_ts["category"].replace(
			
 
				+    trend_conf["cat_codes_manual"])
			
 
				 
			
 
				 repl = lambda m: m.group('part1') + m.group('part2')
			
 
				 year_cols = list(set(df_main_sector_ts.columns) - set(['category', 'unit']))
			
 
				 for col in year_cols:
			
 
				-    df_main_sector_ts.loc[:, col] = df_main_sector_ts.loc[:, col].str.replace('(?P<part1>[0-9]+),(?P<part2>[0-9\.]+)$', repl, regex=True)
			
 
				-    df_main_sector_ts.loc[:, col] = df_main_sector_ts.loc[:, col].str.replace(' ','', regex=False)
			
 
				+    df_main_sector_ts.loc[:, col] = df_main_sector_ts.loc[:, col].str.\
			
 
				+        replace(r'(?P<part1>[0-9]+),(?P<part2>[0-9\.]+)$', repl, regex=True)
			
 
				+    df_main_sector_ts.loc[:, col] = df_main_sector_ts.loc[:, col].str.\
			
 
				+        replace(' ','', regex=False)
			
 
				 
			
 
				 data_main_sector_ts_IF = pm2.pm2io.convert_wide_dataframe_if(
			
 
				     df_main_sector_ts,
			
@@ -244,7 +150,8 @@ data_main_sector_ts_IF = pm2.pm2io.convert_wide_dataframe_if(
 
				     filter_remove=filter_remove,
			
 
				     #filter_keep=filter_keep,
			
 
				     meta_data=meta_data,
			
 
				-    convert_str=True
			
 
				+    convert_str=True,
			
 
				+    time_format="%Y",
			
 
				     )
			
 
				 
			
 
				 
			
@@ -257,7 +164,7 @@ tables_indirect = camelot.read_pdf(str(input_folder / inventory_file), pages=pag
 
				 df_indirect = tables_indirect[0].df.iloc[2:]
			
 
				 #df_header = pd.DataFrame([header_main_sector_ts, unit_main_sector_ts])
			
 
				 #df_main_sector_ts = pd.concat([df_header, df_main_sector_ts], axis=0, join='outer')
			
 
				-df_indirect.columns = [header_indirect, unit_indirect]
			
 
				+df_indirect.columns = [ind_conf["header"], ind_conf["unit"]]
			
 
				 
			
 
				 df_indirect = df_indirect.transpose()
			
 
				 df_indirect = df_indirect.reset_index(drop=False)
			
@@ -266,13 +173,15 @@ cols.iloc[0] = "entity"
 
				 cols.iloc[1] = "unit"
			
 
				 df_indirect.columns = cols
			
 
				 df_indirect = df_indirect.drop(0)
			
 
				-df_indirect = df_indirect.drop(columns=cols_to_remove)
			
 
				+df_indirect = df_indirect.drop(columns=ind_conf["cols_to_remove"])
			
 
				 
			
 
				 repl = lambda m: m.group('part1') + m.group('part2')
			
 
				 year_cols = list(set(df_indirect.columns) - set(['entity', 'unit']))
			
 
				 for col in year_cols:
			
 
				-    df_indirect.loc[:, col] = df_indirect.loc[:, col].str.replace('(?P<part1>[0-9]+),(?P<part2>[0-9\.]+)$', repl, regex=True)
			
 
				-    df_indirect.loc[:, col] = df_indirect.loc[:, col].str.replace(' ','', regex=False)
			
 
				+    df_indirect.loc[:, col] = df_indirect.loc[:, col].str.\
			
 
				+        replace(r'(?P<part1>[0-9]+),(?P<part2>[0-9\.]+)$', repl, regex=True)
			
 
				+    df_indirect.loc[:, col] = df_indirect.loc[:, col].str.\
			
 
				+        replace(' ','', regex=False)
			
 
				 
			
 
				 data_indirect_IF = pm2.pm2io.convert_wide_dataframe_if(
			
 
				     df_indirect,
			
@@ -285,7 +194,8 @@ data_indirect_IF = pm2.pm2io.convert_wide_dataframe_if(
 
				     #filter_remove=filter_remove,
			
 
				     #filter_keep=filter_keep,
			
 
				     meta_data=meta_data,
			
 
				-    convert_str=True
			
 
				+    convert_str=True,
			
 
				+    time_format="%Y",
			
 
				     )
			
 
				 
			
 
				 # ###
			
@@ -295,137 +205,69 @@ data_inventory_pm2 = pm2.pm2io.from_interchange_format(data_inventory_IF)
 
				 data_main_sector_ts_pm2 = pm2.pm2io.from_interchange_format(data_main_sector_ts_IF)
			
 
				 data_indirect_pm2 = pm2.pm2io.from_interchange_format(data_indirect_IF)
			
 
				 
			
 
				-data_all = data_inventory_pm2.pr.merge(data_main_sector_ts_pm2)
			
 
				-data_all = data_all.pr.merge(data_indirect_pm2)
			
 
				-
			
 
				-# combine CO2 emissions and absorptions
			
 
				-data_CO2 = data_all[['CO2 emissions', 'CO2 removals']].\
			
 
				-    to_array().pr.sum("variable", skipna=True, min_count=1)
			
 
				-data_all["CO2"] = data_CO2
			
 
				-
			
 
				-data_all_if = data_all.pr.to_interchange_format()
			
 
				-
			
 
				+data_all_pm2 = data_inventory_pm2.pr.merge(data_main_sector_ts_pm2)
			
 
				+data_all_pm2 = data_all_pm2.pr.merge(data_indirect_pm2)
			
 
				 
			
 
				+data_all_if = data_all_pm2.pr.to_interchange_format()
			
 
				 
			
 
				 # ###
			
 
				-# convert to IPCC2006 categories
			
 
				+# save raw data to IF and native format
			
 
				 # ###
			
 
				+if not output_folder.exists():
			
 
				+    output_folder.mkdir()
			
 
				+pm2.pm2io.write_interchange_format(
			
 
				+    output_folder / (output_filename + coords_terminologies["category"] + "_raw"),
			
 
				+    data_all_if)
			
 
				 
			
 
				-cat_mapping = {
			
 
				-    '3': 'M.AG',
			
 
				-    '3.A': '3.A.1',
			
 
				-    '3.B': '3.A.2',
			
 
				-    '3.C': 'M.3.C.1.AG',  # field burning of agricultural residues
			
 
				-    '3.D': '3.C.2',  # Liming
			
 
				-    '3.E': '3.C.3',  # urea application
			
 
				-    '3.F': '3.C.4',  # direct N2O from agri soils
			
 
				-    '3.G': '3.C.5',  # indirect N2O from agri soils
			
 
				-    '3.H': '3.C.6',  # indirect N2O from manure management
			
 
				-    '3.I': '3.C.7',  # rice
			
 
				-    '4': 'M.LULUCF',
			
 
				-    '4.A': '3.B.1.a',  # forest remaining forest
			
 
				-    '4.B': '3.B.2.a',  # cropland remaining cropland
			
 
				-    '4.C': '3.B.2.b',  # land converted to cropland
			
 
				-    '4.D': '3.B.6.b',  # land converted to other land
			
 
				-    '4.E': 'M.3.C.1.LU',  # biomass burning (LULUCF)
			
 
				-    '5': '4',
			
 
				-    '5.A': '4.A',
			
 
				-    '5.B': '4.B',
			
 
				-    '5.C': '4.C',
			
 
				-    '5.D': '4.D',
			
 
				-}
			
 
				-
			
 
				-aggregate_cats = {
			
 
				-    '2.A.4': {'sources': ['2.A.4.b', '2.A.4.d'],
			
 
				-              'name': 'Other Process uses of Carbonates'},
			
 
				-    '3.A': {'sources': ['3.A.1', '3.A.2'], 'name': 'Livestock'},
			
 
				-    '3.C.1': {'sources': ['M.3.C.1.AG', 'M.3.C.1.LU'],
			
 
				-              'name': 'Emissions from Biomass Burning'},
			
 
				-    '3.C': {'sources': ['3.C.1', '3.C.2', '3.C.3', '3.C.4', '3.C.5', '3.C.6', '3.C.7'],
			
 
				-            'name': 'Aggregate sources and non-CO2 emissions sources on land'},
			
 
				-    'M.3.C.AG': {
			
 
				-        'sources': ['M.3.C.1.AG', '3.C.2', '3.C.3', '3.C.4', '3.C.5', '3.C.6', '3.C.7'],
			
 
				-        'name': 'Aggregate sources and non-CO2 emissions sources on land (Agriculture)'},
			
 
				-    'M.3.C.LU': {'sources': ['M.3.C.1.LU'],
			
 
				-                 'name': 'Aggregate sources and non-CO2 emissions sources on land (Land use)'},
			
 
				-    '3': {'sources': ['M.AG', 'M.LULUCF'], 'name': 'AFOLU'},
			
 
				-    '3.B.1': {'sources': ['3.B.1.a'], 'name': 'Forest Land'},
			
 
				-    '3.B.2': {'sources': ['3.B.2.a', '3.B.2.b'], 'name': 'Cropland'},
			
 
				-    '3.B.6': {'sources': ['3.B.6.b'], 'name': 'Other Land'},
			
 
				-    '3.B': {'sources': ['3.B.1', '3.B.2', '3.B.6'], 'name': 'Land'},
			
 
				-    'M.AG.ELV': {'sources': ['M.3.C.AG'],
			
 
				-                 'name': 'Agriculture excluding livestock emissions'},
			
 
				-}
			
 
				-
			
 
				-data_if_2006 = copy.deepcopy(data_all_if)
			
 
				-data_if_2006.attrs = copy.deepcopy(data_all_if.attrs)
			
 
				-
			
 
				-# map categories
			
 
				-data_if_2006 = data_if_2006.replace({'category (IPCC1996_2006_THA_Inv)': cat_mapping})
			
 
				-data_if_2006["category (IPCC1996_2006_THA_Inv)"].unique()
			
 
				-
			
 
				-# rename the category col
			
 
				-data_if_2006.rename(
			
 
				-    columns={'category (IPCC1996_2006_THA_Inv)': 'category (IPCC2006_PRIMAP)'},
			
 
				-    inplace=True)
			
 
				-data_if_2006.attrs['attrs']['cat'] = 'category (IPCC2006_PRIMAP)'
			
 
				-data_if_2006.attrs['dimensions']['*'] = [
			
 
				-    'category (IPCC2006_PRIMAP)' if item == 'category (IPCC1996_2006_THA_Inv)'
			
 
				-    else item for item in data_if_2006.attrs['dimensions']['*']]
			
 
				-# aggregate categories
			
 
				-for cat_to_agg in aggregate_cats:
			
 
				-    mask = data_if_2006["category (IPCC2006_PRIMAP)"].isin(
			
 
				-        aggregate_cats[cat_to_agg]["sources"])
			
 
				-    df_test = data_if_2006[mask]
			
 
				-    # print(df_test)
			
 
				-
			
 
				-    if len(df_test) > 0:
			
 
				-        print(f"Aggregating category {cat_to_agg}")
			
 
				-        df_combine = df_test.copy(deep=True)
			
 
				-
			
 
				-        time_format = '%Y'
			
 
				-        time_columns = [
			
 
				-            col
			
 
				-            for col in df_combine.columns.values
			
 
				-            if matches_time_format(col, time_format)
			
 
				-        ]
			
 
				-
			
 
				-        for col in time_columns:
			
 
				-            df_combine[col] = pd.to_numeric(df_combine[col], errors="coerce")
			
 
				-
			
 
				-        df_combine = df_combine.groupby(
			
 
				-            by=['source', 'scenario (PRIMAP)', 'provenance', 'area (ISO3)', 'entity',
			
 
				-                'unit']).sum(min_count=1)
			
 
				-
			
 
				-        df_combine.insert(0, "category (IPCC2006_PRIMAP)", cat_to_agg)
			
 
				-        # df_combine.insert(1, "cat_name_translation", aggregate_cats[cat_to_agg]["name"])
			
 
				-        # df_combine.insert(2, "orig_cat_name", "computed")
			
 
				-
			
 
				-        df_combine = df_combine.reset_index()
			
 
				-
			
 
				-        data_if_2006 = pd.concat([data_if_2006, df_combine], axis=0, join='outer')
			
 
				-        data_if_2006 = data_if_2006.reset_index(drop=True)
			
 
				-    else:
			
 
				-        print(f"no data to aggregate category {cat_to_agg}")
			
 
				-
			
 
				-# conversion to PRIMAP2 native format
			
 
				-data_pm2_2006 = pm2.pm2io.from_interchange_format(data_if_2006)
			
 
				-
			
 
				-# convert back to IF to have units in the fixed format
			
 
				-data_if_2006 = data_pm2_2006.pr.to_interchange_format()
			
 
				+encoding = {var: compression for var in data_all_pm2.data_vars}
			
 
				+data_all_pm2.pr.to_netcdf(
			
 
				+    output_folder / (output_filename + coords_terminologies["category"] + "_raw.nc"),
			
 
				+    encoding=encoding)
			
 
				 
			
 
				+# ###
			
 
				+# ## process the data
			
 
				+# ###
			
 
				+data_proc_pm2 = data_all_pm2
			
 
				+
			
 
				+# combine CO2 emissions and removals
			
 
				+data_proc_pm2["CO2"] = data_proc_pm2[["CO2 emissions", "CO2 removals"]].pr.sum\
			
 
				+    (dim="entity", skipna=True, min_count=1)
			
 
				+data_proc_pm2["CO2"].attrs['entity'] = 'CO2'
			
 
				+
			
 
				+# actual processing
			
 
				+data_proc_pm2 = process_data_for_country(
			
 
				+    data_proc_pm2,
			
 
				+    entities_to_ignore=['CO2 emissions', 'CO2 removals'],
			
 
				+    gas_baskets={},
			
 
				+    processing_info_country=country_processing_step1,
			
 
				+)
			
 
				+
			
 
				+data_proc_pm2 = process_data_for_country(
			
 
				+    data_proc_pm2,
			
 
				+    entities_to_ignore=[],
			
 
				+    gas_baskets=gas_baskets,
			
 
				+    processing_info_country=country_processing_step2,
			
 
				+    cat_terminology_out = terminology_proc,
			
 
				+    category_conversion = cat_conversion,
			
 
				+    sectors_out = sectors_to_save,
			
 
				+)
			
 
				+
			
 
				+# adapt source and metadata
			
 
				+# TODO: processing info is present twice
			
 
				+current_source = data_proc_pm2.coords["source"].values[0]
			
 
				+data_temp = data_proc_pm2.pr.loc[{"source": current_source}]
			
 
				+data_proc_pm2 = data_proc_pm2.pr.set("source", 'BUR_NIR', data_temp)
			
 
				 
			
 
				 # ###
			
 
				 # save data to IF and native format
			
 
				 # ###
			
 
				-# data in original categories
			
 
				-pm2.pm2io.write_interchange_format(output_folder / (output_filename + coords_terminologies["category"]), data_all_if)
			
 
				-
			
 
				-encoding = {var: compression for var in data_all.data_vars}
			
 
				-data_all.pr.to_netcdf(output_folder / (output_filename + coords_terminologies["category"] + ".nc"), encoding=encoding)
			
 
				-
			
 
				-# data in 2006 categories
			
 
				-pm2.pm2io.write_interchange_format(output_folder / (output_filename + "IPCC2006_PRIMAP"), data_if_2006)
			
 
				+data_proc_if = data_proc_pm2.pr.to_interchange_format()
			
 
				+if not output_folder.exists():
			
 
				+    output_folder.mkdir()
			
 
				+pm2.pm2io.write_interchange_format(
			
 
				+    output_folder / (output_filename + terminology_proc), data_proc_if)
			
 
				 
			
 
				-encoding = {var: compression for var in data_pm2_2006.data_vars}
			
 
				-data_pm2_2006.pr.to_netcdf(output_folder / (output_filename + "IPCC2006_PRIMAP" + ".nc"), encoding=encoding)
			
 
				+encoding = {var: compression for var in data_proc_pm2.data_vars}
			
 
				+data_proc_pm2.pr.to_netcdf(
			
 
				+    output_folder / (output_filename + terminology_proc + ".nc"),
			
 
				+    encoding=encoding)
			
--- a/UNFCCC_GHG_data/UNFCCC_reader/Thailand/read_THA_BUR4_from_pdf.py
+++ b/UNFCCC_GHG_data/UNFCCC_reader/Thailand/read_THA_BUR4_from_pdf.py
@@ -15,12 +15,9 @@ os.environ["UNFCCC_GHG_ROOT_PATH"] = \
 
				      "/storage/data/data/PRIMAP/primap_2.0/datasets/UNFCCC_non-AnnexI_data/"
			
 
				 import pandas as pd
			
 
				 import primap2 as pm2
			
 
				-import copy
			
 
				 
			
 
				-from UNFCCC_GHG_data.helper import process_data_for_country, GWP_factors
			
 
				+from UNFCCC_GHG_data.helper import process_data_for_country
			
 
				 from UNFCCC_GHG_data.helper import downloaded_data_path, extracted_data_path
			
 
				-from primap2.pm2io._data_reading import matches_time_format
			
 
				-
			
 
				 from config_THA_BUR4 import gwp_to_use, inv_conf
			
 
				 from config_THA_BUR4 import coords_cols, coords_defaults, coords_terminologies, \
			
 
				     coords_value_mapping, filter_remove, filter_keep, meta_data