Prechádzať zdrojové kódy

add code for Thailand BUR4, fixes in code for Israel BUR2

Johannes Gütschow 1 rok pred
rodič
commit
1fe26b04d2

+ 24 - 9
UNFCCC_GHG_data/UNFCCC_reader/Israel/config_ISR_BUR2.py

@@ -385,14 +385,23 @@ cat_conversion = {
         'M.AG.ELV': {'sources': ['M.3.C.AG'], 'name': 'Agriculture excluding livestock'},
         '3': {'sources': ['M.AG', 'M.LULUCF'], 'name': 'AFOLU'},
     },
+    'basket_copy': {
+        'GWPs_to_add': ["AR4GWP100", "AR5GWP100", "AR6GWP100"],
+        'entities': ["HFCS", "PFCS"],
+        'source_GWP': 'SARGWP100',
+    },
 }
 
 sectors_to_save = [
-    '1', '1.A', '1.A.1', '1.A.2', '1.A.3', '1.A.4', '1.A.4.a', '1.A.4.b', '1.A.4.c', '1.A.5',
-    '1.B', '1.B.1', '1.B.2', '2.A', '2.A.1', '2.A.2', '2.A.4', '2.A.5', '2.A.3',
-    '2.B.2', '2.B.8', '2.B.9', '2.B.10', '2.C', '2.F', '2.H',
-    '3', 'M.AG', '3.A', '3.A.1', '3.A.2', '3.C.1', '3.C.7', 'M.3.C.45.AG', '3.C.7', '3.C.8',
-    'M.LULUCF', '4', '4.A', '4.B', '4.C', '4.D',
+    '1', '1.A', '1.A.1', '1.A.2', '1.A.3', '1.A.4', '1.A.4.a', '1.A.4.b', '1.A.4.c',
+    '1.A.5',
+    '1.B', '1.B.1', '1.B.2',
+    '2', '2.A', '2.A.1', '2.A.2', '2.A.3', '2.A.4', '2.A.5',
+    '2.B', '2.B.2', '2.B.8', '2.B.9', '2.B.10', '2.C', '2.F', '2.H',
+    '3', 'M.AG', '3.A', '3.A.1', '3.A.2',
+    '3.C', '3.C.1', 'M.3.C.1.AG', '3.C.7', 'M.3.C.45.AG', '3.C.8', 'M.3.C.AG',
+    'M.LULUCF', 'M.AG.ELV',
+    '4', '4.A', '4.B', '4.C', '4.D',
     '0', 'M.0.EL', 'M.BK', 'M.BK.A', 'M.BK.M', 'M.BIO', '5']
 
 
@@ -402,10 +411,16 @@ gas_baskets = {
                            'Unspecified mix of HFCs (SARGWP100)',
                            'Unspecified mix of PFCs (SARGWP100)'],
     'FGASES (AR4GWP100)': ['HFCS (AR4GWP100)', 'PFCS (AR4GWP100)', 'SF6', 'NF3',
-                           'Unspecified mix of HFCs (SARGWP100)',
-                           'Unspecified mix of PFCs (SARGWP100)'],
-    'FGASES (AR5GWP100)':['HFCS (AR5GWP100)', 'PFCS (AR5GWP100)', 'SF6', 'NF3'],
-    'FGASES (AR6GWP100)':['HFCS (AR6GWP100)', 'PFCS (AR6GWP100)', 'SF6', 'NF3'],
+                           'Unspecified mix of HFCs (AR4GWP100)',
+                           'Unspecified mix of PFCs (AR4GWP100)'],
+    'FGASES (AR5GWP100)':['HFCS (AR5GWP100)', 'PFCS (AR5GWP100)', 'SF6', 'NF3',
+                          'Unspecified mix of HFCs (AR5GWP100)',
+                          'Unspecified mix of PFCs (AR5GWP100)'
+                          ],
+    'FGASES (AR6GWP100)':['HFCS (AR6GWP100)', 'PFCS (AR6GWP100)', 'SF6', 'NF3',
+                          'Unspecified mix of HFCs (AR6GWP100)',
+                          'Unspecified mix of PFCs (AR6GWP100)'
+                          ],
     'KYOTOGHG (SARGWP100)': ['CO2', 'CH4', 'N2O', 'FGASES (SARGWP100)'],
     'KYOTOGHG (AR4GWP100)': ['CO2', 'CH4', 'N2O', 'FGASES (AR4GWP100)'],
     'KYOTOGHG (AR5GWP100)': ['CO2', 'CH4', 'N2O', 'FGASES (AR5GWP100)'],

+ 0 - 11
UNFCCC_GHG_data/UNFCCC_reader/Israel/read_ISR_BUR2_from_pdf.py

@@ -248,17 +248,6 @@ data_pm2.pr.to_netcdf(
 #### processing
 data_proc_pm2 = data_pm2
 
-# add FGASES in AR4, AR5, and AR6 GWPs
-GWPs_to_add = ["AR4GWP100", "AR5GWP100", "AR6GWP100"]
-entities = ["HFCS", "PFCS"]
-for entity in entities:
-    data_SAR = data_proc_pm2[f'{entity} ({gwp_to_use})']
-    for GWP in GWPs_to_add:
-        data_GWP = data_SAR * GWP_factors[f"{gwp_to_use}_to_{GWP}"][entity]
-        data_GWP.attrs["entity"] = entity
-        data_GWP.attrs["gwp_context"] = GWP
-        data_proc_pm2[f"{entity} ({GWP})"] = data_GWP
-
 # combine CO2 emissions and removals
 temp_CO2 = data_proc_pm2["CO2"].copy()
 #data_proc_pm2["CO2"] = data_proc_pm2[["CO2 emissions", "CO2 removals"]].to_array()

+ 390 - 0
UNFCCC_GHG_data/UNFCCC_reader/Thailand/config_THA_BUR4.py

@@ -0,0 +1,390 @@
+# configuration for Thailand, BUR4
+# ###
+# for reading
+# ###
+
+# general
+gwp_to_use = "AR4GWP100"
+terminology_proc = 'IPCC2006_PRIMAP'
+
+# 2019 inventory
+inv_conf = {
+    'year': 2019,
+    'entity_row': 0,
+    'unit_row': 1,
+    'index_cols': "Greenhouse gas source and sink categories",
+    # special header as category code and name are in one column
+    'header_long': ["orig_cat_name", "entity", "unit", "time", "data"],
+    # manual category codes (manual mapping to primap1, will be mapped to primap2
+    # automatically with the other codes)
+    'cat_codes_manual': {
+        'Total national emissions and removals': '0',
+        'Memo Items (not accounted in total Emissions)': 'MEMO',
+        'International Bunkers': 'MBK',
+        'Aviation International Bunkers': 'MBKA',
+        'Marine-International Bunkers': 'MBKM',
+        'CO2 from biomass': 'MBIO',
+    },
+    'cat_code_regexp': r'^(?P<code>[a-zA-Z0-9]{1,4})[\s\.].*',
+}
+
+# primap2 format conversion
+coords_cols = {
+    "category": "category",
+    "entity": "entity",
+    "unit": "unit",
+}
+
+coords_terminologies = {
+    "area": "ISO3",
+    "category": "IPCC1996_2006_THA_Inv",
+    "scenario": "PRIMAP",
+}
+
+coords_defaults = {
+    "source": "THA-GHG-Inventory",
+    "provenance": "measured",
+    "area": "THA",
+    "scenario": "BUR4",
+}
+
+coords_value_mapping = {
+    "unit": "PRIMAP1",
+    "category": "PRIMAP1",
+    "entity": {
+        'HFCs': f"HFCS ({gwp_to_use})",
+        'PFCs': f"PFCS ({gwp_to_use})",
+        'NMVOCs': 'NMVOC',
+        'Nox': 'NOx',
+    },
+}
+
+filter_remove = {
+    'f_memo': {"category": "MEMO"},
+}
+filter_keep = {}
+
+meta_data = {
+    "references": "https://unfccc.int/documents/624750",
+    "rights": "",
+    "contact": "mail@johannes-guetschow.de",
+    "title": "Thailand. Biennial update report (BUR). BUR4",
+    "comment": "Read fom pdf by Johannes Gütschow",
+    "institution": "UNFCCC",
+}
+
+# main sector time series
+# manual category codes (manual mapping to primap1, will be mapped to primap2
+# automatically with the other codes)
+cat_codes_manual_main_sector_ts = {
+    'Energy': "1",
+    'Industrial Processes and Product Use': "2",
+    'Agriculture': "3",
+    'LULUCF': "4",
+    'Waste': "5",
+    'Net emissions (Include LULUCF)': "0",
+    'Total emissions (Exclude LULUCF)': "M0EL",
+}
+
+coords_cols_main_sector_ts = {
+    "category": "category",
+}
+
+coords_defaults_main_sector_ts = {
+    "source": "THA-GHG-Inventory",
+    "provenance": "measured",
+    "area": "THA",
+    "scenario": "BUR4",
+    "entity": f"KYOTOGHG ({gwp_to_use})",
+    "unit": "GgCO2eq",
+}
+
+# indirect gases time series
+coords_cols_indirect = {
+    "entity": "entity",
+}
+
+coords_defaults_indirect = {
+    "source": "THA-GHG-Inventory",
+    "provenance": "measured",
+    "area": "THA",
+    "scenario": "BUR4",
+    "category": "0",
+    "unit": "Gg",
+}
+
+# ###
+# for processing
+# ###
+# aggregate categories
+country_processing_step1 = {
+    'aggregate_cats': {
+        '2.A.4': {'sources': ['2.A.4.b', '2.A.4.d'],
+                  'name': 'Other Process uses of Carbonates'},
+        '2.B.8': {'sources': ['2.B.8.b', '2.B.8.c', '2.B.8.e', '2.B.8.f'],
+                  'name': 'Petrochemical and Carbon Black production'},
+    },
+    'aggregate_gases': {
+        'KYOTOGHG': {
+            'basket': 'KYOTOGHG (AR4GWP100)',
+            'basket_contents': ['CO2', 'CH4', 'N2O', 'SF6',
+                                'HFCS (AR4GWP100)', 'PFCS (AR4GWP100)'],
+            'skipna': True,
+            'min_count': 1,
+            'sel': {f'category ({coords_terminologies["category"]})':
+                [
+                    '0', '1', '1.A', '1.A.1', '1.A.2', '1.A.3',
+                    '1.A.4', '1.A.5', '1.B', '1.B.1', '1.B.2', '1.B.3',
+                    '1.C',
+                    '2', '2.A', '2.A.1', '2.A.2', '2.A.3', '2.A.4',
+                    '2.B', '2.C', '2.D', '2.F', '2.G', '2.H',
+                    '3', '3.A', '3.B', '3.C', '3.D', '3.E', '3.F', '3.G',
+                    '3.H', '3.I',
+                    '4', '4.A', '4.B', '4.C', '4.D',
+                    '4.E', '4.E.1', '4.E.2', '4.E.3',
+                    '5', '5.A', '5.B', '5.C', '5.D'
+                ]
+            }, # not tested
+        },
+    },
+}
+
+country_processing_step2 = {
+    'downscale': {
+        # main sectors present as KYOTOGHG sum. subsectors need to be downscaled
+        # TODO: downscale CO, NOx, NMVOC, SO2 (national total present)
+        'sectors': {
+            '1': {
+                'basket': '1',
+                'basket_contents': ['1.A', '1.B', '1.C'],
+                'entities': ['KYOTOGHG (AR4GWP100)'],
+                'dim': f'category ({coords_terminologies["category"]})',
+            },
+            '1.A': {
+                'basket': '1.A',
+                'basket_contents': ['1.A.1', '1.A.2', '1.A.3', '1.A.4', '1.A.5'],
+                'entities': ['KYOTOGHG (AR4GWP100)'],
+                'dim': f'category ({coords_terminologies["category"]})',
+            },
+            '1.B': {
+                'basket': '1.B',
+                'basket_contents': ['1.B.1', '1.B.2', '1.B.3'],
+                'entities': ['KYOTOGHG (AR4GWP100)'],
+                'dim': f'category ({coords_terminologies["category"]})',
+            },
+            '2': {
+                'basket': '2',
+                'basket_contents': ['2.A', '2.B', '2.C', '2.D', '2.F', '2.G', '2.H'],
+                'entities': ['KYOTOGHG (AR4GWP100)'],
+                'dim': f'category ({coords_terminologies["category"]})',
+            },
+            '2.A': {
+                'basket': '2.A',
+                'basket_contents': ['2.A.1', '2.A.2', '2.A.3', '2.A.4'],
+                'entities': ['KYOTOGHG (AR4GWP100)'],
+                'dim': f'category ({coords_terminologies["category"]})',
+            },
+            '3': {
+                'basket': '3',
+                'basket_contents': ['3.A', '3.B', '3.C', '3.D', '3.E', '3.F', '3.G',
+                                    '3.H', '3.I'],
+                'entities': ['KYOTOGHG (AR4GWP100)'],
+                'dim': f'category ({coords_terminologies["category"]})',
+            },
+            '4': {
+                'basket': '4',
+                'basket_contents': ['4.A', '4.B', '4.C', '4.D', '4.E'],
+                'entities': ['KYOTOGHG (AR4GWP100)'],
+                'dim': f'category ({coords_terminologies["category"]})',
+            },
+            '4.E': {
+                'basket': '4.E',
+                'basket_contents': ['4.E.1', '4.E.2', '4.E.3'],
+                'entities': ['KYOTOGHG (AR4GWP100)'],
+                'dim': f'category ({coords_terminologies["category"]})',
+            },
+            '5': {
+                'basket': '5',
+                'basket_contents': ['5.A', '5.B', '5.C', '5.D'],
+                'entities': ['KYOTOGHG (AR4GWP100)'],
+                'dim': f'category ({coords_terminologies["category"]})',
+            },
+        },
+        'entities': {
+            'KYOTO': {
+                'basket': 'KYOTOGHG (AR4GWP100)',
+                'basket_contents': ['CH4', 'CO2', 'N2O', 'HFCS (AR4GWP100)',
+                                    'PFCS (AR4GWP100)', 'SF6'],
+                'sel': {f'category ({coords_terminologies["category"]})':
+                    [
+                        '0', '1', '1.A', '1.A.1', '1.A.2', '1.A.3',
+                        '1.A.4', '1.A.5', '1.B', '1.B.1', '1.B.2', '1.B.3',
+                        '1.C',
+                        '2', '2.A', '2.A.1', '2.A.2', '2.A.3', '2.A.4',
+                        '2.B', '2.C', '2.D', '2.F', '2.G', '2.H',
+                        '3', '3.A', '3.B', '3.C', '3.D', '3.E', '3.F', '3.G',
+                        '3.H', '3.I',
+                        '4', '4.A', '4.B', '4.C', '4.D',
+                        '4.E', '4.E.1', '4.E.2', '4.E.3',
+                        '5', '5.A', '5.B', '5.C', '5.D']},
+            },
+        },
+    },
+    'basket_copy': {
+        'GWPs_to_add': ["SARGWP100", "AR5GWP100", "AR6GWP100"],
+        'entities': ["HFCS", "PFCS"],
+        'source_GWP': 'AR4GWP100',
+    },
+}
+
+cat_conversion = {
+    'mapping': {
+        '0': '0',
+        'M.0.EL': 'M.0.EL',
+        '1': '1',
+        '1.A': '1.A',
+        '1.A.1': '1.A.1',
+        '1.A.1.a': '1.A.1.a',
+        '1.A.1.b': '1.A.1.b',
+        '1.A.2': '1.A.2',
+        '1.A.3': '1.A.3',
+        '1.A.3.a': '1.A.3.a',
+        '1.A.3.b': '1.A.3.b',
+        '1.A.3.c': '1.A.3.c',
+        '1.A.3.d': '1.A.3.d',
+        '1.A.4': '1.A.4',
+        '1.A.5': '1.A.5',
+        '1.B': '1.B',
+        '1.B.1': '1.B.1',
+        '1.B.2': '1.B.2',
+        '1.B.3': '1.B.3',
+        '1.C': '1.C',
+        '1.C.1': '1.C.1',
+        '1.C.2': '1.C.2',
+        '1.C.3': '1.C.3',
+        '2': '2',
+        '2.A': '2.A',
+        '2.A.1': '2.A.1',
+        '2.A.2': '2.A.2',
+        '2.A.3': '2.A.3',
+        '2.A.4': '2.A.4',
+        '2.A.4.b': '2.A.4.b',
+        '2.A.4.d': '2.A.4.d',
+        '2.B': '2.B',
+        '2.B.2': '2.B.2',
+        '2.B.4': '2.B.4',
+        '2.B.8': '2.B.8',
+        '2.B.8.b': '2.B.8.b',
+        '2.B.8.c': '2.B.8.c',
+        '2.B.8.e': '2.B.8.e',
+        '2.B.8.f': '2.B.8.f',
+        '2.C': '2.C',
+        '2.C.1': '2.C.1',
+        '2.D': '2.D',
+        '2.D.1': '2.D.1',
+        '2.F': '2.F',
+        '2.F.1': '2.F.1',
+        '2.G': '2.G',
+        '2.G.1': '2.G.1',
+        '2.H': '2.H',
+        '2.H.1': '2.H.1',
+        '2.H.2': '2.H.2',
+        '3': 'M.AG',
+        '3.A': '3.A.1',
+        '3.B': '3.A.2',
+        '3.C': 'M.3.C.1.b.i',  # field burning of agricultural residues
+        '3.D': '3.C.2',  # Liming
+        '3.E': '3.C.3',  # urea application
+        '3.F': '3.C.4',  # direct N2O from agri soils
+        '3.G': '3.C.5',  # indirect N2O from agri soils
+        '3.H': '3.C.6',  # indirect N2O from manure management
+        '3.I': '3.C.7',  # rice
+        #'4': 'M.LULUCF',
+        '4.A': '3.B.1.a',  # forest remaining forest
+        '4.B': '3.B.2.a',  # cropland remaining cropland
+        '4.C': '3.B.2.b',  # land converted to cropland
+        '4.D': '3.B.6.b',  # land converted to other land
+        #'4.E': 'M.3.C.1.LU',  # biomass burning (LULUCF)
+        '4.E.1': '3.C.1.a', # biomass burning (Forest Land)
+        '4.E.2': 'M.3.C.1.b.ii', # biomass burning (Cropland)
+        '4.E.3': '3.C.1.d', # biomass burning (Other Land)
+        '5': '4',
+        '5.A': '4.A',
+        '5.A.1': '4.A.1',
+        '5.A.2': '4.A.2',
+        '5.B': '4.B',
+        '5.C': '4.C',
+        '5.C.1': '4.C.1',
+        '5.D': '4.D',
+        '5.D.1': '4.D.1',
+        '5.D.2': '4.D.2',
+        'M.BK': 'M.BK',
+        'M.BK.A': 'M.BK.A',
+        'M.BK.M': 'M.BM.M',
+        'M.BIO': 'M.BIO',
+    },
+    'aggregate': {
+        '3.A': {'sources': ['3.A.1', '3.A.2'], 'name': 'Livestock'},
+        '3.C.1.b': {'sources': ['M.3.C.1.b.i', 'M.3.C.1.b.ii'],
+                  'name': 'Biomass Burning In Cropland'},
+        'M.3.C.1.AG': {'sources': ['3.C.1.b', '3.C.1.c'],
+                  'name': 'Biomass Burning (Agriculture)'},
+        'M.3.C.1.LU': {'sources': ['3.C.1.a', '3.C.1.d'],
+                  'name': 'Biomass Burning (LULUCF)'},
+        '3.C.1': {'sources': ['M.3.C.1.AG', 'M.3.C.1.LU'],
+                  'name': 'Emissions from Biomass Burning'},
+        '3.C': {'sources': ['3.C.1', '3.C.2', '3.C.3', '3.C.4', '3.C.5', '3.C.6', '3.C.7'],
+                'name': 'Aggregate sources and non-CO2 emissions sources on land'},
+        'M.3.C.AG': {
+            'sources': ['M.3.C.1.AG', '3.C.2', '3.C.3', '3.C.4', '3.C.5', '3.C.6', '3.C.7'],
+            'name': 'Aggregate sources and non-CO2 emissions sources on land (Agriculture)'},
+        'M.AG.ELV': {'sources': ['M.3.C.AG'],
+                     'name': 'Agriculture excluding livestock emissions'},
+        'M.3.C.LU': {'sources': ['M.3.C.1.LU'],
+                     'name': 'Aggregate sources and non-CO2 emissions sources on land (Land use)'},
+        '3.B.1': {'sources': ['3.B.1.a'], 'name': 'Forest Land'},
+        '3.B.2': {'sources': ['3.B.2.a', '3.B.2.b'], 'name': 'Cropland'},
+        '3.B.6': {'sources': ['3.B.6.b'], 'name': 'Other Land'},
+        '3.B': {'sources': ['3.B.1', '3.B.2', '3.B.6'], 'name': 'Land'},
+        'M.LULUCF': {'sources': ['3.B', 'N.3.C.LU'], 'name': 'LULUCF'},
+        '3': {'sources': ['M.AG', 'M.LULUCF'], 'name': 'AFOLU'},
+    },
+}
+
+sectors_to_save = [
+    '1', '1.A', '1.A.1', '1.A.1.a', '1.A.1.b', '1.A.2', '1.A.3', '1.A.3.a', '1.A.3.b',
+    '1.A.3.c', '1.A.3.d', '1.A.4', '1.A.5',
+    '1.B', '1.B.1', '1.B.2', '1.B.3', '1.C', '1.C.1', '1.C.2', '1.C.3',
+    '2', '2.A', '2.A.1', '2.A.2', '2.A.3', '2.A.4', '2.A.4.b', '2.A.4.d',
+    '2.B', '2.B.2', '2.B.4', '2.B.8', '2.B.8.a', '2.B.8.c', '2.B.8.e', '2.B.8.f',
+    '2.C', '2.C.1', '2.F', '2.F.1', '2.G', '2.G.1', '2.H', '2.H.1', '2.H.2',
+    '3', 'M.AG', '3.A', '3.A.1', '3.A.2',
+    '3.C', '3.C.1', '3.C.1.a', '3.C.1.b', '3.C.1.d', '3.C.2', '3.C.3', '3.C.4',
+    '3.C.5', '3.C.6', '3.C.7', 'M.3.C.1.AG', 'M.3.C.AG', 'M.AG.ELV',
+    'M.LULUCF', 'M.3.C.1.LU', 'M.3.C.LU', '3.B', '3.B.1', '3.B.1.a', '3.B.2', '3.B.2.a',
+    '3.B.2.b', '3.B.6', '3.B.6.b',
+    '4', '4.A', '4.A.1', '4.A.2', '4.B', '4.C', '4.C.1', '4.D', '4.D.1', '4.D.2',
+    '0', 'M.0.EL', 'M.BK', 'M.BK.A', 'M.BK.M', 'M.BIO']
+
+
+# gas baskets
+gas_baskets = {
+    'FGASES (SARGWP100)': ['HFCS (SARGWP100)', 'PFCS (SARGWP100)', 'SF6', 'NF3',
+                           'Unspecified mix of HFCs (SARGWP100)',
+                           'Unspecified mix of PFCs (SARGWP100)'],
+    'FGASES (AR4GWP100)': ['HFCS (AR4GWP100)', 'PFCS (AR4GWP100)', 'SF6', 'NF3',
+                           'Unspecified mix of HFCs (AR4GWP100)',
+                           'Unspecified mix of PFCs (AR4GWP100)'],
+    'FGASES (AR5GWP100)':['HFCS (AR5GWP100)', 'PFCS (AR5GWP100)', 'SF6', 'NF3',
+                          'Unspecified mix of HFCs (AR5GWP100)',
+                          'Unspecified mix of PFCs (AR5GWP100)'
+                          ],
+    'FGASES (AR6GWP100)':['HFCS (AR6GWP100)', 'PFCS (AR6GWP100)', 'SF6', 'NF3',
+                          'Unspecified mix of HFCs (AR6GWP100)',
+                          'Unspecified mix of PFCs (AR6GWP100)'
+                          ],
+    'KYOTOGHG (SARGWP100)': ['CO2', 'CH4', 'N2O', 'FGASES (SARGWP100)'],
+    'KYOTOGHG (AR4GWP100)': ['CO2', 'CH4', 'N2O', 'FGASES (AR4GWP100)'],
+    'KYOTOGHG (AR5GWP100)': ['CO2', 'CH4', 'N2O', 'FGASES (AR5GWP100)'],
+    'KYOTOGHG (AR6GWP100)': ['CO2', 'CH4', 'N2O', 'FGASES (AR6GWP100)'],
+}

+ 1 - 1
UNFCCC_GHG_data/UNFCCC_reader/Thailand/read_THA_BUR3_from_pdf.py

@@ -47,7 +47,7 @@ cat_codes_manual = {
     'CO2 from Biomass': 'MBIO',
 }
 
-cat_code_regexp = r'^(?P<UNFCCC_GHG_data>[a-zA-Z0-9]{1,4})[\s\.].*'
+cat_code_regexp = r'^(?P<code>[a-zA-Z0-9]{1,4})[\s\.].*'
 
 coords_cols = {
     "category": "category",

+ 231 - 0
UNFCCC_GHG_data/UNFCCC_reader/Thailand/read_THA_BUR4_from_pdf.py

@@ -0,0 +1,231 @@
+# this script reads data from Thailand's BUR4
+# Data is read from three csv files which have been created manually from ocr
+# processed pdf files
+# pdftk Thailand_BUR4_final_28122022.pdf cat 65-67east output inventory_2019.pdf
+# ocrmypdf --force-ocr inventory_2019.pdf inventory_2019_ocr.pdf
+# pdftk Thailand_BUR4_final_28122022.pdf cat 69 output trends.pdf
+# ocrmypdf --force-ocr trends.pdf trends_ocr.pdf
+
+# values for HFCs and SF6 have been taken from Table 2-9 where they are present in
+# CO2eq and thus HFC data can be used and SF6 data is not 0 as in the main inventory
+# tables
+
+import os
+os.environ["UNFCCC_GHG_ROOT_PATH"] = \
+     "/storage/data/data/PRIMAP/primap_2.0/datasets/UNFCCC_non-AnnexI_data/"
+import pandas as pd
+import primap2 as pm2
+import copy
+
+from UNFCCC_GHG_data.helper import process_data_for_country, GWP_factors
+from UNFCCC_GHG_data.helper import downloaded_data_path, extracted_data_path
+from primap2.pm2io._data_reading import matches_time_format
+
+from config_THA_BUR4 import gwp_to_use, inv_conf
+from config_THA_BUR4 import coords_cols, coords_defaults, coords_terminologies, \
+    coords_value_mapping, filter_remove, filter_keep, meta_data
+from config_THA_BUR4 import coords_cols_main_sector_ts, \
+    cat_codes_manual_main_sector_ts, coords_defaults_main_sector_ts
+from config_THA_BUR4 import coords_defaults_indirect, coords_cols_indirect
+from config_THA_BUR4 import gas_baskets, cat_conversion, terminology_proc, \
+    sectors_to_save
+from config_THA_BUR4 import country_processing_step1, country_processing_step2
+
+# ###
+# configuration
+# ###
+input_folder = downloaded_data_path / 'UNFCCC' / 'Thailand' / 'BUR4'
+output_folder = extracted_data_path / 'UNFCCC' / 'Thailand'
+if not output_folder.exists():
+    output_folder.mkdir()
+
+inventory_file = 'THA_inventory_2019.csv'
+trends_file = 'THA_trends_2000-2019.csv'
+indirect_file = 'THA_indirect_2000-2019.csv'
+output_filename = 'THA_BUR4_2022_'
+
+compression = dict(zlib=True, complevel=9)
+
+
+# ###
+# read the inventory data and convert to PM2 IF
+# ###
+df_inventory = pd.read_csv(input_folder /inventory_file, header=None)
+df_inventory = pm2.pm2io.nir_add_unit_information(
+    df_inventory, unit_row=inv_conf["unit_row"], entity_row=inv_conf["entity_row"],
+    regexp_entity=".*", regexp_unit=".*", default_unit="Gg")
+# set index and convert to long format
+df_inventory = df_inventory.set_index(inv_conf["index_cols"])
+df_inventory_long = pm2.pm2io.nir_convert_df_to_long(df_inventory, inv_conf["year"],
+                                                     inv_conf["header_long"])
+df_inventory_long["orig_cat_name"] = df_inventory_long["orig_cat_name"].str[0]
+
+# prep for conversion to PM2 IF and native format
+# make a copy of the categories row
+df_inventory_long["category"] = df_inventory_long["orig_cat_name"]
+
+# replace cat names by codes in col "category"
+# first the manual replacements
+df_inventory_long["category"] = \
+    df_inventory_long["category"].replace(inv_conf["cat_codes_manual"])
+# then the regex replacements
+repl = lambda m: m.group('code')
+df_inventory_long["category"] = \
+    df_inventory_long["category"].str.replace(inv_conf["cat_code_regexp"], repl,
+                                              regex=True)
+df_inventory_long = df_inventory_long.reset_index(drop=True)
+
+# make sure all col headers are str
+df_inventory_long.columns = df_inventory_long.columns.map(str)
+
+df_inventory_long = df_inventory_long.drop(columns=["orig_cat_name"])
+
+data_inventory_IF = pm2.pm2io.convert_long_dataframe_if(
+    df_inventory_long,
+    coords_cols=coords_cols,
+    #add_coords_cols=add_coords_cols,
+    coords_defaults=coords_defaults,
+    coords_terminologies=coords_terminologies,
+    coords_value_mapping=coords_value_mapping,
+    #coords_value_filling=coords_value_filling,
+    filter_remove=filter_remove,
+    #filter_keep=filter_keep,
+    meta_data=meta_data,
+    convert_str=True,
+    time_format="%Y",
+    )
+
+# ###
+# read the main sector time series and convert to PM2 IF
+# ###
+df_main_sector_ts = pd.read_csv(input_folder / trends_file)
+
+df_main_sector_ts = df_main_sector_ts.transpose()
+df_main_sector_ts = df_main_sector_ts.reset_index(drop=False)
+cols = df_main_sector_ts.iloc[0].copy(deep=True)
+cols.iloc[0] = "category"
+cols.iloc[1:] = cols.iloc[1:].astype(int).astype(str)
+df_main_sector_ts.columns = cols
+df_main_sector_ts = df_main_sector_ts.drop(0)
+
+# replace cat names by codes in col "category"
+df_main_sector_ts["category"] = \
+    df_main_sector_ts["category"].replace(cat_codes_manual_main_sector_ts)
+
+data_main_sector_ts_IF = pm2.pm2io.convert_wide_dataframe_if(
+    df_main_sector_ts,
+    coords_cols=coords_cols_main_sector_ts,
+    #add_coords_cols=add_coords_cols,
+    coords_defaults=coords_defaults_main_sector_ts,
+    coords_terminologies=coords_terminologies,
+    coords_value_mapping=coords_value_mapping,
+    #coords_value_filling=coords_value_filling,
+    filter_remove=filter_remove,
+    #filter_keep=filter_keep,
+    meta_data=meta_data,
+    convert_str=True,
+    time_format='%Y',
+    )
+
+
+# ###
+# read the indirect gases time series and convert to PM2 IF
+# ###
+df_indirect = pd.read_csv(input_folder / indirect_file)
+
+df_indirect = df_indirect.transpose()
+df_indirect = df_indirect.reset_index(drop=False)
+cols = df_indirect.iloc[0].copy(deep=True)
+cols.iloc[0] = "entity"
+cols.iloc[1:] = cols.iloc[1:].astype(int).astype(str)
+df_indirect.columns = cols
+df_indirect = df_indirect.drop(0)
+
+data_indirect_IF = pm2.pm2io.convert_wide_dataframe_if(
+    df_indirect,
+    coords_cols=coords_cols_indirect,
+    #add_coords_cols=add_coords_cols,
+    coords_defaults=coords_defaults_indirect,
+    coords_terminologies=coords_terminologies,
+    coords_value_mapping=coords_value_mapping,
+    #coords_value_filling=coords_value_filling,
+    #filter_remove=filter_remove,
+    #filter_keep=filter_keep,
+    meta_data=meta_data,
+    convert_str=True,
+    time_format="%Y",
+    )
+
+# ###
+# merge the three datasets
+# ###
+data_inventory_pm2 = pm2.pm2io.from_interchange_format(data_inventory_IF)
+data_main_sector_ts_pm2 = pm2.pm2io.from_interchange_format(data_main_sector_ts_IF)
+data_indirect_pm2 = pm2.pm2io.from_interchange_format(data_indirect_IF)
+
+data_all_pm2 = data_inventory_pm2.pr.merge(data_main_sector_ts_pm2)
+data_all_pm2 = data_all_pm2.pr.merge(data_indirect_pm2)
+
+data_all_if = data_all_pm2.pr.to_interchange_format()
+
+# ###
+# save raw data to IF and native format
+# ###
+if not output_folder.exists():
+    output_folder.mkdir()
+pm2.pm2io.write_interchange_format(
+    output_folder / (output_filename + coords_terminologies["category"] + "_raw"),
+    data_all_if)
+
+encoding = {var: compression for var in data_all_pm2.data_vars}
+data_all_pm2.pr.to_netcdf(
+    output_folder / (output_filename + coords_terminologies["category"] + "_raw.nc"),
+    encoding=encoding)
+
+# ###
+# ## process the data
+# ###
+data_proc_pm2 = data_all_pm2
+
+# combine CO2 emissions and removals
+data_proc_pm2["CO2"] = data_proc_pm2[["CO2 emissions", "CO2 removals"]].pr.sum\
+    (dim="entity", skipna=True, min_count=1)
+data_proc_pm2["CO2"].attrs['entity'] = 'CO2'
+
+# actual processing
+data_proc_pm2 = process_data_for_country(
+    data_proc_pm2,
+    entities_to_ignore=['CO2 emissions', 'CO2 removals'],
+    gas_baskets={},
+    processing_info_country=country_processing_step1,
+)
+
+data_proc_pm2 = process_data_for_country(
+    data_proc_pm2,
+    entities_to_ignore=[],
+    gas_baskets=gas_baskets,
+    processing_info_country=country_processing_step2,
+    cat_terminology_out = terminology_proc,
+    category_conversion = cat_conversion,
+    sectors_out = sectors_to_save,
+)
+
+# adapt source and metadata
+# TODO: processing info is present twice
+current_source = data_proc_pm2.coords["source"].values[0]
+data_temp = data_proc_pm2.pr.loc[{"source": current_source}]
+data_proc_pm2 = data_proc_pm2.pr.set("source", 'BUR_NIR', data_temp)
+
+# ###
+# save data to IF and native format
+# ###
+data_proc_if = data_proc_pm2.pr.to_interchange_format()
+if not output_folder.exists():
+    output_folder.mkdir()
+pm2.pm2io.write_interchange_format(
+    output_folder / (output_filename + terminology_proc), data_proc_if)
+
+encoding = {var: compression for var in data_proc_pm2.data_vars}
+data_proc_pm2.pr.to_netcdf(
+    output_folder / (output_filename + terminology_proc + ".nc"),
+    encoding=encoding)

+ 12 - 0
UNFCCC_GHG_data/helper/definitions.py

@@ -61,4 +61,16 @@ GWP_factors = {
         'HFCS': 1.4,
         'PFCS': 1.3,
     },
+    'AR4GWP100_to_SARGWP100': {
+        'HFCS': 0.91,
+        'PFCS': 0.91,
+    },
+    'AR4GWP100_to_AR5GWP100': {
+        'HFCS': 1.1,
+        'PFCS': 1.1,
+    },
+    'AR4GWP100_to_AR6GWP100': {
+        'HFCS': 1.27,
+        'PFCS': 1.18,
+    },
 }

+ 15 - 1
UNFCCC_GHG_data/helper/functions.py

@@ -11,7 +11,7 @@ from pathlib import Path
 from .definitions import custom_country_mapping, custom_folders
 from .definitions import root_path, downloaded_data_path, extracted_data_path
 from .definitions import legacy_data_path, code_path
-
+from .definitions import GWP_factors
 
 def process_data_for_country(
         data_country: xr.Dataset,
@@ -228,6 +228,20 @@ def process_data_for_country(
                 else:
                     print(f"no data to aggregate category {cat_to_agg}")
 
+        # copy HFCs and PFCs with default factors
+        if 'basket_copy' in processing_info_country:
+            GWPs_to_add = processing_info_country["basket_copy"]["GWPs_to_add"]
+            entities = processing_info_country["basket_copy"]["entities"]
+            source_GWP = processing_info_country["basket_copy"]["source_GWP"]
+            for entity in entities:
+                data_source = data_country[f'{entity} ({source_GWP})']
+                for GWP in GWPs_to_add:
+                    data_GWP = data_source * \
+                               GWP_factors[f"{source_GWP}_to_{GWP}"][entity]
+                    data_GWP.attrs["entity"] = entity
+                    data_GWP.attrs["gwp_context"] = GWP
+                    data_country[f"{entity} ({GWP})"] = data_GWP
+
         # aggregate gases if desired
         if 'aggregate_gases' in processing_info_country:
             # TODO: why use different code here than below. Can this fill non-existen

+ 1 - 0
downloaded_data/UNFCCC/Thailand/BUR4/THA_indirect_2000-2019.csv

@@ -0,0 +1 @@
+../../../../.git/annex/objects/4V/gz/MD5E-s718--a71e1c2f5e60158552b03cdf207d9bf3.csv/MD5E-s718--a71e1c2f5e60158552b03cdf207d9bf3.csv

+ 1 - 0
downloaded_data/UNFCCC/Thailand/BUR4/THA_inventory_2019.csv

@@ -0,0 +1 @@
+../../../../.git/annex/objects/FF/44/MD5E-s5482--63491cef34ffca8e26a86b3eaf3469dc.csv/MD5E-s5482--63491cef34ffca8e26a86b3eaf3469dc.csv

+ 1 - 0
downloaded_data/UNFCCC/Thailand/BUR4/THA_trends_2000-2019.csv

@@ -0,0 +1 @@
+../../../../.git/annex/objects/9G/KF/MD5E-s1559--84ebb9c01164164595f6242cbd9527f4.csv/MD5E-s1559--84ebb9c01164164595f6242cbd9527f4.csv

+ 1 - 0
downloaded_data/UNFCCC/Thailand/BUR4/processed_pdf/indirect.pdf

@@ -0,0 +1 @@
+../../../../../.git/annex/objects/20/ff/MD5E-s417057--7b3eb95f9e2d6967b3010f40f6fe5bba.pdf/MD5E-s417057--7b3eb95f9e2d6967b3010f40f6fe5bba.pdf

+ 1 - 0
downloaded_data/UNFCCC/Thailand/BUR4/processed_pdf/indirect_ocr.pdf

@@ -0,0 +1 @@
+../../../../../.git/annex/objects/j9/VW/MD5E-s206616--5caadfe7d49c995a685fc02e99039429.pdf/MD5E-s206616--5caadfe7d49c995a685fc02e99039429.pdf

+ 1 - 0
downloaded_data/UNFCCC/Thailand/BUR4/processed_pdf/inventory_2019.pdf

@@ -0,0 +1 @@
+../../../../../.git/annex/objects/6w/Mx/MD5E-s1970438--660e0f9d59ccd629dac4368e9101a6e5.pdf/MD5E-s1970438--660e0f9d59ccd629dac4368e9101a6e5.pdf

+ 1 - 0
downloaded_data/UNFCCC/Thailand/BUR4/processed_pdf/inventory_2019_ocr.pdf

@@ -0,0 +1 @@
+../../../../../.git/annex/objects/gj/75/MD5E-s880391--ac488b16406850cac2659ca0df31c5aa.pdf/MD5E-s880391--ac488b16406850cac2659ca0df31c5aa.pdf

+ 1 - 0
downloaded_data/UNFCCC/Thailand/BUR4/processed_pdf/trends.pdf

@@ -0,0 +1 @@
+../../../../../.git/annex/objects/5X/GP/MD5E-s812736--aa9ead9eb6f6a900854fda733a559c28.pdf/MD5E-s812736--aa9ead9eb6f6a900854fda733a559c28.pdf

+ 1 - 0
downloaded_data/UNFCCC/Thailand/BUR4/processed_pdf/trends_ocr.pdf

@@ -0,0 +1 @@
+../../../../../.git/annex/objects/xg/1q/MD5E-s256479--3ea01da08f5f625b66ec849027b6fa7c.pdf/MD5E-s256479--3ea01da08f5f625b66ec849027b6fa7c.pdf

+ 1 - 0
setup.cfg

@@ -69,6 +69,7 @@ dev =
     ipykernel
     jupyter
     dask
+    ipympl
 
 
 [options.package_data]