浏览代码

Add code for Malaysia BURs 3 and 4 and remove hard coded folder in other reading scripts

Johannes Gütschow 1 年之前
父节点
当前提交
e834b60373

+ 0 - 3
UNFCCC_GHG_data/UNFCCC_reader/Argentina/read_ARG_BUR4_from_pdf.py

@@ -1,9 +1,6 @@
 # this script reads data from Argentina's national inventory which is underlying BUR4
 # Data is read from the pdf file
 
-import os
-os.environ["UNFCCC_GHG_ROOT_PATH"] = \
-    "/storage/data/data/PRIMAP/primap_2.0/datasets/UNFCCC_non-AnnexI_data/"
 import sys
 import camelot
 import primap2 as pm2

+ 0 - 3
UNFCCC_GHG_data/UNFCCC_reader/Israel/read_ISR_BUR2_from_pdf.py

@@ -2,9 +2,6 @@
 
 # TODO: bunkers trend tables not read because of special format
 
-import os
-os.environ["UNFCCC_GHG_ROOT_PATH"] = \
-     "/storage/data/data/PRIMAP/primap_2.0/datasets/UNFCCC_non-AnnexI_data/"
 from UNFCCC_GHG_data.helper import process_data_for_country, GWP_factors
 from UNFCCC_GHG_data.helper import downloaded_data_path, extracted_data_path
 import camelot

+ 686 - 0
UNFCCC_GHG_data/UNFCCC_reader/Malaysia/config_MYS_BUR3.py

@@ -0,0 +1,686 @@
+import pandas as pd
+gwp_to_use = "AR4GWP100"
+
+
+cat_names_fix = {
+    '2A3 Glass Prod.': '2A3 Glass Production',
+    '2F6 Other Applications': '2F6 Other Applications (please specify)',
+    '3A2 Manure Mngmt': '3A2 Manure Mngmt.',
+    '3C7 Rice Cultivations': '3C7 Rice Cultivation',
+}
+
+values_replacement = {
+    '': '-',
+    ' ': '-',
+}
+
+cols_for_space_stripping = ["Categories"]
+
+index_cols = ["Categories", "entity", "unit"]
+
+# parameters part 2: conversion to interchange format
+cats_remove = ['Memo items', 'Information items']
+
+cat_codes_manual = {
+    'Annual change in long-term storage of carbon in HWP waste': 'M.LTS.AC.HWP',
+    'Annual change in total long-term storage of carbon stored': 'M.LTS.AC.TOT',
+    'CO2 captured': 'M.CCS',
+    'CO2 from Biomass Burning for Energy Production': 'M.BIO',
+    'For domestic storage': 'M.CCS.DOM',
+    'For storage in other countries': 'M.CCS.OCT',
+    'International Aviation (International Bunkers)': 'M.BK.A',
+    'International Bunkers': 'M.BK',
+    'International Water-borne Transport (International Bunkers)': 'M.BK.M',
+    'Long-term storage of carbon in waste disposal sites': 'M.LTS.WASTE',
+    'Multilateral Operations': 'M.MULTIOP',
+    'Other (please specify)': 'M.OTHER',
+    'Total National Emissions and Removals': '0',
+}
+
+cat_code_regexp = r'(?P<code>^[A-Z0-9]{1,4})\s.*'
+
+coords_terminologies = {
+    "area": "ISO3",
+    "category": "IPCC2006_PRIMAP",
+    "scenario": "PRIMAP",
+}
+
+coords_defaults = {
+    "source": "MYS-GHG-inventory",
+    "provenance": "measured",
+    "area": "MYS",
+    "scenario": "BUR3"
+}
+
+coords_value_mapping = {
+}
+
+coords_cols = {
+    "category": "Categories",
+    "entity": "entity",
+    "unit": "unit"
+}
+
+add_coords_cols = {
+    "orig_cat_name": ["orig_cat_name", "category"],
+}
+
+meta_data = {
+    "references": "https://unfccc.int/documents/267685",
+    "rights": "",
+    "contact": "mail@johannes-guetschow.de",
+    "title": "Malaysia - Third Biennial Update Report to the UNFCCC",
+    "comment": "Read fom pdf file by Johannes Gütschow",
+    "institution": "United Nations Framework Convention on Climate Change (UNFCCC)",
+}
+
+terminology_proc = coords_terminologies["category"]
+
+table_def_templates = {
+    '184': { #184
+        "area": ['54,498,793,100'],
+        "cols": ['150,197,250,296,346,394,444,493,540,587,637,685,738'],
+        "rows_to_fix": {
+            3: ['Total National', '1A Fuel Combustion', '1A1 Energy', '1A2 Manufacturing',
+                '1B Fugitive', '1B2 Oil and Natural', '1B3 Other emissions',
+                '1C Carbon Dioxide', '2 INDUSTRIAL', '2A1 Cement',
+               ],
+        },
+    },
+    '185': { #185
+        "area": ['34,504,813,99'],
+        "cols": ['128,177,224,273,321,373,425,473,519,564,611,661,713,765'],
+        "rows_to_fix": {
+            3: ['Total National', '1A Fuel', '1A1 Energy', '1A2 Manufacturing',
+                '1B Fugitive', '1B2 Oil and Natural', '1B3 Other',
+                '1C Carbon Dioxide', '2 INDUSTRIAL', '2A Mineral',
+                '2A1 Cement', '2A2 Lime',
+               ],
+        },
+    },
+    '186': { #also 200
+        "area": ['53,498,786,104'],
+        "cols": ['150,197,238,296,347,396,444,489,540,587,634,686,739'],
+        "rows_to_fix": {
+            3: ['2A3 Glass', '2A4 Other Process', '2A5 Other (please',
+                '2B Chemical', '2B1 Ammonia', '2B2 Nitric Acid',
+                '2B3 Adipic Acid', '2B4 Caprolactam,', '2B5 Carbide',
+                '2B6 Titanium', '2B7 Soda Ash', '2B8 Petrochemical',
+                '2B10 Other (Please', '2C1 Iron and Steel', '2C2 Ferroalloys'
+               ],
+            2: ['2B9 Fluorochemical'],
+        },
+    },
+    '187': { # also 201
+        "area": ['39,499,807,91'],
+        "cols": ['132,185,232,280,327,375,425,470,522,568,613,664,713,763'],
+        "rows_to_fix": {
+            3: ['2A3 Glass', '2A4 Other Process', '2A5 Other (please',
+                '2B Chemical', '2B1 Ammonia', '2B2 Nitric Acid',
+                '2B3 Adipic Acid', '2B5 Carbide',
+                '2B6 Titanium', '2B7 Soda Ash', '2B8 Petrochemical',
+                '2B10 Other (Please', '2C1 Iron and Steel', '2C2 Ferroalloys',
+               ],
+            2: ['2B9 Fluorochemical'],
+            5: ['2B4 Caprolactam,'],
+        },
+    },
+    '188': {
+        "area": ['48,503,802,92'],
+        "cols": ['146,194,245,295,346,400,452,500,549,596,642,695,746'],
+        "rows_to_fix": {
+            3: ['2C3 Aluminium', '2C4 Magnesium', '2C7 Other (please',
+                '2D Non-Energy', '2D2 Paraffin Wax', '2D4 Other (please',
+                '2E Electronics', '2E1 Integrated', '2E5 Other (please',
+                '2F1 Refrigeration',
+               ],
+            2: ['2E2 TFT Flat Panel', '2E4 Heat Transfer'],
+            5: ['2F Product Uses as'],
+        },
+    },
+    '189': {
+        "area": ['41,499,806,95'],
+        "cols": ['141,184,233,282,331,376,427,472,520,567,618,665,717,760'],
+        "rows_to_fix": {
+            3: ['2C3 Aluminium', '2C4 Magnesium', '2C7 Other (please',
+                '2D Non-Energy', '2D2 Paraffin Wax', '2D4 Other (please',
+                '2E Electronics', '2E1 Integrated', '2E5 Other (please',
+                '2F1 Refrigeration',
+               ],
+            2: ['2E2 TFT Flat Panel', '2E4 Heat Transfer'],
+            5: ['2F Product Uses as'],
+        },
+    },
+    '190': {
+        "area": ['45,500,802,125'],
+        "cols": ['146,193,243,295,349,400,453,501,549,595,644,696,748'],
+        "rows_to_fix": {
+            3: ['2F2 Foam Blowing', '2F6 Other', '2G Other Product',
+                '2G2 SF6 and PFCs', '2G4 Other (Please', '2H1 Pulp and Paper',
+                '2H2 Food and', '2H3 Other (please', '3 AGRICULTURE,',
+               ],
+            2: ['2G1 Electrical', '2G3 N2O from', '3A1 Enteric'],
+        },
+    },
+    '191': {
+        "area": ['38,498,814,120'],
+        "cols": ['130,180,229,277,326,381,429,477,526,570,620,669,717,765'],
+        "rows_to_fix": {
+            3: ['2F2 Foam Blowing', '2F6 Other', '2G Other Product',
+                '2G2 SF6 and PFCs', '2G4 Other (Please', '2H1 Pulp and Paper',
+                '2H2 Food and', '2H3 Other (please', '3 AGRICULTURE,',
+               ],
+            2: ['2G1 Electrical', '2G3 N2O from', '3A1 Enteric'],
+        },
+    },
+    '192': {
+        "area": ['39,502,807,106'],
+        "cols": ['134,193,245,296,346,400,455,507,556,602,650,701,755'],
+        "rows_to_fix": {
+            3: ['3C1 Emissions from', '3C4 Direct N2O', '3C5 Indirect N2O',
+                '3C6 Indirect N2O', '3C8 Other (please', '3D1 Harvested Wood',
+                '3D2 Other (please',
+               ],
+            5: ['3C Aggregate',],
+        },
+    },
+    '193': {
+        "area": ['36,508,815,119'],
+        "cols": ['128,179,228,278,327,379,428,476,525,571,622,670,717,766'],
+        "rows_to_fix": {
+            3: ['3C1 Emissions from', '3C4 Direct N2O', '3C5 Indirect N2O',
+                '3C6 Indirect N2O', '3C8 Other (please', '3D1 Harvested',
+                '3D2 Other (please',
+               ],
+            5: ['3C Aggregate',],
+        },
+    },
+    '194': {
+        "area": ['80,502,762,151'],
+        "cols": ['201,243,285,329,376,419,462,502,551,591,635,679,724'],
+        "rows_to_fix": {
+            3: ['4C Incineration and', '4C2 Open Burning of', '4E Other',],
+            2: ['4A1 Managed Waste', '4A2 Unmanaged Waste', '4A3 Uncategorised Waste',
+                '4B Biological Treatment', '4D Wastewater', '4D1 Domestic Wastewater',
+                '4D2 Industrial Wastewater',
+               ],
+            5: ['5A Indirect N2O'],
+        },
+    },
+    '195': {
+        "area": ['78,508,765,103'],
+        "cols": ['191,230,271,314,352,400,438,475,519,566,600,645,686,730'],
+        "rows_to_fix": {
+            3: ['4C Incineration and', '4C2 Open Burning of', '4E Other',
+                '4B Biological', '4D Wastewater', '4D1 Domestic',
+                '4D2 Industrial', '5B Other (please'
+               ],
+            2: ['4A1 Managed Waste', '4A2 Unmanaged Waste', '4A3 Uncategorised',
+                '4A Solid Waste',
+               ],
+            5: ['5A Indirect N2O'],
+        },
+    },
+    '196': {
+        "area": ['80,502,762,151'],
+        "cols": ['201,243,285,329,376,419,462,502,551,591,635,679,724'],
+        "rows_to_fix": {
+            3: ['International Aviation', 'International Water-borne',
+                'CO2 from Biomass Burning', 'For storage in other',
+                'Long-term storage of', 'Annual change in total',
+                'Annual change in long-',
+               ],
+        },
+    },
+    '197': {
+        "area": ['74,507,779,201'],
+        "cols": ['182,226,268,311,354,398,444,482,524,565,610,654,693,733'],
+        "rows_to_fix": {
+            3: ['International Aviation', 'International Water-',
+                'CO2 from Biomass', 'For storage in other',
+                'Long-term storage of', 'Annual change in total',
+                'Annual change in long-',
+               ],
+        },
+    },
+    '198': { # first CH4 table
+        "area": ['54,498,793,100'],
+        "cols": ['140,197,250,296,346,394,444,493,540,587,637,685,738'],
+        "rows_to_fix": {
+            3: ['Total National', '1A Fuel Combustion', '1A1 Energy', '1A2 Manufacturing',
+                '1B Fugitive', '1B2 Oil and Natural', '1B3 Other emissions',
+                '1C Carbon Dioxide', '2 INDUSTRIAL', '2A1 Cement',
+               ],
+            -3: ['2A Mineral Industry'],
+        },
+    },
+    '199': {
+        "area": ['34,506,818,97'],
+        "cols": ['132,177,228,276,329,377,432,479,528,574,618,667,722,774'],
+        "rows_to_fix": {
+            3: ['Total National', '1A Fuel', '1A1 Energy', '1A2 Manufacturing',
+                '1B Fugitive', '1B2 Oil and Natural', '1B3 Other',
+                '1C Carbon Dioxide', '2 INDUSTRIAL', '2A1 Cement',
+                '2A Mineral', '2A2 Lime',
+               ],
+        },
+    },
+    '202': {
+        "area": ['48,503,802,92'],
+        "cols": ['146,194,245,295,346,400,452,500,549,596,642,695,746'],
+        "rows_to_fix": {
+            3: ['2C3 Aluminium', '2C7 Other (please',
+                '2D Non-Energy', '2D2 Paraffin Wax', '2D4 Other (please',
+                '2E Electronics', '2E1 Integrated', '2E5 Other (please',
+               ],
+            2: ['2C4 Magnesium', '2E2 TFT Flat Panel', '2E4 Heat Transfer',
+                '2F1 Refrigeration',
+               ],
+            5: ['2F Product Uses as'],
+        },
+    },
+    '203': {
+        "area": ['41,499,806,95'],
+        "cols": ['141,184,233,282,331,376,427,472,520,567,618,665,717,760'],
+        "rows_to_fix": {
+            3: ['2C3 Aluminium', '2C7 Other (please',
+                '2D Non-Energy', '2D2 Paraffin Wax', '2D4 Other (please',
+                '2E Electronics', '2E1 Integrated', '2E5 Other (please',
+               ],
+            2: ['2C4 Magnesium', '2E2 TFT Flat Panel', '2E4 Heat Transfer',
+                '2F1 Refrigeration'
+               ],
+            5: ['2F Product Uses as'],
+        },
+    },
+    '204': {
+        "area": ['45,500,802,125'],
+        "cols": ['146,193,243,295,349,400,455,501,549,595,644,696,748'],
+        "rows_to_fix": {
+            3: ['2F6 Other', '2G Other Product',
+                '2G2 SF6 and PFCs', '2G4 Other (Please', '2H1 Pulp and Paper',
+                '2H2 Food and', '2H3 Other (please', '3 AGRICULTURE,',
+                '3A1 Enteric',
+               ],
+            2: ['2F2 Foam Blowing', '2G1 Electrical', '2G3 N2O from'],
+        },
+    },
+    '205': {
+        "area": ['38,498,814,120'],
+        "cols": ['130,180,229,277,326,381,429,477,526,570,620,669,717,765'],
+        "rows_to_fix": {
+            3: ['2F6 Other', '2G Other Product',
+                '2G2 SF6 and PFCs', '2G4 Other (Please', '2H1 Pulp and Paper',
+                '2H2 Food and', '2H3 Other (please', '3 AGRICULTURE,',
+                '3A1 Enteric',
+               ],
+            2: ['2F2 Foam Blowing', '2G1 Electrical', '2G3 N2O from'],
+        },
+    },
+    '206': { #also 220
+        "area": ['39,502,807,106'],
+        "cols": ['134,193,245,296,346,400,455,507,556,602,650,701,755'],
+        "rows_to_fix": {
+            3: ['3C1 Emissions from', '3C4 Direct N2O', '3C5 Indirect N2O',
+                '3C6 Indirect N2O', '3C8 Other (please',
+                '3D2 Other (please',
+               ],
+            2: ['3D1 Harvested Wood',],
+            5: ['3C Aggregate',],
+        },
+    },
+    '207': { # also 221
+        "area": ['36,508,815,110'],
+        "cols": ['128,179,228,278,327,379,428,476,527,571,622,670,717,766'],
+        "rows_to_fix": {
+            3: ['3C1 Emissions from', '3C4 Direct N2O', '3C5 Indirect N2O',
+                '3C6 Indirect N2O', '3C8 Other (please',
+                '3D2 Other (please',
+               ],
+            2: ['3D1 Harvested',],
+            5: ['3C Aggregate',],
+        },
+    },
+    '208': { # also 222
+        "area": ['80,502,762,151'],
+        "cols": ['201,243,285,329,376,419,462,502,551,591,635,679,724'],
+        "rows_to_fix": {
+            3: ['4C Incineration and', '4C2 Open Burning of', '4E Other',
+                '4A1 Managed Waste', '4A2 Unmanaged Waste', '4A3 Uncategorised Waste',
+                '4B Biological Treatment', '4D Wastewater', '4D1 Domestic Wastewater',
+                '4D2 Industrial Wastewater'
+               ],
+            5: ['5A Indirect N2O'],
+        },
+    },
+    '209': { # also 223
+        "area": ['78,508,765,103'],
+        "cols": ['191,230,271,314,352,400,438,475,519,560,600,645,686,730'],
+        "rows_to_fix": {
+            3: ['4C Incineration and', '4C2 Open Burning of', '4E Other',
+                '4B Biological', '4D Wastewater', '4D1 Domestic',
+                '4D2 Industrial', '5B Other (please',
+                '4A1 Managed Waste', '4A2 Unmanaged Waste', '4A3 Uncategorised',
+                '4A Solid Waste'
+               ],
+            5: ['5A Indirect N2O'],
+        },
+    },
+    '210': { # also 224
+        "area": ['80,502,762,151'],
+        "cols": ['201,243,285,329,376,419,462,502,551,591,635,679,724'],
+        "rows_to_fix": {
+            3: ['International Aviation', 'International Water-borne',
+                'Long-term storage of', 'Annual change in total',
+                'Annual change in long-',
+               ],
+            2: ['CO2 from Biomass Burning', 'For storage in other',],
+        },
+    },
+    '211': { # also 225
+        "area": ['74,507,779,201'],
+        "cols": ['182,226,268,311,354,398,444,482,524,565,610,654,693,733'],
+        "rows_to_fix": {
+            3: ['International Aviation', 'International Water-',
+                'Long-term storage of', 'Annual change in total',
+                'Annual change in long-', 'CO2 from Biomass',
+               ],
+            2: ['For storage in other',],
+        },
+    },
+    '212': {
+        "area": ['54,498,793,100'],
+        "cols": ['150,197,250,296,346,394,444,493,540,587,637,685,738'],
+        "rows_to_fix": {
+            3: ['Total National', '1A Fuel Combustion', '1A1 Energy', '1A2 Manufacturing',
+                '1B Fugitive', '1B2 Oil and Natural', '1B3 Other emissions',
+                '1C Carbon Dioxide', '2 INDUSTRIAL',
+               ],
+            2: ['2A1 Cement',],
+        },
+    },
+    '213': {
+        "area": ['34,504,813,99'],
+        "cols": ['128,177,224,273,321,373,425,473,519,564,611,661,713,765'],
+        "rows_to_fix": {
+            3: ['Total National', '1A Fuel', '1A1 Energy', '1A2 Manufacturing',
+                '1B Fugitive', '1B2 Oil and Natural', '1B3 Other',
+                '1C Carbon Dioxide', '2 INDUSTRIAL', '2A Mineral',
+               ],
+            2: ['2A1 Cement', '2A2 Lime',],
+        },
+    },
+    '214': {
+        "area": ['47,499,801,93'],
+        "cols": ['141,197,246,297,350,396,453,502,550,595,642,692,748'],
+        "rows_to_fix": {
+            3: ['2A5 Other (please',
+                '2B Chemical', '2B1 Ammonia', '2B2 Nitric Acid',
+                '2B3 Adipic Acid', '2B4 Caprolactam,', '2B5 Carbide',
+                '2B6 Titanium', '2B7 Soda Ash', '2B8 Petrochemical',
+                '2B10 Other (Please', '2C1 Iron and Steel', '2C2 Ferroalloys'
+               ],
+            2: ['2A3 Glass', '2A4 Other Process', '2B9 Fluorochemical'],
+            -3: ['2C Metal Industry'],
+        },
+    },
+    '215': {
+        "area": ['39,499,807,91'],
+        "cols": ['132,180,232,280,327,375,425,470,522,568,613,664,713,763'],
+        "rows_to_fix": {
+            3: ['2A5 Other (please',
+                '2B Chemical', '2B1 Ammonia', '2B2 Nitric Acid',
+                '2B3 Adipic Acid', '2B4 Caprolactam,', '2B5 Carbide',
+                '2B6 Titanium Dioxide', '2B7 Soda Ash', '2B8 Petrochemical',
+                '2B10 Other (Please', '2C1 Iron and Steel', '2C2 Ferroalloys'
+               ],
+            2: ['2A4 Other Process', '2B9 Fluorochemical'],
+            -3: ['2C Metal Industry'],
+        },
+    },
+    '216': {
+        "area": ['48,503,802,92'],
+        "cols": ['146,194,245,295,346,400,452,500,549,596,642,695,746'],
+        "rows_to_fix": {
+            3: ['2C7 Other (please', '2D Non-Energy', '2D2 Paraffin Wax',
+                '2D4 Other (please', '2E Electronics', '2E1 Integrated',
+                '2E5 Other (please',
+               ],
+            2: ['2C3 Aluminium', '2C4 Magnesium', '2E2 TFT Flat Panel',
+                '2E4 Heat Transfer', '2F1 Refrigeration',
+               ],
+            5: ['2F Product Uses as'],
+        },
+    },
+    '217': {
+        "area": ['41,499,806,95'],
+        "cols": ['141,184,233,282,331,376,427,472,520,567,618,665,717,760'],
+        "rows_to_fix": {
+            3: ['2C7 Other (please', '2D Non-Energy', '2D2 Paraffin Wax',
+                '2D4 Other (please', '2E Electronics', '2E1 Integrated',
+                '2E5 Other (please',
+               ],
+            2: ['2C3 Aluminium', '2C4 Magnesium', '2E2 TFT Flat Panel',
+                '2E4 Heat Transfer', '2F1 Refrigeration',
+               ],
+            5: ['2F Product Uses as'],
+        },
+    },
+    '218': {
+        "area": ['45,500,802,125'],
+        "cols": ['146,193,243,295,349,400,455,501,549,595,644,696,748'],
+        "rows_to_fix": {
+            3: ['2F6 Other', '2G Other Product', '2G2 SF6 and PFCs',
+                '2G3 N2O from', '2H3 Other (please', '3 AGRICULTURE,',
+               ],
+            2: ['2F2 Foam Blowing', '2G1 Electrical', '2G4 Other (Please',
+                '2H1 Pulp and Paper', '2H2 Food and', '3A1 Enteric',],
+        },
+    },
+    '219': {
+        "area": ['38,498,814,120'],
+        "cols": ['130,180,229,277,326,381,429,477,526,570,620,669,717,765'],
+        "rows_to_fix": {
+            3: ['2F6 Other', '2G Other Product', '2G2 SF6 and PFCs',
+                '2G3 N2O from', '2H3 Other (please', '3 AGRICULTURE,',
+               ],
+            2: ['2F2 Foam Blowing', '2G1 Electrical', '2G4 Other (Please',
+                '2H1 Pulp and Paper', '2H2 Food and', '3A1 Enteric',],
+        },
+    },
+    '226': { # also 234, 238
+        "area": ['48,510,797,99'],
+        "cols": ['271,310,350,393,435,475,514,557,594,640,678,719,760'],
+        "rows_to_fix": {
+            2: ['2B4 Caprolactam, Glyoxal and Glyoxylic Acid'],
+        }
+    },
+    '227': { # also 231, 235, 239
+        "area": ['27,510,818,99'],
+        "cols": ['250,290,333,372,413,452,494,536,576,616,656,699,739,781'],
+        "rows_to_fix": {
+            2: ['2B4 Caprolactam, Glyoxal and Glyoxylic Acid'],
+        }
+    },
+    '228': {
+        "area": ['48,510,797,99'],
+        "cols": ['271,310,350,393,435,475,514,557,594,640,678,719,760'],
+        "rows_to_fix": {
+            3: ['2F Product Uses as Substitutes for Ozone'],
+            2: ['2D Non-Energy Products from Fuels and Solvent'],
+        },
+    },
+    '229': {
+        "area": ['25,512,819,86'],
+        "cols": ['246,291,331,370,412,454,495,536,577,619,656,699,740,777'],
+        "rows_to_fix": {
+            3: ['2F Product Uses as Substitutes for Ozone'],
+            2: ['2D Non-Energy Products from Fuels and Solvent'],
+        },
+    },
+    '230': {
+        "area": ['48,510,797,99'],
+        "cols": ['271,310,350,393,435,475,514,557,594,640,678,719,760'],
+        "rows_to_fix": {
+            -3: ['Total National Emissions and Removals', '2 INDUSTRIAL PROCESSES AND PRODUCT USE'],
+            2: ['2B4 Caprolactam, Glyoxal and Glyoxylic Acid'],
+        }
+    },
+    '232': { # also 236
+        "area": ['48,510,797,99'],
+        "cols": ['271,310,350,393,435,475,514,557,594,640,678,719,760'],
+        "rows_to_fix": {
+            -3: ['2G2 SF6 and PFCs from Other Product Uses',],
+            2: ['2D Non-Energy Products from Fuels and Solvent',
+                '2F Product Uses as Substitutes for Ozone',]
+        },
+    },
+    '233': {
+        "area": ['25,512,819,86'],
+        "cols": ['246,291,331,370,412,454,495,536,577,619,656,699,740,777'],
+        "rows_to_fix": {
+            -5: ['2F Product Uses as Substitutes for Ozone'],
+            2: ['2D Non-Energy Products from Fuels and Solvent'],
+            -3: ['2G Other Product Manufacture and Use',
+                 '2G2 SF6 and PFCs from Other Product Uses',]
+        },
+    },
+    '237': {
+        "area": ['25,512,819,86'],
+        "cols": ['246,291,331,370,412,454,495,536,577,619,656,699,740,777'],
+        "rows_to_fix": {
+            2: ['2D Non-Energy Products from Fuels and Solvent',
+                '2F Product Uses as Substitutes for Ozone'],
+        },
+    },
+    '240': {
+        "area": ['48,510,797,99'],
+        "cols": ['271,310,350,393,435,475,514,557,594,640,678,719,760'],
+        "rows_to_fix": {
+            2: ['2D Non-Energy Products from Fuels and Solvent',
+                '2F Product Uses as Substitutes for Ozone'],
+            -3: ['2E Electronics Industry',
+                 '2F1 Refrigeration and Air Conditioning',
+                 '2G2 SF6 and PFCs from Other Product Uses',],
+        },
+    },
+    '241': {
+        "area": ['25,512,819,86'],
+        "cols": ['246,291,331,370,412,454,495,536,577,619,656,699,740,777'],
+        "rows_to_fix": {
+            2: ['2D Non-Energy Products from Fuels and Solvent',
+                '2F Product Uses as Substitutes for Ozone',
+                '2E1 Integrated Circuit or Semiconductor',],
+            -3: ['2F1 Refrigeration and Air Conditioning',
+                 '2G2 SF6 and PFCs from Other Product Uses',],
+        },
+    },
+}
+
+table_defs = {
+    '184': {"template": '184', "entity": "CO2", "unit": "Gg CO2 / yr"}, #CO2
+    '185': {"template": '185', "entity": "CO2", "unit": "Gg CO2 / yr"},
+    '186': {"template": '186', "entity": "CO2", "unit": "Gg CO2 / yr"},
+    '187': {"template": '187', "entity": "CO2", "unit": "Gg CO2 / yr"},
+    '188': {"template": '188', "entity": "CO2", "unit": "Gg CO2 / yr"},
+    '189': {"template": '189', "entity": "CO2", "unit": "Gg CO2 / yr"},
+    '190': {"template": '190', "entity": "CO2", "unit": "Gg CO2 / yr"},
+    '191': {"template": '191', "entity": "CO2", "unit": "Gg CO2 / yr"},
+    '192': {"template": '192', "entity": "CO2", "unit": "Gg CO2 / yr"},
+    '193': {"template": '193', "entity": "CO2", "unit": "Gg CO2 / yr"},
+    '194': {"template": '194', "entity": "CO2", "unit": "Gg CO2 / yr"},
+    '195': {"template": '195', "entity": "CO2", "unit": "Gg CO2 / yr"},
+    '196': {"template": '196', "entity": "CO2", "unit": "Gg CO2 / yr"},
+    '197': {"template": '197', "entity": "CO2", "unit": "Gg CO2 / yr"},
+    '198': {"template": '198', "entity": "CH4", "unit": "Gg CH4 / yr"}, #CH4
+    '199': {"template": '199', "entity": "CH4", "unit": "Gg CH4 / yr"},
+    '200': {"template": '186', "entity": "CH4", "unit": "Gg CH4 / yr"},
+    '201': {"template": '187', "entity": "CH4", "unit": "Gg CH4 / yr"},
+    '202': {"template": '202', "entity": "CH4", "unit": "Gg CH4 / yr"},
+    '203': {"template": '203', "entity": "CH4", "unit": "Gg CH4 / yr"},
+    '204': {"template": '204', "entity": "CH4", "unit": "Gg CH4 / yr"},
+    '205': {"template": '205', "entity": "CH4", "unit": "Gg CH4 / yr"},
+    '206': {"template": '206', "entity": "CH4", "unit": "Gg CH4 / yr"},
+    '207': {"template": '207', "entity": "CH4", "unit": "Gg CH4 / yr"},
+    '208': {"template": '208', "entity": "CH4", "unit": "Gg CH4 / yr"},
+    '209': {"template": '209', "entity": "CH4", "unit": "Gg CH4 / yr"},
+    '210': {"template": '210', "entity": "CH4", "unit": "Gg CH4 / yr"},
+    '211': {"template": '211', "entity": "CH4", "unit": "Gg CH4 / yr"},
+    '212': {"template": '212', "entity": "N2O", "unit": "Gg N2O / yr"}, #N2O
+    '213': {"template": '213', "entity": "N2O", "unit": "Gg N2O / yr"},
+    '214': {"template": '214', "entity": "N2O", "unit": "Gg N2O / yr"},
+    '215': {"template": '215', "entity": "N2O", "unit": "Gg N2O / yr"},
+    '216': {"template": '216', "entity": "N2O", "unit": "Gg N2O / yr"},
+    '217': {"template": '217', "entity": "N2O", "unit": "Gg N2O / yr"},
+    '218': {"template": '218', "entity": "N2O", "unit": "Gg N2O / yr"},
+    '219': {"template": '219', "entity": "N2O", "unit": "Gg N2O / yr"},
+    '220': {"template": '206', "entity": "N2O", "unit": "Gg N2O / yr"},
+    '221': {"template": '207', "entity": "N2O", "unit": "Gg N2O / yr"},
+    '222': {"template": '208', "entity": "N2O", "unit": "Gg N2O / yr"},
+    '223': {"template": '209', "entity": "N2O", "unit": "Gg N2O / yr"},
+    '224': {"template": '210', "entity": "N2O", "unit": "Gg N2O / yr"},
+    '225': {"template": '211', "entity": "N2O", "unit": "Gg N2O / yr"},
+    '226': {"template": '226', "entity": "HFCS (AR4GWP100)", "unit": "Gg CO2 / yr"}, #HFCs
+    '227': {"template": '227', "entity": "HFCS (AR4GWP100)", "unit": "Gg CO2 / yr"},
+    '228': {"template": '228', "entity": "HFCS (AR4GWP100)", "unit": "Gg CO2 / yr"},
+    '229': {"template": '229', "entity": "HFCS (AR4GWP100)", "unit": "Gg CO2 / yr"},
+    '230': {"template": '230', "entity": "PFCS (AR4GWP100)", "unit": "Gg CO2 / yr"}, #PFCs
+    '231': {"template": '227', "entity": "PFCS (AR4GWP100)", "unit": "Gg CO2 / yr"},
+    '232': {"template": '232', "entity": "PFCS (AR4GWP100)", "unit": "Gg CO2 / yr"},
+    '233': {"template": '233', "entity": "PFCS (AR4GWP100)", "unit": "Gg CO2 / yr"},
+    '234': {"template": '226', "entity": "SF6 (AR4GWP100)", "unit": "Gg CO2 / yr"}, #SF6
+    '235': {"template": '227', "entity": "SF6 (AR4GWP100)", "unit": "Gg CO2 / yr"},
+    '236': {"template": '232', "entity": "SF6 (AR4GWP100)", "unit": "Gg CO2 / yr"},
+    '237': {"template": '237', "entity": "SF6 (AR4GWP100)", "unit": "Gg CO2 / yr"},
+    '238': {"template": '226', "entity": "NF3 (AR4GWP100)", "unit": "Gg CO2 / yr"}, #NF3
+    '239': {"template": '227', "entity": "NF3 (AR4GWP100)", "unit": "Gg CO2 / yr"},
+    '240': {"template": '240', "entity": "NF3 (AR4GWP100)", "unit": "Gg CO2 / yr"},
+    '241': {"template": '241', "entity": "NF3 (AR4GWP100)", "unit": "Gg CO2 / yr"},
+}
+
+country_processing_step1 = {
+    'aggregate_cats': {
+        'M.3.C.AG': {'sources': ['3.C.1', '3.C.2', '3.C.3', '3.C.4', '3.C.5',
+                                 '3.C.6', '3.C.7', '3.C.8'],
+                     'name': 'Aggregate sources and non-CO2 emissions sources on land '
+                             '(Agriculture)'},
+        'M.3.D.AG': {'sources': ['3.D.2'],
+                     'name': 'Other (Agriculture)'},
+        'M.AG.ELV': {'sources': ['M.3.C.AG', 'M.3.D.AG'],
+                     'name': 'Agriculture excluding livestock'},
+        'M.AG': {'sources': ['3.A', 'M.AG.ELV'],
+                     'name': 'Agriculture'},
+        'M.3.D.LU': {'sources': ['3.D.1'],
+                     'name': 'Other (LULUCF)'},
+        'M.LULUCF': {'sources': ['3.B', 'M.3.D.LU'],
+                     'name': 'LULUCF'},
+        'M.0.EL': {'sources': ['1', '2', 'M.AG', '4', '5'],
+                     'name': 'National total emissions excluding LULUCF'},
+    },
+    'basket_copy': {
+        'GWPs_to_add': ["SARGWP100", "AR5GWP100", "AR6GWP100"],
+        'entities': ["HFCS", "PFCS"],
+        'source_GWP': gwp_to_use,
+    },
+}
+
+gas_baskets = {
+    'FGASES (SARGWP100)': ['HFCS (SARGWP100)', 'PFCS (SARGWP100)', 'SF6', 'NF3',
+                           'Unspecified mix of HFCs (SARGWP100)',
+                           'Unspecified mix of PFCs (SARGWP100)'],
+    'FGASES (AR4GWP100)': ['HFCS (AR4GWP100)', 'PFCS (AR4GWP100)', 'SF6', 'NF3',
+                           'Unspecified mix of HFCs (AR4GWP100)',
+                           'Unspecified mix of PFCs (AR4GWP100)'],
+    'FGASES (AR5GWP100)':['HFCS (AR5GWP100)', 'PFCS (AR5GWP100)', 'SF6', 'NF3',
+                          'Unspecified mix of HFCs (AR5GWP100)',
+                          'Unspecified mix of PFCs (AR5GWP100)'
+                          ],
+    'FGASES (AR6GWP100)':['HFCS (AR6GWP100)', 'PFCS (AR6GWP100)', 'SF6', 'NF3',
+                          'Unspecified mix of HFCs (AR6GWP100)',
+                          'Unspecified mix of PFCs (AR6GWP100)'
+                          ],
+    'KYOTOGHG (SARGWP100)': ['CO2', 'CH4', 'N2O', 'FGASES (SARGWP100)'],
+    'KYOTOGHG (AR4GWP100)': ['CO2', 'CH4', 'N2O', 'FGASES (AR4GWP100)'],
+    'KYOTOGHG (AR5GWP100)': ['CO2', 'CH4', 'N2O', 'FGASES (AR5GWP100)'],
+    'KYOTOGHG (AR6GWP100)': ['CO2', 'CH4', 'N2O', 'FGASES (AR6GWP100)'],
+}

+ 412 - 0
UNFCCC_GHG_data/UNFCCC_reader/Malaysia/config_MYS_BUR4.py

@@ -0,0 +1,412 @@
+import pandas as pd
+gwp_to_use = "AR4GWP100"
+
+
+cat_names_fix = {
+    #'2A3 Glass Prod.': '2A3 Glass Production',
+    #'2F6 Other Applications': '2F6 Other Applications (please specify)',
+    #'3A2 Manure Mngmt': '3A2 Manure Mngmt.',
+    #'3C7 Rice Cultivations': '3C7 Rice Cultivation',
+}
+
+values_replacement = {
+    '': '-',
+    ' ': '-',
+}
+
+cols_for_space_stripping = ["Categories"]
+
+index_cols = ["Categories", "entity", "unit"]
+
+# parameters part 2: conversion to interchange format
+cats_remove = ['Memo items', 'Information items',  'Information items (1)']
+
+cat_codes_manual = {
+    'Annual change in long-term storage of carbon in HWP waste': 'M.LTS.AC.HWP',
+    'Annual change in total long-term storage of carbon stored': 'M.LTS.AC.TOT',
+    'CO2 captured': 'M.CCS',
+    'CO2 from Biomass Burning for Energy Production': 'M.BIO',
+    'For domestic storage': 'M.CCS.DOM',
+    'For storage in other countries': 'M.CCS.OCT',
+    'International Aviation (International Bunkers)': 'M.BK.A',
+    'International Bunkers': 'M.BK',
+    'International Water-borne Transport (International Bunkers)': 'M.BK.M',
+    'Long-term storage of carbon in waste disposal sites': 'M.LTS.WASTE',
+    'Multilateral Operations': 'M.MULTIOP',
+    'Other (please specify)': 'M.OTHER',
+    'Total National Emissions and Removals': '0',
+}
+
+cat_code_regexp = r'(?P<code>^[A-Z0-9]{1,4})\s.*'
+
+
+coords_terminologies = {
+    "area": "ISO3",
+    "category": "IPCC2006_PRIMAP",
+    "scenario": "PRIMAP",
+}
+
+coords_defaults = {
+    "source": "MYS-GHG-inventory",
+    "provenance": "measured",
+    "area": "MYS",
+    "scenario": "BUR4"
+}
+
+coords_value_mapping = {
+}
+
+coords_cols = {
+    "category": "Categories",
+    "entity": "entity",
+    "unit": "unit"
+}
+
+add_coords_cols = {
+    "orig_cat_name": ["orig_cat_name", "category"],
+}
+
+#filter_remove = {
+#    "f1": {
+#        "entity": ["CO2(grossemissions)", "CO2(removals)"],
+#    },
+#}
+
+meta_data = {
+    "references": "https://unfccc.int/documents/624776",
+    "rights": "",
+    "contact": "mail@johannes-guetschow.de",
+    "title": "Malaysia - Fourth Biennial Update Report under the UNFCCC",
+    "comment": "Read fom pdf file by Johannes Gütschow",
+    "institution": "United Nations Framework Convention on Climate Change (UNFCCC)",
+}
+
+terminology_proc = coords_terminologies["category"]
+
+# Templates for reading the tables with camelot. Each template is named after
+# the first pdf page that uses it; table_defs maps every page to one template.
+# The comment behind each key lists all pages using that template.
+#   "area": passed to camelot.read_pdf as table_areas
+#   "cols": optional, passed to camelot.read_pdf as columns (column separators)
+#   "rows_to_fix": optional, maps a row count to the category name fragments
+#       handed to fix_rows -- presumably the number of rows a split category
+#       name spans; confirm against fix_rows in UNFCCC_GHG_data.helper
+table_def_templates = {
+    # CO2
+    '203': {  # 203, 249
+        "area": ['70,480,768,169'],
+    },
+    '204': {  # 204
+        "area": ['70,500,763,141'],
+    },
+    '205': {  # 205, 209, 214, 218
+        "area": ['70,495,763,95'],
+        "rows_to_fix": {
+            2: ['5A Indirect N2O emissions from the atmospheric deposition of'],
+        },
+    },
+    '206': {  # 206, 210
+        "area": ['70,495,763,353'],
+    },
+    '207': {  # 207, 208, 211, 212, 213, 215, 217, 223, 227, 231,
+        # 235, 239, 251, 257, 259, 263, 265
+        "area": ['70,495,763,95'],
+    },
+    '216': {  #  216
+        "area": ['70,500,763,95'],
+    },
+    # CH4
+    '219': {  # 219, 255
+        "area": ['70,480,768,100'],
+    },
+    '220': {  # 220, 224, 228
+        "area": ['70,495,763,95'],
+        "rows_to_fix": {
+            3: ['2F Product Uses as Substitutes for Ozone Depleting'],
+        },
+    },
+    '221': {  # 221
+        "area": ['92,508,748,92'],
+        "cols": ['298,340,380,422,462,502,542,582,622,662,702'],
+        "rows_to_fix": {
+            3: ['3C Aggregate sources and Non-CO2 emissions'],
+            2: ['5A Indirect N2O emissions from the atmospheric'],
+        },
+    },
+    '222': {  # 222
+        "area": ['70,495,763,323'],
+        "rows_to_fix": {
+            2: ['Annual change in long-term storage of carbon in HWP'],
+        },
+    },
+    '225': {  # 225
+        "area": ['92,508,748,92'],
+        "cols": ['311,357,400,443,486,529,572,615,658,701'],
+        "rows_to_fix": {
+            3: ['3C Aggregate sources and Non-CO2 emissions'],
+        },
+    },
+    '226': {  # 226, 230
+        "area": ['70,495,763,95'],
+        "rows_to_fix": {
+            2: ['5A Indirect N2O emissions from the atmospheric',
+                'Annual change in long-term storage of carbon in HWP'],
+        },
+    },
+    '229': {  # 229
+        "area": ['114,508,725,92'],
+        "cols": ['333,379,421,464,506,548,590,632,674'],
+        "rows_to_fix": {
+            3: ['3C Aggregate sources and Non-CO2 emissions'],
+        },
+    },
+    # N2O
+    '232': {  # 232
+        "area": ['70,495,763,95'],
+        "cols": ['315,366,416,466,516,566,616,666,716'],
+        "rows_to_fix": {
+            3: ['2F Product Uses as Substitutes for Ozone Depleting'],
+        },
+    },
+    '233': {  # 233, 237, 241
+        "area": ['70,495,763,95'],
+        "rows_to_fix": {
+            3: ['3C Aggregate sources and Non-CO2 emissions'],
+        },
+    },
+    '234': {  # 234, 238, 242
+        "area": ['70,495,763,95'],
+        "rows_to_fix": {
+            3: ['International Water-borne Transport (International'],
+        },
+    },
+    '236': {  # 236
+        "area": ['70,495,763,95'],
+        "cols": ['298,344,392,439,487,534,580,629,675,721'],
+        "rows_to_fix": {
+            3: ['2F Product Uses as Substitutes for Ozone Depleting'],
+        },
+    },
+    '240': {  # 240
+        "area": ['70,495,763,95'],
+        "cols": ['283,329,372,416,459,504,550,594,639,682,726'],
+        "rows_to_fix": {
+            3: ['2F Product Uses as Substitutes for Ozone Depleting'],
+        },
+    },
+    # HFCs
+    '243': {  # 243
+        "area": ['70,480,763,95'],
+        "cols": ['408,449,489,527,567,604,644,681,721'],
+    },
+    '244': {  # 244
+        "area": ['70,495,763,95'],
+        "cols": ['408,449,489,527,567,604,644,681,721'],
+    },
+    '245': {  # 245, 246
+        "area": ['70,495,763,95'],
+        "cols": ['405,442,478,515,550,587,621,657,693,729'],
+    },
+    '247': {  # 247, 248
+        "area": ['70,495,763,95'],
+        "cols": ['384,426,459,493,531,564,597,633,666,700,735'],
+    },
+    # PFCs
+    '250': {  # 250
+        "area": ['70,495,763,95'],
+        "cols": ['341,389,436,485,531,579,626,674,723'],
+    },
+    '252': {  # 252
+        "area": ['70,495,763,95'],
+        "cols": ['323,370,415,459,504,547,590,636,680,726'],
+    },
+    '253': {  # 253
+        "area": ['70,495,763,95'],
+        "cols": ['334,378,419,464,511,554,597,636,668,702,735'],
+    },
+    '254': {  # 254
+        "area": ['70,495,763,95'],
+        "cols": ['330,378,419,464,511,554,597,636,668,702,735'],
+        "rows_to_fix": {
+            -3: ['2F Product Uses as Substitutes for Ozone Depleting Substances'],
+        },
+    },
+    # SF6
+    '256': {  # 256
+        "area": ['70,495,763,95'],
+        "cols": ['382,420,462,504,546,588,630,672,714'],
+        "rows_to_fix": {
+            3: ['2F Product Uses as Substitutes for Ozone Depleting'],
+        },
+    },
+    '258': {  # 258
+        "area": ['70,495,763,95'],
+        "cols": ['363,399,441,481,522,564,606,646,688,728'],
+        "rows_to_fix": {
+            3: ['2F Product Uses as Substitutes for Ozone Depleting'],
+        },
+    },
+    '260': {  # 260
+        "area": ['70,495,763,95'],
+        "cols": ['346,381,419,458,498,536,576,614,652,692,732'],
+        "rows_to_fix": {
+            3: ['2F Product Uses as Substitutes for Ozone Depleting'],
+        },
+    },
+    # NF3
+    '261': {  # 261
+        "area": ['70,490,768,100'],
+        "cols": ['364,412,454,496,538,581,623,667,710'],
+    },
+    '262': {  # 262
+        "area": ['70,495,763,95'],
+        "cols": ['376,420,462,504,545,591,633,676,718'],
+        "rows_to_fix": {
+            3: ['2F Product Uses as Substitutes for Ozone Depleting'],
+        },
+    },
+    '264': {  # 264
+        "area": ['70,495,763,95'],
+        "cols": ['370,415,451,491,530,569,609,651,689,729'],
+        "rows_to_fix": {
+            3: ['2F Product Uses as Substitutes for Ozone Depleting'],
+        },
+    },
+    '266': {  # 266
+        "area": ['70,495,763,95'],
+        "cols": ['355,392,430,467,505,544,580,619,656,695,732'],
+        "rows_to_fix": {
+            3: ['2F Product Uses as Substitutes for Ozone Depleting'],
+        },
+    },
+}
+
+table_defs = {
+    '203': {"template": '203', "entity": "CO2", "unit": "Gg CO2 / yr"},  # CO2
+    '204': {"template": '204', "entity": "CO2", "unit": "Gg CO2 / yr"},
+    '205': {"template": '205', "entity": "CO2", "unit": "Gg CO2 / yr"},
+    '206': {"template": '206', "entity": "CO2", "unit": "Gg CO2 / yr"},
+    '207': {"template": '207', "entity": "CO2", "unit": "Gg CO2 / yr"},
+    '208': {"template": '207', "entity": "CO2", "unit": "Gg CO2 / yr"},
+    '209': {"template": '205', "entity": "CO2", "unit": "Gg CO2 / yr"},
+    '210': {"template": '206', "entity": "CO2", "unit": "Gg CO2 / yr"},
+    '211': {"template": '207', "entity": "CO2", "unit": "Gg CO2 / yr"},
+    '212': {"template": '207', "entity": "CO2", "unit": "Gg CO2 / yr"},
+    '213': {"template": '207', "entity": "CO2", "unit": "Gg CO2 / yr"},
+    '214': {"template": '205', "entity": "CO2", "unit": "Gg CO2 / yr"},
+    '215': {"template": '207', "entity": "CO2", "unit": "Gg CO2 / yr"},
+    '216': {"template": '216', "entity": "CO2", "unit": "Gg CO2 / yr"},
+    '217': {"template": '207', "entity": "CO2", "unit": "Gg CO2 / yr"},
+    '218': {"template": '205', "entity": "CO2", "unit": "Gg CO2 / yr"},
+    '219': {"template": '219', "entity": "CH4", "unit": "Gg CH4 / yr"},  # CH4
+    '220': {"template": '220', "entity": "CH4", "unit": "Gg CH4 / yr"},
+    '221': {"template": '221', "entity": "CH4", "unit": "Gg CH4 / yr"},
+    '222': {"template": '222', "entity": "CH4", "unit": "Gg CH4 / yr"},
+    '223': {"template": '207', "entity": "CH4", "unit": "Gg CH4 / yr"},
+    '224': {"template": '220', "entity": "CH4", "unit": "Gg CH4 / yr"},
+    '225': {"template": '225', "entity": "CH4", "unit": "Gg CH4 / yr"},
+    '226': {"template": '226', "entity": "CH4", "unit": "Gg CH4 / yr"},
+    '227': {"template": '207', "entity": "CH4", "unit": "Gg CH4 / yr"},
+    '228': {"template": '220', "entity": "CH4", "unit": "Gg CH4 / yr"},
+    '229': {"template": '229', "entity": "CH4", "unit": "Gg CH4 / yr"},
+    '230': {"template": '226', "entity": "CH4", "unit": "Gg CH4 / yr"},
+    '231': {"template": '207', "entity": "N2O", "unit": "Gg N2O / yr"},  # N2O
+    '232': {"template": '232', "entity": "N2O", "unit": "Gg N2O / yr"},
+    '233': {"template": '233', "entity": "N2O", "unit": "Gg N2O / yr"},
+    '234': {"template": '234', "entity": "N2O", "unit": "Gg N2O / yr"},
+    '235': {"template": '207', "entity": "N2O", "unit": "Gg N2O / yr"},
+    '236': {"template": '236', "entity": "N2O", "unit": "Gg N2O / yr"},
+    '237': {"template": '233', "entity": "N2O", "unit": "Gg N2O / yr"},
+    '238': {"template": '234', "entity": "N2O", "unit": "Gg N2O / yr"},
+    '239': {"template": '207', "entity": "N2O", "unit": "Gg N2O / yr"},
+    '240': {"template": '240', "entity": "N2O", "unit": "Gg N2O / yr"},
+    '241': {"template": '233', "entity": "N2O", "unit": "Gg N2O / yr"},
+    '242': {"template": '234', "entity": "N2O", "unit": "Gg N2O / yr"},
+    '243': {"template": '243', "entity": f"HFCS ({gwp_to_use})",
+            "unit": "Gg CO2 / yr"},  # HFCs
+    '244': {"template": '244', "entity": f"HFCS ({gwp_to_use})",
+            "unit": "Gg CO2 / yr"},
+    '245': {"template": '245', "entity": f"HFCS ({gwp_to_use})",
+            "unit": "Gg CO2 / yr"},
+    '246': {"template": '245', "entity": f"HFCS ({gwp_to_use})",
+            "unit": "Gg CO2 / yr"},
+    '247': {"template": '247', "entity": f"HFCS ({gwp_to_use})",
+            "unit": "Gg CO2 / yr"},
+    '248': {"template": '247', "entity": f"HFCS ({gwp_to_use})",
+            "unit": "Gg CO2 / yr"},
+    '249': {"template": '203', "entity": f"PFCS ({gwp_to_use})",
+            "unit": "Gg CO2 / yr"},  # PFCs
+    '250': {"template": '250', "entity": f"PFCS ({gwp_to_use})",
+            "unit": "Gg CO2 / yr"},
+    '251': {"template": '207', "entity": f"PFCS ({gwp_to_use})",
+            "unit": "Gg CO2 / yr"},
+    '252': {"template": '252', "entity": f"PFCS ({gwp_to_use})",
+            "unit": "Gg CO2 / yr"},
+    '253': {"template": '253', "entity": f"PFCS ({gwp_to_use})",
+            "unit": "Gg CO2 / yr"},
+    '254': {"template": '254', "entity": f"PFCS ({gwp_to_use})",
+            "unit": "Gg CO2 / yr"},
+    '255': {"template": '219', "entity": f"SF6 ({gwp_to_use})",
+            "unit": "Gg CO2 / yr"},  # SF6
+    '256': {"template": '256', "entity": f"SF6 ({gwp_to_use})",
+            "unit": "Gg CO2 / yr"},
+    '257': {"template": '207', "entity": f"SF6 ({gwp_to_use})",
+            "unit": "Gg CO2 / yr"},
+    '258': {"template": '258', "entity": f"SF6 ({gwp_to_use})",
+            "unit": "Gg CO2 / yr"},
+    '259': {"template": '207', "entity": f"SF6 ({gwp_to_use})",
+            "unit": "Gg CO2 / yr"},
+    '260': {"template": '260', "entity": f"SF6 ({gwp_to_use})",
+            "unit": "Gg CO2 / yr"},
+    '261': {"template": '261', "entity": f"NF3 ({gwp_to_use})",
+            "unit": "Gg CO2 / yr"},  # NF3
+    '262': {"template": '262', "entity": f"NF3 ({gwp_to_use})",
+            "unit": "Gg CO2 / yr"},
+    '263': {"template": '207', "entity": f"NF3 ({gwp_to_use})",
+            "unit": "Gg CO2 / yr"},
+    '264': {"template": '264', "entity": f"NF3 ({gwp_to_use})",
+            "unit": "Gg CO2 / yr"},
+    '265': {"template": '207', "entity": f"NF3 ({gwp_to_use})",
+            "unit": "Gg CO2 / yr"},
+    '266': {"template": '266', "entity": f"NF3 ({gwp_to_use})",
+            "unit": "Gg CO2 / yr"},
+}
+
+# Country specific processing, passed to process_data_for_country as
+# processing_info_country.
+#   'aggregate_cats': additional categories (key), each defined by the list of
+#       categories in 'sources' and a descriptive 'name'
+#   'basket_copy': settings for the HFCS and PFCS baskets under the GWPs listed
+#       in 'GWPs_to_add', starting from 'source_GWP'
+# NOTE(review): the exact semantics of both steps are implemented in
+# process_data_for_country, which is not part of this file -- confirm there.
+country_processing_step1 = {
+    'aggregate_cats': {
+        'M.3.C.AG': {'sources': ['3.C.1', '3.C.2', '3.C.3', '3.C.4', '3.C.5',
+                                 '3.C.6', '3.C.7', '3.C.8'],
+                     'name': 'Aggregate sources and non-CO2 emissions sources on land '
+                             '(Agriculture)'},
+        'M.3.D.AG': {'sources': ['3.D.2'],
+                     'name': 'Other (Agriculture)'},
+        'M.AG.ELV': {'sources': ['M.3.C.AG', 'M.3.D.AG'],
+                     'name': 'Agriculture excluding livestock'},
+        'M.AG': {'sources': ['3.A', 'M.AG.ELV'],
+                     'name': 'Agriculture'},
+        'M.3.D.LU': {'sources': ['3.D.1'],
+                     'name': 'Other (LULUCF)'},
+        'M.LULUCF': {'sources': ['3.B', 'M.3.D.LU'],
+                     'name': 'LULUCF'},
+        'M.0.EL': {'sources': ['1', '2', 'M.AG', '4', '5'],
+                     'name': 'National total emissions excluding LULUCF'},
+    },
+    'basket_copy': {
+        'GWPs_to_add': ["SARGWP100", "AR5GWP100", "AR6GWP100"],
+        'entities': ["HFCS", "PFCS"],
+        'source_GWP': gwp_to_use,
+    },
+}
+
+# Gas baskets handed to process_data_for_country: each key is a basket entity,
+# the list holds the entities it is built from. One FGASES and one KYOTOGHG
+# basket is defined per GWP specification (SAR, AR4, AR5, AR6).
+# NOTE(review): how the constituents are combined is decided inside
+# process_data_for_country, which is not part of this file.
+gas_baskets = {
+    'FGASES (SARGWP100)': ['HFCS (SARGWP100)', 'PFCS (SARGWP100)', 'SF6', 'NF3',
+                           'Unspecified mix of HFCs (SARGWP100)',
+                           'Unspecified mix of PFCs (SARGWP100)'],
+    'FGASES (AR4GWP100)': ['HFCS (AR4GWP100)', 'PFCS (AR4GWP100)', 'SF6', 'NF3',
+                           'Unspecified mix of HFCs (AR4GWP100)',
+                           'Unspecified mix of PFCs (AR4GWP100)'],
+    'FGASES (AR5GWP100)':['HFCS (AR5GWP100)', 'PFCS (AR5GWP100)', 'SF6', 'NF3',
+                          'Unspecified mix of HFCs (AR5GWP100)',
+                          'Unspecified mix of PFCs (AR5GWP100)'
+                          ],
+    'FGASES (AR6GWP100)':['HFCS (AR6GWP100)', 'PFCS (AR6GWP100)', 'SF6', 'NF3',
+                          'Unspecified mix of HFCs (AR6GWP100)',
+                          'Unspecified mix of PFCs (AR6GWP100)'
+                          ],
+    'KYOTOGHG (SARGWP100)': ['CO2', 'CH4', 'N2O', 'FGASES (SARGWP100)'],
+    'KYOTOGHG (AR4GWP100)': ['CO2', 'CH4', 'N2O', 'FGASES (AR4GWP100)'],
+    'KYOTOGHG (AR5GWP100)': ['CO2', 'CH4', 'N2O', 'FGASES (AR5GWP100)'],
+    'KYOTOGHG (AR6GWP100)': ['CO2', 'CH4', 'N2O', 'FGASES (AR6GWP100)'],
+}

+ 211 - 0
UNFCCC_GHG_data/UNFCCC_reader/Malaysia/read_MYS_BUR3_from_pdf.py

@@ -0,0 +1,211 @@
+# this script reads data from Malaysia's BUR3
+
+import camelot
+import primap2 as pm2
+from primap2.pm2io._conversion import convert_ipcc_code_primap_to_primap2
+
+from UNFCCC_GHG_data.helper import process_data_for_country, fix_rows
+from UNFCCC_GHG_data.helper import downloaded_data_path, extracted_data_path
+from config_MYS_BUR4 import coords_cols, coords_defaults, coords_terminologies, \
+    meta_data, add_coords_cols
+from config_MYS_BUR3 import gas_baskets, terminology_proc, country_processing_step1
+from config_MYS_BUR3 import table_def_templates, table_defs, index_cols
+from config_MYS_BUR3 import values_replacement, cat_names_fix, cols_for_space_stripping
+from config_MYS_BUR3 import cat_codes_manual, cats_remove, cat_code_regexp
+
+# ###
+# configuration
+# ###
+input_folder = downloaded_data_path / 'UNFCCC' / 'Malaysia' / 'BUR3'
+output_folder = extracted_data_path / 'UNFCCC' / 'Malaysia'
+if not output_folder.exists():
+    output_folder.mkdir()
+
+pdf_file = "MALAYSIA_BUR3-UNFCCC_Submission.pdf"
+pdf_pages = range(184, 242)
+# CH4: 198 - 211
+# N2O: 212 - 225
+# HFCS: 226 - 228
+# PFCs: 229 - 233
+# SF6: 234 - 237
+# NF3: 238 - 241
+
+output_filename = 'MYS_BUR3_2020_'
+compression = dict(zlib=True, complevel=9)
+
+# ###
+# reading data and aggregation into one dataframe
+# ###
+df_all = None
+for page in pdf_pages:
+    print(f"++++++++++++++++++++++++++++++++")
+    print(f"+++++ Working on page {page} ++++++")
+    print(f"++++++++++++++++++++++++++++++++")
+    page_template_nr = table_defs[str(page)]["template"]
+    area = table_def_templates[page_template_nr]["area"]
+    if "cols" in table_def_templates[page_template_nr].keys():
+        cols = table_def_templates[page_template_nr]["cols"]
+        tables = camelot.read_pdf(str(input_folder / pdf_file), pages=str(page), \
+                                  flavor='stream', table_areas=area, columns=cols,
+                                  split_text=True)
+    else:
+        tables = camelot.read_pdf(str(input_folder / pdf_file), pages=str(page), \
+                                  flavor='stream', table_areas=area)
+
+    df_current = tables[0].df.copy()
+    df_current.iloc[0,0] = 'Categories'
+    df_current.columns = df_current.iloc[0]
+    df_current = df_current.drop(0)
+    # replace line breaks within the category names by spaces
+    df_current[index_cols[0]] = \
+        df_current[index_cols[0]].str.replace("\n", " ")
+    # replace double and triple spaces
+    df_current[index_cols[0]] = \
+        df_current[index_cols[0]].str.replace("   ", " ")
+    df_current[index_cols[0]] = \
+        df_current[index_cols[0]].str.replace("  ", " ")
+
+    # fix the split rows
+    if "rows_to_fix" in table_def_templates[page_template_nr].keys():
+        for n_rows in table_def_templates[page_template_nr]["rows_to_fix"].keys():
+            df_current = fix_rows(df_current,
+                                  table_def_templates[page_template_nr]["rows_to_fix"][
+                                      n_rows], index_cols[0], n_rows)
+
+    # replace category names with typos
+    df_current[index_cols[0]] = \
+        df_current[index_cols[0]].replace(cat_names_fix)
+
+    # replace empty strings
+    df_current = df_current.replace(values_replacement)
+
+    # add entity and unit information
+    df_current.insert(1, "unit", table_defs[str(page)]["unit"])
+    df_current.insert(1, "entity", table_defs[str(page)]["entity"])
+
+    # set index
+    # df_current = df_current.set_index(index_cols)
+    # strip trailing and leading spaces
+    for col in cols_for_space_stripping:
+        df_current[col] = df_current[col].str.strip()
+
+    # print(df_current.columns.values)
+
+    # aggregate dfs
+    if df_all is None:
+        df_all = df_current
+    else:
+        # find intersecting cols
+        cols_all = df_all.columns.values
+        cols_current = df_current.columns.values
+        cols_both = list(set(cols_all).intersection(set(cols_current)))
+        # print(cols_both)
+        if len(cols_both) > 0:
+            df_all = df_all.merge(df_current, how='outer', on=cols_both,
+                                  suffixes=(None, None))
+        else:
+            df_all = df_all.merge(df_current, how='outer', suffixes=(None, None))
+        df_all = df_all.groupby(index_cols).first().reset_index()
+        # df_all = df_all.join(df_current, how='outer')
+
+# ###
+# conversion to primap2 interchange format
+# ###
+# drop the rows with memo items etc
+for cat in cats_remove:
+    df_all = df_all.drop(df_all[df_all["Categories"] == cat].index)
+# make a copy of the categories row
+df_all["orig_cat_name"] = df_all["Categories"]
+
+# replace cat names by codes in col "Categories"
+# first the manual replacements
+df_all["Categories"] = df_all["Categories"].replace(cat_codes_manual)
+# then the regex repalcements
+repl = lambda m: convert_ipcc_code_primap_to_primap2('IPC' + m.group('code'))
+df_all["Categories"] = df_all["Categories"].str.replace(cat_code_regexp, repl, regex=True)
+
+# make sure all col headers are str
+df_all.columns = df_all.columns.map(str)
+
+# remove thousands separators as pd.to_numeric can't deal with that
+# also replace None with NaN
+year_cols = list(set(df_all.columns) - set(['Categories', 'entity', 'unit', 'orig_cat_name']))
+for col in year_cols:
+    df_all.loc[:, col] = df_all.loc[:, col].str.strip()
+    repl = lambda m: m.group('part1') + m.group('part2')
+    df_all.loc[:, col] = df_all.loc[:, col].str.replace('(?P<part1>[0-9]+),(?P<part2>[0-9\.]+)$', repl, regex=True)
+    df_all[col][df_all[col].isnull()] = 'NaN'
+    # manually map code NENO to nan
+    df_all.loc[:, col] = df_all.loc[:, col].str.replace('NENO','NaN')
+    df_all.loc[:, col] = df_all.loc[:, col].str.replace('O NANaN','NaN')
+    df_all.loc[:, col] = df_all.loc[:, col].str.replace('IE NO','0')
+    df_all.loc[:, col] = df_all.loc[:, col].str.replace('IE NA NO I','0')
+    # TODO: add code to PRIMAP2
+
+# drop orig_cat_name as it's non-unique per category
+df_all = df_all.drop(columns=["orig_cat_name"])
+
+data_if = pm2.pm2io.convert_wide_dataframe_if(
+    df_all,
+    coords_cols=coords_cols,
+    #add_coords_cols=add_coords_cols,
+    coords_defaults=coords_defaults,
+    coords_terminologies=coords_terminologies,
+    #coords_value_mapping=coords_value_mapping,
+    #coords_value_filling=coords_value_filling,
+    #filter_remove=filter_remove,
+    #filter_keep=filter_keep,
+    meta_data=meta_data,
+    convert_str=True,
+    time_format="%Y",
+    )
+
+data_pm2 = pm2.pm2io.from_interchange_format(data_if)
+
+data_if = data_pm2.pr.to_interchange_format()
+
+# ###
+# save raw data to IF and native format
+# ###
+if not output_folder.exists():
+    output_folder.mkdir()
+pm2.pm2io.write_interchange_format(
+    output_folder / (output_filename + coords_terminologies["category"] + "_raw"),
+    data_if)
+
+encoding = {var: compression for var in data_pm2.data_vars}
+data_pm2.pr.to_netcdf(
+    output_folder / (output_filename + coords_terminologies["category"] + "_raw.nc"),
+    encoding=encoding)
+
+# ###
+# ## process the data
+# ###
+data_proc_pm2 = data_pm2
+
+# actual processing
+data_proc_pm2 = process_data_for_country(
+    data_proc_pm2,
+    gas_baskets=gas_baskets,
+    entities_to_ignore=[],
+    processing_info_country=country_processing_step1,
+)
+
+# adapt source and metadata
+current_source = data_proc_pm2.coords["source"].values[0]
+data_temp = data_proc_pm2.pr.loc[{"source": current_source}]
+data_proc_pm2 = data_proc_pm2.pr.set("source", 'BUR_NIR', data_temp)
+
+# ###
+# save data to IF and native format
+# ###
+data_proc_if = data_proc_pm2.pr.to_interchange_format()
+if not output_folder.exists():
+    output_folder.mkdir()
+pm2.pm2io.write_interchange_format(
+    output_folder / (output_filename + terminology_proc), data_proc_if)
+
+encoding = {var: compression for var in data_proc_pm2.data_vars}
+data_proc_pm2.pr.to_netcdf(
+    output_folder / (output_filename + terminology_proc + ".nc"),
+    encoding=encoding)

+ 214 - 0
UNFCCC_GHG_data/UNFCCC_reader/Malaysia/read_MYS_BUR4_from_pdf.py

@@ -0,0 +1,214 @@
+# this script reads data from Malaysia's BUR4
+# code is mostly identical to BUR3
+
+
+import camelot
+import primap2 as pm2
+from primap2.pm2io._conversion import convert_ipcc_code_primap_to_primap2
+
+from UNFCCC_GHG_data.helper import process_data_for_country, fix_rows
+from UNFCCC_GHG_data.helper import downloaded_data_path, extracted_data_path
+from config_MYS_BUR4 import coords_cols, coords_defaults, coords_terminologies, \
+    meta_data, add_coords_cols
+from config_MYS_BUR4 import gas_baskets, terminology_proc, country_processing_step1
+from config_MYS_BUR4 import table_def_templates, table_defs, index_cols
+from config_MYS_BUR4 import values_replacement, cat_names_fix, cols_for_space_stripping
+from config_MYS_BUR4 import cat_codes_manual, cats_remove, cat_code_regexp
+
+# ###
+# configuration
+# ###
+input_folder = downloaded_data_path / 'UNFCCC' / 'Malaysia' / 'BUR4'
+output_folder = extracted_data_path / 'UNFCCC' / 'Malaysia'
+if not output_folder.exists():
+    output_folder.mkdir()
+
+pdf_file = "MY_BUR4_2022.pdf"
+pdf_pages = range(203, 267)
+# CO2: 203 - 218
+# CH4: 219 - 230
+# N2O: 231 - 2242
+# HFCS: 243 - 248
+# PFCs: 249 - 254
+# SF6: 255 - 260
+# NF3: 261 - 266
+
+output_filename = 'MYS_BUR4_2022_'
+compression = dict(zlib=True, complevel=9)
+
+# ###
+# reading data and aggregation into one dataframe
+# ###
+df_all = None
+for page in pdf_pages:
+    print(f"++++++++++++++++++++++++++++++++")
+    print(f"+++++ Working on page {page} ++++++")
+    print(f"++++++++++++++++++++++++++++++++")
+    page_template_nr = table_defs[str(page)]["template"]
+    area = table_def_templates[page_template_nr]["area"]
+    if "cols" in table_def_templates[page_template_nr].keys():
+        cols = table_def_templates[page_template_nr]["cols"]
+        tables = camelot.read_pdf(str(input_folder / pdf_file), pages=str(page), \
+                                  flavor='stream', table_areas=area, columns=cols,
+                                  split_text=True)
+    else:
+        tables = camelot.read_pdf(str(input_folder / pdf_file), pages=str(page), \
+                                  flavor='stream', table_areas=area)
+
+    df_current = tables[0].df.copy()
+    df_current.iloc[0,0] = 'Categories'
+    df_current.columns = df_current.iloc[0]
+    df_current = df_current.drop(0)
+    # replace line breaks within the category names by spaces
+    df_current[index_cols[0]] = \
+        df_current[index_cols[0]].str.replace("\n", " ")
+    # replace double and triple spaces
+    df_current[index_cols[0]] = \
+        df_current[index_cols[0]].str.replace("   ", " ")
+    df_current[index_cols[0]] = \
+        df_current[index_cols[0]].str.replace("  ", " ")
+
+    # fix the split rows
+    if "rows_to_fix" in table_def_templates[page_template_nr].keys():
+        for n_rows in table_def_templates[page_template_nr]["rows_to_fix"].keys():
+            df_current = fix_rows(df_current,
+                                  table_def_templates[page_template_nr]["rows_to_fix"][
+                                      n_rows], index_cols[0], n_rows)
+
+    # replace category names with typos
+    df_current[index_cols[0]] = \
+        df_current[index_cols[0]].replace(cat_names_fix)
+
+    # replace empty strings
+    df_current = df_current.replace(values_replacement)
+
+    # add entity and unit information
+    df_current.insert(1, "unit", table_defs[str(page)]["unit"])
+    df_current.insert(1, "entity", table_defs[str(page)]["entity"])
+
+    # set index
+    # df_current = df_current.set_index(index_cols)
+    # strip trailing and leading spaces
+    for col in cols_for_space_stripping:
+        df_current[col] = df_current[col].str.strip()
+
+    # print(df_current.columns.values)
+
+    # aggregate dfs
+    if df_all is None:
+        df_all = df_current
+    else:
+        # find intersecting cols
+        cols_all = df_all.columns.values
+        cols_current = df_current.columns.values
+        cols_both = list(set(cols_all).intersection(set(cols_current)))
+        # print(cols_both)
+        if len(cols_both) > 0:
+            df_all = df_all.merge(df_current, how='outer', on=cols_both,
+                                  suffixes=(None, None))
+        else:
+            df_all = df_all.merge(df_current, how='outer', suffixes=(None, None))
+        df_all = df_all.groupby(index_cols).first().reset_index()
+        # df_all = df_all.join(df_current, how='outer')
+
+# ###
+# conversion to primap2 interchange format
+# ###
+# drop the rows with memo items etc
+for cat in cats_remove:
+    df_all = df_all.drop(df_all[df_all["Categories"] == cat].index)
+# make a copy of the categories row
+df_all["orig_cat_name"] = df_all["Categories"]
+
+# replace cat names by codes in col "Categories"
+# first the manual replacements
+df_all["Categories"] = df_all["Categories"].replace(cat_codes_manual)
+# then the regex repalcements
+repl = lambda m: convert_ipcc_code_primap_to_primap2('IPC' + m.group('code'))
+df_all["Categories"] = df_all["Categories"].str.replace(cat_code_regexp, repl, regex=True)
+
+# make sure all col headers are str
+df_all.columns = df_all.columns.map(str)
+
+# remove thousands separators as pd.to_numeric can't deal with that
+# also replace None with NaN
+year_cols = list(set(df_all.columns) - set(['Categories', 'entity', 'unit', 'orig_cat_name']))
+for col in year_cols:
+    df_all.loc[:, col] = df_all.loc[:, col].str.strip()
+    repl = lambda m: m.group('part1') + m.group('part2')
+    df_all.loc[:, col] = df_all.loc[:, col].str.replace('(?P<part1>[0-9]+),(?P<part2>[0-9\.]+)$', repl, regex=True)
+    df_all[col][df_all[col].isnull()] = 'NaN'
+    # manually map code NENO to nan
+    df_all.loc[:, col] = df_all.loc[:, col].str.replace('NENO','NaN')
+    df_all.loc[:, col] = df_all.loc[:, col].str.replace('O NANaN','NaN')
+    df_all.loc[:, col] = df_all.loc[:, col].str.replace('IE NO','0')
+    df_all.loc[:, col] = df_all.loc[:, col].str.replace('IE NA NO I','0')
+    # TODO: add code to PRIMAP2
+
+# drop orig_cat_name as it's non-unique per category
+df_all = df_all.drop(columns=["orig_cat_name"])
+
+data_if = pm2.pm2io.convert_wide_dataframe_if(
+    df_all,
+    coords_cols=coords_cols,
+    #add_coords_cols=add_coords_cols,
+    coords_defaults=coords_defaults,
+    coords_terminologies=coords_terminologies,
+    #coords_value_mapping=coords_value_mapping,
+    #coords_value_filling=coords_value_filling,
+    #filter_remove=filter_remove,
+    #filter_keep=filter_keep,
+    meta_data=meta_data,
+    convert_str=True,
+    time_format="%Y",
+    )
+
+data_pm2 = pm2.pm2io.from_interchange_format(data_if)
+
+data_if = data_pm2.pr.to_interchange_format()
+
+# ###
+# save raw data to IF and native format
+# ###
+if not output_folder.exists():
+    output_folder.mkdir()
+pm2.pm2io.write_interchange_format(
+    output_folder / (output_filename + coords_terminologies["category"] + "_raw"),
+    data_if)
+
+encoding = {var: compression for var in data_pm2.data_vars}
+data_pm2.pr.to_netcdf(
+    output_folder / (output_filename + coords_terminologies["category"] + "_raw.nc"),
+    encoding=encoding)
+
+# ###
+# ## process the data
+# ###
+data_proc_pm2 = data_pm2
+
+# actual processing
+data_proc_pm2 = process_data_for_country(
+    data_proc_pm2,
+    gas_baskets=gas_baskets,
+    entities_to_ignore=[],
+    processing_info_country=country_processing_step1,
+)
+
+# adapt source and metadata
+current_source = data_proc_pm2.coords["source"].values[0]
+data_temp = data_proc_pm2.pr.loc[{"source": current_source}]
+data_proc_pm2 = data_proc_pm2.pr.set("source", 'BUR_NIR', data_temp)
+
+# ###
+# save data to IF and native format
+# ###
+data_proc_if = data_proc_pm2.pr.to_interchange_format()
+if not output_folder.exists():
+    output_folder.mkdir()
+pm2.pm2io.write_interchange_format(
+    output_folder / (output_filename + terminology_proc), data_proc_if)
+
+encoding = {var: compression for var in data_proc_pm2.data_vars}
+data_proc_pm2.pr.to_netcdf(
+    output_folder / (output_filename + terminology_proc + ".nc"),
+    encoding=encoding)

+ 1 - 1
UNFCCC_GHG_data/UNFCCC_reader/Thailand/config_THA_BUR3.py

@@ -250,7 +250,7 @@ country_processing_step2 = {
     'basket_copy': {
         'GWPs_to_add': ["SARGWP100", "AR5GWP100", "AR6GWP100"],
         'entities': ["HFCS", "PFCS"],
-        'source_GWP': 'AR4GWP100',
+        'source_GWP': gwp_to_use,
     },
 }
 ## not in BUR3: 1.A.1.a, 1.A.1.b, 1.A.3.a, 1.A.3.b, 1.A.3.c, 1.A.3.d, 1.A.5, 1.B.3,

+ 1 - 1
UNFCCC_GHG_data/UNFCCC_reader/Thailand/config_THA_BUR4.py

@@ -233,7 +233,7 @@ country_processing_step2 = {
     'basket_copy': {
         'GWPs_to_add': ["SARGWP100", "AR5GWP100", "AR6GWP100"],
         'entities': ["HFCS", "PFCS"],
-        'source_GWP': 'AR4GWP100',
+        'source_GWP': gwp_to_use,
     },
 }
 

+ 0 - 3
UNFCCC_GHG_data/UNFCCC_reader/Thailand/read_THA_BUR3_from_pdf.py

@@ -1,9 +1,6 @@
 # this script reads data from Thailand's BUR3
 # Data is read from the pdf file
 
-import os
-os.environ["UNFCCC_GHG_ROOT_PATH"] = \
-     "/storage/data/data/PRIMAP/primap_2.0/datasets/UNFCCC_non-AnnexI_data/"
 import pandas as pd
 import primap2 as pm2
 import camelot

+ 0 - 3
UNFCCC_GHG_data/UNFCCC_reader/Thailand/read_THA_BUR4_from_pdf.py

@@ -10,9 +10,6 @@
 # CO2eq and thus HFC data can be used and SF6 data is not 0 as in the main inventory
 # tables
 
-import os
-os.environ["UNFCCC_GHG_ROOT_PATH"] = \
-     "/storage/data/data/PRIMAP/primap_2.0/datasets/UNFCCC_non-AnnexI_data/"
 import pandas as pd
 import primap2 as pm2
 

+ 1 - 0
UNFCCC_GHG_data/UNFCCC_reader/folder_mapping.json

@@ -7,6 +7,7 @@
     "MAR": "Morocco",
     "COL": "Colombia",
     "CHL": "Chile",
+    "MYS": "Malaysia",
     "MNE": "Montenegro",
     "ISR": "Israel",
     "IDN": "Indonesia"

+ 2 - 0
UNFCCC_GHG_data/helper/__init__.py

@@ -7,6 +7,7 @@ from .definitions import custom_country_mapping, custom_folders
 from .definitions import GWP_factors
 from .functions import get_country_code, get_country_name, convert_categories
 from .functions import create_folder_mapping, process_data_for_country, get_code_file
+from .functions import fix_rows
 
 __all__ = [
     "root_path",
@@ -27,4 +28,5 @@ __all__ = [
     "convert_categories",
     "create_folder_mapping",
     "process_data_for_country",
+    "fix_rows",
 ]

+ 44 - 1
UNFCCC_GHG_data/helper/functions.py

@@ -822,4 +822,47 @@ def get_code_file(
     if code_file_path is not None:
         return code_file_path.relative_to(root_path)
     else:
-        return None
+        return None
+
+
+def fix_rows(
+    data: pd.DataFrame, rows_to_fix: list, col_to_use: str, n_rows: int
+) -> pd.DataFrame:
+    """
+    Merge table rows that have been split into several rows during reading from pdf.
+
+    This is the version used for Malaysia BUR3 and BUR4. Adapt it for other BURs
+    if needed.
+
+    For each value in ``rows_to_fix`` the first row whose ``col_to_use`` cell
+    equals that value is looked up. A window of consecutive rows around that row
+    is joined cell by cell with single spaces, the result is written to the first
+    row of the window and the remaining rows of the window are dropped.
+
+    :param data: table as read from the pdf. All cells have to be strings as
+        they are joined with ``str.join``. The passed DataFrame is not modified.
+    :param rows_to_fix: values of ``col_to_use`` that identify the split rows.
+    :param col_to_use: name of the column in which the values are searched.
+    :param n_rows: size of the merge window. A positive value merges the matched
+        row and the ``n_rows - 1`` rows following it. ``-3`` merges the row
+        before the matched row, the matched row and the row after it. ``-5``
+        merges the row before the matched row, the matched row and the three
+        rows after it.
+    :return: copy of ``data`` in which the split rows have been merged.
+    :raises IndexError: if a value from ``rows_to_fix`` is not present in
+        ``col_to_use`` or if the merge window is empty or reaches beyond the
+        first or last row of the table.
+    """
+    # offsets of the merge window relative to the matched row (stop is exclusive)
+    if n_rows == -3:
+        first_offset, stop_offset = -1, 2
+    elif n_rows == -5:
+        first_offset, stop_offset = -1, 4
+    else:
+        first_offset, stop_offset = 0, n_rows
+
+    # the texts that are cleaned up after joining; order matters
+    replacements = (
+        ("  ", " "),
+        ("N O", "NO"),
+        (", N", ",N"),
+        ("- ", "-"),
+    )
+
+    # work on a copy so the caller's table is never left partially modified
+    data = data.copy()
+    for row in rows_to_fix:
+        # find the row number of the first matching row
+        index = data.loc[data[col_to_use] == row].index
+        if index.empty:
+            raise IndexError(
+                f"Can't merge split row {row}: not found in column "
+                f"{col_to_use} with values {list(data[col_to_use])}"
+            )
+        loc = data.index.get_loc(index[0])
+        start, stop = loc + first_offset, loc + stop_offset
+        # a negative start would silently wrap around to the end of the table
+        if start < 0 or stop > len(data) or start >= stop:
+            raise IndexError(
+                f"Can't merge split row {row}: rows {start} to {stop - 1} are "
+                f"not a valid range in a table with {len(data)} rows"
+            )
+        rows_to_merge = data.iloc[start:stop]
+        indices_to_merge = rows_to_merge.index
+        # join the rows column by column
+        new_row = rows_to_merge.agg(" ".join)
+        # replace the double spaces that are created
+        # must be done here and not at the end as splits are not always
+        # the same and join would produce different col values
+        for old, new in replacements:
+            # literal replacement, independent of the pandas default for regex
+            new_row = new_row.str.replace(old, new, regex=False)
+        data.loc[indices_to_merge[0]] = new_row
+        data = data.drop(indices_to_merge[1:])
+    return data