Browse Source

Merge pull request #96 from JGuetschow/bangladesh-BUR1

crdanielbusch 4 months ago
parent
commit
909629ea29

+ 6 - 0
docs/source/api/unfccc_ghg_data.unfccc_reader.Bangladesh.config_bgd_bur1.rst

@@ -0,0 +1,6 @@
+unfccc\_ghg\_data.unfccc\_reader.Bangladesh.config\_bgd\_bur1
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. automodule:: unfccc_ghg_data.unfccc_reader.Bangladesh.config_bgd_bur1
+
+.. currentmodule:: unfccc_ghg_data.unfccc_reader.Bangladesh.config_bgd_bur1

+ 6 - 0
docs/source/api/unfccc_ghg_data.unfccc_reader.Bangladesh.read_BGD_BUR1_from_pdf.rst

@@ -0,0 +1,6 @@
+unfccc\_ghg\_data.unfccc\_reader.Bangladesh.read\_BGD\_BUR1\_from\_pdf
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. automodule:: unfccc_ghg_data.unfccc_reader.Bangladesh.read_BGD_BUR1_from_pdf
+
+.. currentmodule:: unfccc_ghg_data.unfccc_reader.Bangladesh.read_BGD_BUR1_from_pdf

+ 13 - 0
docs/source/api/unfccc_ghg_data.unfccc_reader.Bangladesh.rst

@@ -0,0 +1,13 @@
+unfccc\_ghg\_data.unfccc\_reader.Bangladesh
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. automodule:: unfccc_ghg_data.unfccc_reader.Bangladesh
+
+.. currentmodule:: unfccc_ghg_data.unfccc_reader.Bangladesh
+
+
+.. autosummary::
+  :toctree: ./
+
+  unfccc_ghg_data.unfccc_reader.Bangladesh.config_bgd_bur1
+  unfccc_ghg_data.unfccc_reader.Bangladesh.read_BGD_BUR1_from_pdf

+ 1 - 0
docs/source/api/unfccc_ghg_data.unfccc_reader.rst

@@ -10,6 +10,7 @@ unfccc\_ghg\_data.unfccc\_reader
   :toctree: ./
 
   unfccc_ghg_data.unfccc_reader.Argentina
+  unfccc_ghg_data.unfccc_reader.Bangladesh
   unfccc_ghg_data.unfccc_reader.Burundi
   unfccc_ghg_data.unfccc_reader.Chile
   unfccc_ghg_data.unfccc_reader.China

+ 1 - 0
extracted_data/UNFCCC/Bangladesh/BGD_BUR1_2023_IPCC2006_PRIMAP.csv

@@ -0,0 +1 @@
+../../../.git/annex/objects/j5/Xv/MD5E-s48259--34f747d82aba0fd796f26a7d3e667cd6.csv/MD5E-s48259--34f747d82aba0fd796f26a7d3e667cd6.csv

+ 1 - 0
extracted_data/UNFCCC/Bangladesh/BGD_BUR1_2023_IPCC2006_PRIMAP.nc

@@ -0,0 +1 @@
+../../../.git/annex/objects/mJ/q3/MD5E-s68450--6bdee1bd6dbfb88e785b30c60bc9c0e2.nc/MD5E-s68450--6bdee1bd6dbfb88e785b30c60bc9c0e2.nc

+ 24 - 0
extracted_data/UNFCCC/Bangladesh/BGD_BUR1_2023_IPCC2006_PRIMAP.yaml

@@ -0,0 +1,24 @@
+attrs:
+  references: https://unfccc.int/documents/634149
+  rights: ''
+  contact: daniel-busch@climate-resource.de
+  title: Bangladesh. Biennial update report (BUR). BUR1 Processed on 2024-08-22 Processed
+    on 2024-08-22
+  comment: Read fom pdf by Daniel Busch Processed on 2024-08-22 Processed on 2024-08-22
+  institution: UNFCCC
+  cat: category (IPCC2006_PRIMAP)
+  area: area (ISO3)
+  scen: scenario (PRIMAP)
+  entity: KYOTOGHG
+time_format: '%Y'
+dimensions:
+  '*':
+  - time
+  - source
+  - category (IPCC2006_PRIMAP)
+  - area (ISO3)
+  - provenance
+  - scenario (PRIMAP)
+  - entity
+  - unit
+data_file: BGD_BUR1_2023_IPCC2006_PRIMAP.csv

+ 1 - 0
extracted_data/UNFCCC/Bangladesh/BGD_BUR1_2023_IPCC2006_PRIMAP_raw.csv

@@ -0,0 +1 @@
+../../../.git/annex/objects/jz/0P/MD5E-s11281--6f4f7230e2e1548faebd2687a917b6c7.csv/MD5E-s11281--6f4f7230e2e1548faebd2687a917b6c7.csv

+ 1 - 0
extracted_data/UNFCCC/Bangladesh/BGD_BUR1_2023_IPCC2006_PRIMAP_raw.nc

@@ -0,0 +1 @@
+../../../.git/annex/objects/K2/Q6/MD5E-s39722--d598b22fa54ef1d596d65cd1847ac0d9.nc/MD5E-s39722--d598b22fa54ef1d596d65cd1847ac0d9.nc

+ 22 - 0
extracted_data/UNFCCC/Bangladesh/BGD_BUR1_2023_IPCC2006_PRIMAP_raw.yaml

@@ -0,0 +1,22 @@
+attrs:
+  references: https://unfccc.int/documents/634149
+  rights: ''
+  contact: daniel-busch@climate-resource.de
+  title: Bangladesh. Biennial update report (BUR). BUR1
+  comment: Read fom pdf by Daniel Busch
+  institution: UNFCCC
+  cat: category (IPCC2006_PRIMAP)
+  area: area (ISO3)
+  scen: scenario (PRIMAP)
+time_format: '%Y'
+dimensions:
+  '*':
+  - time
+  - source
+  - category (IPCC2006_PRIMAP)
+  - area (ISO3)
+  - provenance
+  - scenario (PRIMAP)
+  - entity
+  - unit
+data_file: BGD_BUR1_2023_IPCC2006_PRIMAP_raw.csv

+ 30 - 0
src/unfccc_ghg_data/unfccc_reader/Bangladesh/__init__.py

@@ -0,0 +1,30 @@
+"""Read Bangladesh's BURs, NIRs, NCs
+
+Scripts and configurations to read Bangladesh's submissions to the UNFCCC.
+Currently, the following submissions and datasets are available (all datasets
+including DI (read using the DI-reader) and legacy BUR/NIR (no code)):
+
+.. exec_code::
+    :hide_code:
+
+    from unfccc_ghg_data.helper.functions import (get_country_datasets,
+                                                  get_country_submissions)
+    country = 'BGD'
+    # print available submissions
+    print("="*15 + " Available submissions " + "="*15)
+    get_country_submissions(country, True)
+    print("")
+
+    #print available datasets
+    print("="*15 + " Available datasets " + "="*15)
+    get_country_datasets(country, True)
+
+You can also obtain this information running
+
+.. code-block:: bash
+
+    poetry run doit country_info country=BGD
+
+See below for a listing of scripts for BUR/NIR reading including links.
+
+"""

+ 1053 - 0
src/unfccc_ghg_data/unfccc_reader/Bangladesh/config_bgd_bur1.py

@@ -0,0 +1,1053 @@
+"""
+Configuration file to read Bangladesh's BUR 1.
+
+# Overview of all available GHG tables
+
+# not reading:
+# table 7, page 70 - already in main table in annex
+# table 8, page 71 - only four new data points for 2012
+# figure 22, page 83 - this is a summary of all energy tables
+# figure 23 - image of summary of IPPU, data are available as tables in same
+# chapter
+# table 27 - rice cultivation available - in main tables
+# table 28 - N2O from fertilizers - in main tables
+# table 29 - indirect N2O from fertilizer - in main table
+# table 31 - enteric CH4 by livestock - low priority
+# table 32 - manure CH4 by livestock - low priority
+# table 37 - already in main tables in annex
+# table 19-23, ammonia-urea, cement, glass, lubricants, steel mills
+# 2013-2019 on pages 86-88 - already in main table
+
+# reading:
+# table 16, page 78 - 2013-2019 by industry sub-sectors - typed
+# table 17, page 79 - 2013-2019 transportation - typed
+# table 18, page 80 - residential and commercial sector 2013-2019 - typed
+# table 19, page 80 - agriculture energy use 2013-2019 - typed
+# figure 20, page 81 - gas leakage 2013-2019 - typed
+
+
+"""
+
+coords_terminologies = {
+    "area": "ISO3",
+    "category": "IPCC2006_PRIMAP",
+    "scenario": "PRIMAP",
+}
+
+# primap2 format conversion
+coords_cols = {
+    "category": "category",
+    "entity": "entity",
+    "unit": "unit",
+}
+
+coords_defaults = {
+    "source": "BGD-GHG-Inventory",
+    "provenance": "measured",
+    "area": "BGD",
+    "scenario": "BUR1",
+}
+
+gwp_to_use = "AR4GWP100"
+
+coords_value_mapping = {
+    "unit": "PRIMAP1",
+    "category": "PRIMAP1",
+}
+
+filter_remove = {
+    "f_memo": {"category": "MEMO"},
+    "f_info": {"category": "INFO"},
+}
+
+meta_data = {
+    "references": "https://unfccc.int/documents/634149",
+    "rights": "",  # unknown
+    "contact": "daniel-busch@climate-resource.de",
+    "title": "Bangladesh. Biennial update report (BUR). BUR1",
+    "comment": "Read fom pdf by Daniel Busch",
+    "institution": "UNFCCC",
+}
+
+inv_conf = {
+    "entity_row": 0,
+    "unit_row": 1,
+    "index_cols": "Greenhouse gas source and sink categories",
+    "header_long": ["orig_cat_name", "entity", "unit", "time", "data"],
+    "cat_code_regexp": r"^(?P<code>[a-zA-Z0-9\.]{1,11})[\s\.].*",
+}
+inv_conf_per_year = {
+    "2013": {
+        "page_defs": {
+            "207": {
+                "area": ["60,630,534,79"],
+                "cols": ["387,444,495"],
+                "skip_rows_start": 0,
+                "skip_rows_end": 0,
+            },
+            "208": {
+                "area": ["63,720,527,120"],
+                "cols": ["380,437,491"],
+                "skip_rows_start": 8,
+                "skip_rows_end": 4,
+            },
+        },
+        "rows_to_fix": {
+            3: [
+                "3 - GHG Emissions Agriculture, Livestock & Forest and Other Land -Use"
+            ],
+            2: [
+                # "B-Methane emission from domestic waste water" and
+                # "c-nitrous oxide emission from domestic waste water" are the same category
+                # and should be merged
+                "B-Methane emission from domestic waste water",
+                # "Total Manure ch4 emissions" and
+                # "Total Direct n2o emissions from manure system" are the same category
+                # and should be merged
+                "Total Manure ch4 emissions",
+            ],
+            -2: [
+                "ch4 emission from rice field",
+                "indirect nitrous oxide (n2o) from n based fertilizer",
+                "Direct nitrous oxide (n2o) emissions from fertilizer application",
+                "Total enteric ch4 emissions",
+                "Total Manure ch4 emissions",
+                "Total Direct n2o emissions from manure system",
+                "Total indirect n2o emissions - Volatilization",
+                "Total indirect n2o emissions - leaching/Runoff",
+                "CO2 from Biomass burning for Energy purpose",
+            ],
+        },
+        "header": [
+            "Greenhouse gas source and sink categories",
+            "CO2",
+            "CH4",
+            "N2O",
+        ],
+        "unit": [
+            "-",
+            "Gg",
+            "Gg",
+            "Gg",
+        ],
+        "skip_rows": 6,
+        # TODO The manual codes can be summarised for all years
+        "cat_codes_manual": {
+            "1-A - Fuel Combustion Activities_Energy Industries": "1.A",
+            "1 - a1- electricity Generation": "1.A.1",
+            "1.a2- Manufacturing industries and construction": "1.A.2",
+            "1.a3-Transport": "1.A.3",
+            "1.a4-other sectors": "1.A.4",
+            "2 a. 1-cement Production": "2.A.1",
+            "2a3 Glass Production": "2.A.3",
+            "2 B. 1 - ammonia Production": "2.B.1",
+            "2 C-Metal Industry": "2.C",
+            "2 c. 1 iron and steel Production": "2.C.1",
+            "2. D - Non-Energy Products from Fuels and Solvent Use": "2.D",
+            "2D 1-lubricant Use": "2.D.1",
+            "ch4 emission from rice field": "3.C.7",
+            "indirect nitrous oxide (n2o) from n based fertilizer": "3.C.5",
+            "Direct nitrous oxide (n2o) emissions from fertilizer application": "3.C.4",
+            "Direct carbon Dioxide emissions from urea fertilizer": "3.C.3",
+            "Total enteric ch4 emissions": "3.A.1",
+            "Total Manure ch4 emissions Total Direct n2o emissions from manure system": "3.A.2",
+            "Total indirect n2o emissions -Volatilization": "3.C.5",
+            "Total indirect n2o emissions -leaching/Runoff": "3.C.5",
+            "4 a-solid Waste Disposal": "4.A",
+            "B-Methane emission from domestic waste water c-nitrous oxide emission from domestic waste water": "4.D.1",
+            "D- Metahne emission from industrial waste water": "4.D.2",
+            "Memo items (5)": "MEMO",
+            "Information Items": "INFO",
+            "international Bunkers": "M.BK",
+            "a-international aviation (international Bunkers)": "M.BK.A",
+            "B-international Water-borne navigation (international Bunkers)": "M.BK.M",
+            "CO2 from Biomass burning for Energy purpose": "M.BIO",
+        },
+        "merge_cats": "3C5",
+    },
+    "2014": {
+        "page_defs": {
+            "209": {
+                "area": ["74,715,542,78"],
+                "cols": ["380,441,498"],
+                "skip_rows_start": 9,
+                "skip_rows_end": 0,
+            },
+            "210": {
+                "area": ["64,715,529,196"],
+                "cols": ["380,435,491"],
+                "skip_rows_start": 8,
+                "skip_rows_end": 4,
+            },
+        },
+        "rows_to_fix": {
+            3: ["3 - GHG Emissions Agriculture, Livestock & Forest and Other"],
+            -2: [
+                "ch4 emission from rice field",
+                "indirect nitrous oxide (n2o) from n based fertilizer",
+                "Direct nitrous oxide (n2o) emissions from fertilizer application",
+                "Total enteric ch4 emissions",
+                "Total Manure ch4 emissions",
+                "Total Direct n2o emissions from manure system",
+                "Total indirect n2o emissions - Volatilization",
+                "Total indirect n2o emissions - leaching/Runoff",
+                "CO2 from Biomass burning for Energy purpose",
+            ],
+            2: [
+                # "B-Methane emission from domestic waste water" and
+                # "c-nitrous oxide emission from domestic waste water" are the same category
+                # and should be merged
+                "B-Methane emission from domestic waste water",
+                # "Total Manure ch4 emissions" and
+                # "Total Direct n2o emissions from manure system" are the same category
+                # and should be merged
+                "Total Manure ch4 emissions",
+            ],
+        },
+        "header": [
+            "Greenhouse gas source and sink categories",
+            "CO2",
+            "CH4",
+            "N2O",
+        ],
+        "unit": [
+            "-",
+            "Gg",
+            "Gg",
+            "Gg",
+        ],
+        "skip_rows": 0,
+        "cat_codes_manual": {
+            "1-a - Fuel combustion activities_energy industries": "1.A",
+            "1 - a1- electricity Generation": "1.A.1",
+            "1.a2- Manufacturing industries and construction": "1.A.2",
+            "1.a3-Transport": "1.A.3",
+            "1.a4-other sectors": "1.A.4",
+            "2 a. 1-cement Production": "2.A.1",
+            "2a3 Glass Production": "2.A.3",
+            "2 B. 1 - ammonia Production": "2.B.1",
+            "2 C-Metal Industry": "2.C",
+            "2 c. 1 iron and steel Production": "2.C.1",
+            "2. D - Non-Energy Products from Fuels and Solvent Use": "2.D",
+            "2D 1-lubricant Use": "2.D.1",
+            "ch4 emission from rice field": "3.C.7",
+            "indirect nitrous oxide (n2o) from n based fertilizer": "3.C.5",
+            "Direct nitrous oxide (n2o) emissions from fertilizer application": "3.C.4",
+            "Direct carbon Dioxide emissions from urea fertilizer": "3.C.3",
+            "Total enteric ch4 emissions": "3.A.1",
+            "Total Manure ch4 emissions Total Direct n2o emissions from manure system": "3.A.2",
+            "Total indirect n2o emissions -Volatilization": "3.C.5",
+            "Total indirect n2o emissions -leaching/Runoff": "3.C.5",
+            "4 a-solid Waste Disposal": "4.A",
+            "B-Methane emission from domestic waste water c-nitrous oxide emission from domestic waste water": "4.D.1",
+            "D- Metahne emission from industrial waste water": "4.D.2",
+            "Memo items (5)": "MEMO",
+            "Information Items": "INFO",
+            "international Bunkers": "M.BK",
+            "a-international aviation (international Bunkers)": "M.BK.A",
+            "B-international Water-borne navigation (international Bunkers)": "M.BK.M",
+            "CO2 from Biomass burning for Energy purpose": "M.BIO",
+        },
+        "merge_cats": "3C5",
+    },
+    "2015": {
+        "page_defs": {
+            "211": {
+                "area": ["75,712,550,88"],
+                "cols": ["375,444,498"],
+                "skip_rows_start": 9,
+                "skip_rows_end": 0,
+            },
+            "212": {
+                "area": ["64,711,524,90"],
+                "cols": ["369,436,492"],
+                "skip_rows_start": 8,
+                "skip_rows_end": 4,
+            },
+        },
+        "rows_to_fix": {
+            3: ["3 - GHG Emissions Agriculture, Livestock & Forest and Other"],
+            -2: [
+                "ch4 emission from rice field",
+                "indirect nitrous oxide (n2o) from n based fertilizer",
+                "Direct nitrous oxide (n2o) emissions from fertilizer application",
+                "Total enteric ch4 emissions",
+                "Total Manure ch4 emissions",
+                "Total Direct n2o emissions from manure system",
+                "Total indirect n2o emissions - Volatilization",
+                "Total indirect n2o emissions - leaching/Runoff",
+                "CO2 from Biomass burning for Energy purpose",
+                "a-co2 emission from soil",
+                "c-co2 emission due to fuel wood removal for consumption",
+            ],
+            2: [
+                # "B-Methane emission from domestic waste water" and
+                # "c-nitrous oxide emission from domestic waste water" are the same category
+                # and should be merged
+                "B-Methane emission from domestic waste water",
+                # "Total Manure ch4 emissions" and
+                # "Total Direct n2o emissions from manure system" are the same category
+                # and should be merged
+                "Total Manure ch4 emissions",
+            ],
+        },
+        "header": [
+            "Greenhouse gas source and sink categories",
+            "CO2",
+            "CH4",
+            "N2O",
+        ],
+        "unit": [
+            "-",
+            "Gg",
+            "Gg",
+            "Gg",
+        ],
+        "skip_rows": 0,
+        "cat_codes_manual": {
+            "1-A - Fuel Combustion Activities_Energy Industries": "1.A",
+            "1 - a1- electricity Generation": "1.A.1",
+            "1.a2- Manufacturing industries and construction": "1.A.2",
+            "1.a3-Transport": "1.A.3",
+            "1.a4-other sectors": "1.A.4",
+            "2 a. 1-cement Production": "2.A.1",
+            "2a3 Glass Production": "2.A.3",
+            "2 B. 1 - ammonia Production": "2.B.1",
+            "2 C-Metal Industry": "2.C",
+            "2 c. 1 iron and steel Production": "2.C.1",
+            "2. D - Non-Energy Products from Fuels and Solvent Use": "2.D",
+            "2D 1-lubricant Use": "2.D.1",
+            "ch4 emission from rice field": "3.C.7",
+            "indirect nitrous oxide (n2o) from n based fertilizer": "3.C.5",
+            "Direct nitrous oxide (n2o) emissions from fertilizer application": "3.C.4",
+            "Direct carbon Dioxide emissions from urea fertilizer": "3.C.3",
+            "Total enteric ch4 emissions": "3.A.1",
+            "Total Manure ch4 emissions Total Direct n2o emissions from manure system": "3.A.2",
+            "Total indirect n2o emissions -Volatilization": "3.C.5",
+            "Total indirect n2o emissions -leaching/Runoff": "3.C.5",
+            "4 a-solid Waste Disposal": "4.A",
+            "B-Methane emission from domestic waste water c-nitrous oxide emission from domestic waste water": "4.D.1",
+            "D- Metahne emission from industrial waste water": "4.D.2",
+            "Memo items (5)": "MEMO",
+            "Information Items": "INFO",
+            "international Bunkers": "M.BK",
+            "a-international aviation (international Bunkers)": "M.BK.A",
+            "B-international Water-borne navigation (international Bunkers)": "M.BK.M",
+            "CO2 from Biomass burning for Energy purpose": "M.BIO",
+        },
+        "merge_cats": "3C5",
+        # These three categories only appear in 2015 and are all zero
+        "categories_to_drop": [
+            "a-co2 emission from soil",
+            "c-co2 emission due to fuel wood removal for consumption",
+            "B-conversion of forest land to other land use",
+        ],
+    },
+    "2016": {
+        "page_defs": {
+            "213": {
+                "area": ["73,712,544,77"],
+                "cols": ["373,444,498"],
+                "skip_rows_start": 9,
+                "skip_rows_end": 0,
+            },
+            "214": {
+                "area": ["66,711,533,143"],
+                "cols": ["359,435,492"],
+                "skip_rows_start": 8,
+                "skip_rows_end": 4,
+            },
+        },
+        "rows_to_fix": {
+            3: ["3 - GHG Emissions Agriculture, Livestock & Forest and Other"],
+            -2: [
+                "ch4 emission from rice field",
+                "indirect nitrous oxide (n2o) from n based fertilizer",
+                "Direct nitrous oxide (n2o) emissions from fertilizer application",
+                "Total enteric ch4 emissions",
+                "Total Manure ch4 emissions",
+                "Total Direct n2o emissions from manure system",
+                "Total indirect n2o emissions - Volatilization",
+                "Total indirect n2o emissions - leaching/Runoff",
+                "CO2 from Biomass burning for Energy purpose",
+            ],
+            2: [
+                # "B-Methane emission from domestic waste water" and
+                # "c-nitrous oxide emission from domestic waste water" are the same category
+                # and should be merged
+                "B-Methane emission from domestic waste water",
+                # "Total Manure ch4 emissions" and
+                # "Total Direct n2o emissions from manure system" are the same category
+                # and should be merged
+                "Total Manure ch4 emissions",
+            ],
+        },
+        "header": [
+            "Greenhouse gas source and sink categories",
+            "CO2",
+            "CH4",
+            "N2O",
+        ],
+        "unit": [
+            "-",
+            "Gg",
+            "Gg",
+            "Gg",
+        ],
+        "skip_rows": 0,
+        "cat_codes_manual": {
+            "1-A - Fuel Combustion Activities_Energy Industries": "1.A",
+            "1 - a1- electricity Generation": "1.A.1",
+            "1.a2- Manufacturing industries and construction": "1.A.2",
+            "1.a3-Transport": "1.A.3",
+            "1.a4-other sectors": "1.A.4",
+            "2 a. 1-cement Production": "2.A.1",
+            "2a3 Glass Production": "2.A.3",
+            "2 B. 1 - ammonia Production": "2.B.1",
+            "2 C-Metal Industry": "2.C",
+            "2 c. 1 iron and steel Production": "2.C.1",
+            "2. D - Non-Energy Products from Fuels and Solvent Use": "2.D",
+            "2D 1-lubricant Use": "2.D.1",
+            "ch4 emission from rice field": "3.C.7",
+            "indirect nitrous oxide (n2o) from n based fertilizer": "3.C.5",
+            "Direct nitrous oxide (n2o) emissions from fertilizer application": "3.C.4",
+            "Direct carbon Dioxide emissions from urea fertilizer": "3.C.3",
+            "Total enteric ch4 emissions": "3.A.1",
+            "Total Manure ch4 emissions Total Direct n2o emissions from manure system": "3.A.2",
+            "Total indirect n2o emissions -Volatilization": "3.C.5",
+            "Total indirect n2o emissions -leaching/Runoff": "3.C.5",
+            "4 a-solid Waste Disposal": "4.A",
+            "B-Methane emission from domestic waste water c-nitrous oxide emission from domestic waste water": "4.D.1",
+            "D- Metahne emission from industrial waste water": "4.D.2",
+            "Memo items (5)": "MEMO",
+            "Information Items": "INFO",
+            "international Bunkers": "M.BK",
+            "a-international aviation (international Bunkers)": "M.BK.A",
+            "B-international Water-borne navigation (international Bunkers)": "M.BK.M",
+            "CO2 from Biomass burning for Energy purpose": "M.BIO",
+        },
+        "merge_cats": "3C5",
+    },
+    "2017": {
+        "page_defs": {
+            "215": {
+                "area": ["74,715,543,80"],
+                "cols": ["382,444,497"],
+                "skip_rows_start": 9,
+                "skip_rows_end": 0,
+            },
+            "216": {
+                "area": ["64,720,530,158"],
+                "cols": ["380,433,490"],
+                "skip_rows_start": 8,
+                "skip_rows_end": 4,
+            },
+        },
+        "rows_to_fix": {
+            3: ["3 - GHG Emissions Agriculture, Livestock & Forest and Other"],
+            -2: [
+                "ch4 emission from rice field",
+                "indirect nitrous oxide (n2o) from n based fertilizer",
+                "Direct nitrous oxide (n2o) emissions from fertilizer application",
+                "Total enteric ch4 emissions",
+                "Total Manure ch4 emissions",
+                "Total Direct n2o emissions from manure system",
+                "Total indirect n2o emissions - Volatilization",
+                "Total indirect n2o emissions - leaching/Runoff",
+                "CO2 from Biomass burning for Energy purpose",
+            ],
+            2: [
+                # "B-Methane emission from domestic waste water" and
+                # "c-nitrous oxide emission from domestic waste water" are the same category
+                # and should be merged
+                "B-Methane emission from domestic waste water",
+                # "Total Manure ch4 emissions" and
+                # "Total Direct n2o emissions from manure system" are the same category
+                # and should be merged
+                "Total Manure ch4 emissions",
+            ],
+        },
+        "header": [
+            "Greenhouse gas source and sink categories",
+            "CO2",
+            "CH4",
+            "N2O",
+        ],
+        "unit": [
+            "-",
+            "Gg",
+            "Gg",
+            "Gg",
+        ],
+        "skip_rows": 0,
+        "cat_codes_manual": {
+            "1-A - Fuel Combustion Activities_Energy Industries": "1.A",
+            "1 - a1- electricity Generation": "1.A.1",
+            "2 a. 1-cement Production": "2.A.1",
+            "1.a2- Manufacturing industries and construction": "1.A.2",
+            "1.a3-Transport": "1.A.3",
+            "1.a4-other sectors": "1.A.4",
+            "2a3 Glass Production": "2.A.3",
+            "2 B. 1 - ammonia Production": "2.B.1",
+            "2 C-Metal Industry": "2.C",
+            "2 c. 1 iron and steel Production": "2.C.1",
+            "2. D - Non-Energy Products from Fuels and Solvent Use": "2.D",
+            "2D 1-lubricant Use": "2.D.1",
+            "ch4 emission from rice field": "3.C.7",
+            "indirect nitrous oxide (n2o) from n based fertilizer": "3.C.5",
+            "Direct nitrous oxide (n2o) emissions from fertilizer application": "3.C.4",
+            "Direct carbon Dioxide emissions from urea fertilizer": "3.C.3",
+            "Total enteric ch4 emissions": "3.A.1",
+            "Total Manure ch4 emissions Total Direct n2o emissions from manure system": "3.A.2",
+            "Total indirect n2o emissions -Volatilization": "3.C.5",
+            "Total indirect n2o emissions -leaching/Runoff": "3.C.5",
+            "4 a-solid Waste Disposal": "4.A",
+            "B-Methane emission from domestic waste water c-nitrous oxide emission from domestic waste water": "4.D.1",
+            "D- Metahne emission from industrial waste water": "4.D.2",
+            "Memo items (5)": "MEMO",
+            "Information Items": "INFO",
+            "international Bunkers": "M.BK",
+            "a-international aviation (international Bunkers)": "M.BK.A",
+            "B-international Water-borne navigation (international Bunkers)": "M.BK.M",
+            "CO2 from Biomass burning for Energy purpose": "M.BIO",
+        },
+        "merge_cats": "3C5",
+    },
+    "2018": {
+        "page_defs": {
+            "217": {
+                "area": ["75,713,542,91"],
+                "cols": ["378,446,499"],
+                "skip_rows_start": 9,
+                "skip_rows_end": 0,
+            },
+            "218": {
+                "area": ["63,714,528,154"],
+                "cols": ["374,438,491"],
+                "skip_rows_start": 8,
+                "skip_rows_end": 4,
+            },
+        },
+        "rows_to_fix": {
+            3: ["3 - GHG Emissions Agriculture, Livestock & Forest and Other"],
+            -2: [
+                "ch4 emission from rice field",
+                "indirect nitrous oxide (n2o) from n based fertilizer",
+                "Direct nitrous oxide (n2o) emissions from fertilizer application",
+                "Total enteric ch4 emissions",
+                "Total Manure ch4 emissions",
+                "Total Direct n2o emissions from manure system",
+                "Total indirect n2o emissions - Volatilization",
+                "Total indirect n2o emissions - leaching/Runoff",
+                "CO2 from Biomass burning for Energy purpose",
+            ],
+            2: [
+                # "B-Methane emission from domestic waste water" and
+                # "c-nitrous oxide emission from domestic waste water" are the same category
+                # and should be merged
+                "B-Methane emission from domestic waste water",
+                # "Total Manure ch4 emissions" and
+                # "Total Direct n2o emissions from manure system" are the same category
+                # and should be merged
+                "Total Manure ch4 emissions",
+            ],
+        },
+        "header": [
+            "Greenhouse gas source and sink categories",
+            "CO2",
+            "CH4",
+            "N2O",
+        ],
+        "unit": [
+            "-",
+            "Gg",
+            "Gg",
+            "Gg",
+        ],
+        "skip_rows": 0,
+        "cat_codes_manual": {
+            "1-A - Fuel Combustion Activities_Energy Industries": "1.A",
+            "1 - a1- electricity Generation": "1.A.1",
+            "1.a2- Manufacturing industries and construction": "1.A.2",
+            "1.a3-Transport": "1.A.3",
+            "1.a4-other sectors": "1.A.4",
+            "2 a. 1-cement Production": "2.A.1",
+            "2a3 Glass Production": "2.A.3",
+            "2 B. 1 - ammonia Production": "2.B.1",
+            "2 C-Metal Industry": "2.C",
+            "2 c. 1 iron and steel Production": "2.C.1",
+            "2. D - Non-Energy Products from Fuels and Solvent Use": "2.D",
+            "2D 1-lubricant Use": "2.D.1",
+            "ch4 emission from rice field": "3.C.7",
+            "indirect nitrous oxide (n2o) from n based fertilizer": "3.C.5",
+            "Direct nitrous oxide (n2o) emissions from fertilizer application": "3.C.4",
+            "Direct carbon Dioxide emissions from urea fertilizer": "3.C.3",
+            "Total enteric ch4 emissions": "3.A.1",
+            "Total Manure ch4 emissions Total Direct n2o emissions from manure system": "3.A.2",
+            "Total indirect n2o emissions -Volatilization": "3.C.5",
+            "Total indirect n2o emissions -leaching/Runoff": "3.C.5",
+            "4 a-solid Waste Disposal": "4.A",
+            "B-Methane emission from domestic waste water c-nitrous oxide emission from domestic waste water": "4.D.1",
+            "D- Metahne emission from industrial waste water": "4.D.2",
+            "Memo items (5)": "MEMO",
+            "Information Items": "INFO",
+            "international Bunkers": "M.BK",
+            "a-international aviation (international Bunkers)": "M.BK.A",
+            "B-international Water-borne navigation (international Bunkers)": "M.BK.M",
+            "CO2 from Biomass burning for Energy purpose": "M.BIO",
+        },
+        "merge_cats": "3C5",
+    },
+    "2019": {
+        "page_defs": {
+            "219": {
+                "area": ["75,713,542,91"],
+                "cols": ["378,446,499"],
+                "skip_rows_start": 9,
+                "skip_rows_end": 0,
+            },
+            "220": {
+                "area": ["63,714,524,139"],
+                "cols": ["374,438,491"],
+                "skip_rows_start": 8,
+                "skip_rows_end": 4,
+            },
+        },
+        "rows_to_fix": {
+            3: ["3 - GHG Emissions Agriculture, Livestock & Forest and Other"],
+            -2: [
+                "ch4 emission from rice field",
+                "indirect nitrous oxide (n2o) from n based fertilizer",
+                "Direct nitrous oxide (n2o) emissions from fertilizer application",
+                "Total enteric ch4 emissions",
+                "Total Manure ch4 emissions",
+                "Total Direct n2o emissions from manure system",
+                "Total indirect n2o emissions - Volatilization",
+                "Total indirect n2o emissions - leaching/Runoff",
+                "CO2 from Biomass burning for Energy purpose",
+            ],
+            2: [
+                # "B-Methane emission from domestic waste water" and
+                # "c-nitrous oxide emission from domestic waste water" are the same category
+                # and should be merged
+                "B-Methane emission from domestic waste water",
+                # "Total Manure ch4 emissions" and
+                # "Total Direct n2o emissions from manure system" are the same category
+                # and should be merged
+                "Total Manure ch4 emissions",
+            ],
+        },
+        "header": [
+            "Greenhouse gas source and sink categories",
+            "CO2",
+            "CH4",
+            "N2O",
+        ],
+        "unit": [
+            "-",
+            "Gg",
+            "Gg",
+            "Gg",
+        ],
+        "skip_rows": 0,
+        "cat_codes_manual": {
+            "1-A - Fuel Combustion Activities_Energy Industries": "1.A",
+            "1 - a1- electricity Generation": "1.A.1",
+            "1.a2- Manufacturing industries and construction": "1.A.2",
+            "1.a3-Transport": "1.A.3",
+            "1.a4-other sectors": "1.A.4",
+            "2 a. 1-cement Production": "2.A.1",
+            "2a3 Glass Production": "2.A.3",
+            "2 B. 1 - ammonia Production": "2.B.1",
+            "2 C-Metal Industry": "2.C",
+            "2 c. 1 iron and steel Production": "2.C.1",
+            "2. D - Non-Energy Products from Fuels and Solvent Use": "2.D",
+            "2D 1-lubricant Use": "2.D.1",
+            "ch4 emission from rice field": "3.C.7",
+            "indirect nitrous oxide (n2o) from n based fertilizer": "3.C.5",
+            "Direct nitrous oxide (n2o) emissions from fertilizer application": "3.C.4",
+            "Direct carbon Dioxide emissions from urea fertilizer": "3.C.3",
+            "Total enteric ch4 emissions": "3.A.1",
+            "Total Manure ch4 emissions Total Direct n2o emissions from manure system": "3.A.2",
+            "Total indirect n2o emissions -Volatilization": "3.C.5",
+            "Total indirect n2o emissions -leaching/Runoff": "3.C.5",
+            "4 a-solid Waste Disposal": "4.A",
+            "B-Methane emission from domestic waste water c-nitrous oxide emission from domestic waste water": "4.D.1",
+            "D- Metahne emission from industrial waste water": "4.D.2",
+            "Memo Items (5)": "MEMO",
+            "Information Items": "INFO",
+            "International Bunkers": "M.BK",
+            "a-international aviation (international Bunkers)": "M.BK.A",
+            "B-international Water-borne navigation (international Bunkers)": "M.BK.M",
+            "CO2 from Biomass burning for Energy purpose": "M.BIO",
+        },
+        "merge_cats": "3C5",
+        "categories_to_drop": ["in eq. Million Tons"],
+    },
+}
+
+# pandas' wide_to_long expects a common stub name in front of the year, so each
+# year column 2013-2019 is mapped to "data<year>"
+wide_to_long_col_replace = {
+    str(year): f"data{year}" for year in range(2013, 2020)
+}
+
+manually_typed = {  # values typed by hand, one entry per figure
+    "figure_16": {
+        # Other than stated in the figure, these are KYOTO gases in CO2eq
+        "unit": "GgCO2eq",
+        "entity": "KYOTOGHG (AR4GWP100)",
+        "data": {
+            "category": [
+                "1.A.2.a",
+                "1.A.2.b",
+                "1.A.2.c",
+                "1.A.2.d",
+                "1.A.2.e",
+                "1.A.2.f",
+                "1.A.2.g",
+                "1.A.2.h",
+                "1.A.2.i",
+                "1.A.2.j",
+                "1.A.2.k",
+                "1.A.2.l",
+                "1.A.2.m",
+            ],
+            "2013": [
+                709,
+                6,
+                796,
+                421,
+                553,
+                12174,
+                1,
+                1,
+                0,
+                4,
+                5,
+                4885,
+                1280,
+            ],
+            "2014": [
+                706,
+                0,
+                636,
+                409,
+                515,
+                13896,
+                0,
+                0,
+                0,
+                0,
+                203,
+                4793,
+                1276,
+            ],
+            "2015": [
+                778,
+                0,
+                545,
+                458,
+                532,
+                13660,
+                0,
+                0,
+                0,
+                0,
+                195,
+                5180,
+                1799,
+            ],
+            "2016": [
+                830,
+                0,
+                445,
+                473,
+                527,
+                15771,
+                0,
+                0,
+                0,
+                0,
+                118,
+                5375,
+                1700,
+            ],
+            "2017": [
+                883,
+                0,
+                344,
+                492,
+                522,
+                15141,
+                0,
+                0,
+                0,
+                0,
+                179,
+                5294,
+                1546,
+            ],
+            "2018": [
+                943,
+                0,
+                247,
+                519,
+                519,
+                15949,
+                0,
+                0,
+                0,
+                0,
+                193,
+                5810,
+                1764,
+            ],
+            "2019": [
+                988,
+                0,
+                123,
+                527,
+                516,
+                16091,
+                0,
+                0,
+                0,
+                0,
+                216,
+                5935,
+                2492,
+            ],
+        },
+    },
+    "figure_17": {
+        # Other than stated in the figure, these are KYOTO gases in CO2eq
+        "unit": "GgCO2eq",
+        "entity": "KYOTOGHG (AR4GWP100)",
+        "data": {
+            "category": [
+                "1.A.3.a.ii",
+                "1.A.3.b.i.2",
+                "1.A.3.b.ii.2",
+                "1.A.3.b.iii",
+                "1.A.3.b.iv",
+                "1.A.3.c",
+                "1.A.3.d.ii",
+            ],
+            "2013": [
+                694,
+                1450,
+                1215,
+                8960,
+                979,
+                115,
+                162,
+            ],
+            "2014": [
+                704,
+                1485,
+                914,
+                8126,
+                934,
+                113,
+                196,
+            ],
+            "2015": [
+                738,
+                1708,
+                1135,
+                9082,
+                1030,
+                117,
+                208,
+            ],
+            "2016": [
+                757,
+                1710,
+                1030,
+                8504,
+                1089,
+                115,
+                350,
+            ],
+            "2017": [
+                822,
+                1962,
+                1298,
+                10201,
+                1136,
+                156,
+                289,
+            ],
+            "2018": [
+                890,
+                2019,
+                1410,
+                10320,
+                1152,
+                140,
+                332,
+            ],
+            "2019": [
+                938,
+                2440,
+                1985,
+                12682,
+                1232,
+                168,
+                401,
+            ],
+        },
+    },
+    "figure_18": {
+        # Other than stated in the figure, these are KYOTO gases in CO2eq
+        "unit": "GgCO2eq",
+        "entity": "KYOTOGHG (AR4GWP100)",
+        "data": {
+            "category": ["1.A.4.a", "1.A.4.b"],
+            "2013": [1871, 6703],
+            "2014": [1619, 6960],
+            "2015": [1522, 8573],
+            "2016": [1260, 9755],
+            "2017": [981, 9702],
+            "2018": [833, 11355],
+            "2019": [835, 12317],
+        },
+    },
+    "figure_19": {
+        # Other than stated in the figure, these are KYOTO gases in CO2eq
+        "unit": "GgCO2eq",
+        "entity": "KYOTOGHG (AR4GWP100)",
+        "data": {
+            "category": ["1.A.4.c.i", "1.A.4.c.iii", "1.A.4.c"],
+            "2013": [2692, 5, 2697],
+            "2014": [2804, 5, 2809],
+            "2015": [2977, 6, 2983],
+            "2016": [3035, 6, 3040],
+            "2017": [2903, 6, 2909],
+            "2018": [3496, 6, 3502],
+            "2019": [3446, 6, 3452],
+        },
+    },
+    "figure_20": {
+        "unit": "GgCO2eq",  # typed in CO2eq; see unit_conversion for the final unit
+        "entity": "CH4",
+        "data": {
+            "category": ["1.B.2.b.iii.4", "1.B.2.b.iii.5", "1.B.2.b.iii"],
+            "2013": [896, 8440, 9336],
+            "2014": [896, 8440, 9336],
+            "2015": [896, 8440, 9336],
+            "2016": [896, 8440, 9336],
+            "2017": [896, 6429, 7325],
+            "2018": [896, 6429, 7325],
+            "2019": [896, 4289, 5185],
+        },
+        "unit_conversion": {"new_unit": "Gg", "conversion_factor": 25},
+    },
+}
+
+# corrections for obviously wrong table values: (category, entity, year, new value)
+values_to_correct = [
+    # the sum of 1.A sub-categories does not match the value of 1.A
+    ("1.A", "CH4", "2014", 110),
+    ("1.A", "CO2", "2014", 77373),
+    ("1", "N2O", "2014", 3.8),
+    # the reported CO2 sum for category 3 does not include 3.B
+    ("3", "CO2", "2013", 8140),
+    ("3", "CO2", "2014", 8923),
+    ("3", "CO2", "2015", 9791),
+    ("3", "CO2", "2016", 10518),
+    ("3", "CO2", "2017", 11359),
+    ("3", "CO2", "2018", 11993),
+    ("3", "CO2", "2019", 12640),
+]
+
+country_processing_step1 = {  # passed on as processing_info_country by the reader
+    "tolerance": 0.01,
+    "aggregate_cats": {
+        "3.A": {"sources": ["3.A.1", "3.A.2"]},
+        "M.3.C.AG": {  # "Aggregate sources and non-CO2 emissions sources on land (Agriculture)"
+            "sources": [
+                "3.C.3",
+                "3.C.4",
+                "3.C.5",
+                "3.C.7",
+            ]
+        },
+        "3.C": {"sources": ["3.C.3", "3.C.4", "3.C.5", "3.C.7"]},
+        "M.AG.ELV": {
+            "sources": ["M.3.C.AG", "M.3.D.AG"],
+        },
+        "M.AG": {"sources": ["3.A", "M.AG.ELV"]},  # agriculture
+        "M.LULUCF": {"sources": ["3.B", "M.3.D.LU"]},
+        "M.0.EL": {
+            "sources": ["1", "2", "M.AG", "4"],
+        },
+        "1.B": {"sources": ["1.B.2"]},
+        "4.D": {"sources": ["4.D.1", "4.D.2"]},
+        "1": {"sources": ["1.A", "1.B"]},  # consistency check energy
+        "2": {"sources": ["2.A", "2.B", "2.C", "2.D", "2.F"]},  # consistency check IPPU
+        "3": {"sources": ["M.AG", "M.LULUCF"]},  # consistency check AFOLU
+        "4": {"sources": ["4.A", "4.D"]},  # consistency check waste
+        # check if typed numbers add up to the total of 1.A.2 from the main table
+        "1.A.2": {
+            "sources": [
+                "1.A.2.a",
+                "1.A.2.b",
+                "1.A.2.c",
+                "1.A.2.d",
+                "1.A.2.e",
+                "1.A.2.f",
+                "1.A.2.g",
+                "1.A.2.h",
+                "1.A.2.i",
+                "1.A.2.j",
+                "1.A.2.k",
+                "1.A.2.l",
+                "1.A.2.m",
+            ]
+        },
+        # check if typed numbers add up to the total of 1.A.3 from the main table
+        "1.A.3": {
+            "sources": [
+                "1.A.3.a.ii",
+                "1.A.3.b.i.2",
+                "1.A.3.b.ii.2",
+                "1.A.3.b.iii",
+                "1.A.3.b.iv",
+                "1.A.3.c",
+                "1.A.3.d.ii",
+            ]
+        },
+        # check if the typed numbers add up to the total of 1.A.4.c in the same table
+        "1.A.4.c": {"sources": ["1.A.4.c.i", "1.A.4.c.iii"]},
+        # check if typed numbers add up to the total of 1.A.4 from the main table
+        "1.A.4": {"sources": ["1.A.4.a", "1.A.4.b", "1.A.4.c"]},
+        # check if the typed numbers add up to the total of 1.B.2.b.iii in the same table
+        "1.B.2.b.iii": {"sources": ["1.B.2.b.iii.4", "1.B.2.b.iii.5"]},
+        # consistency check for 1.B.2
+        "1.B.2": {"sources": ["1.B.2.b.iii"]},
+    },
+    # We don't have HFCs and PFCs in the report, hence basket_copy is not relevant
+    # "basket_copy": {
+    #     "GWPs_to_add": ["SARGWP100", "AR5GWP100", "AR6GWP100"],
+    #     # "entities": ["HFCS", "PFCS"],
+    #     "source_GWP": gwp_to_use,
+    # },
+}
+
+# Note on downscaling: Data are always available for the same years (2013-2019),
+# so temporal downscaling does not make sense here.
+# TODO: Perhaps entity or category downscaling can be done?
+
+# one KYOTOGHG basket per GWP specification, each made up of CO2, CH4 and N2O
+gas_baskets = {
+    f"KYOTOGHG ({gwp})": ["CO2", "CH4", "N2O"]
+    for gwp in ("SARGWP100", "AR4GWP100", "AR5GWP100", "AR6GWP100")
+}

+ 340 - 0
src/unfccc_ghg_data/unfccc_reader/Bangladesh/read_BGD_BUR1_from_pdf.py

@@ -0,0 +1,340 @@
+"""
+Read Bangladesh's BUR1 from pdf
+"""
+
+import camelot
+import numpy as np
+import pandas as pd
+import primap2 as pm2
+
+from unfccc_ghg_data.helper import (
+    downloaded_data_path,
+    extracted_data_path,
+    fix_rows,
+    process_data_for_country,
+)
+from unfccc_ghg_data.unfccc_reader.Bangladesh.config_bgd_bur1 import (
+    coords_cols,
+    coords_defaults,
+    coords_terminologies,
+    coords_value_mapping,
+    country_processing_step1,
+    filter_remove,
+    gas_baskets,
+    inv_conf,
+    inv_conf_per_year,
+    manually_typed,
+    meta_data,
+    values_to_correct,
+    wide_to_long_col_replace,
+)
+
+if __name__ == "__main__":
+    # Outline of the script:
+    #   1. read the main inventory tables of every configured year from the pdf
+    #   2. add the manually typed figure data
+    #   3. convert to interchange / primap2 format and apply manual corrections
+    #   4. save the raw data, process it, and save the processed data
+
+    # ###
+    # configuration
+    # ###
+
+    input_folder = downloaded_data_path / "UNFCCC" / "Bangladesh" / "BUR1"
+    output_folder = extracted_data_path / "UNFCCC" / "Bangladesh"
+
+    # also creates missing parent folders and is a no-op if the folder exists
+    output_folder.mkdir(parents=True, exist_ok=True)
+
+    pdf_file = "Updated_BUR1_Report_15_11_2023.pdf"
+    output_filename = "BGD_BUR1_2023_"
+    category_column = f"category ({coords_terminologies['category']})"
+    compression = {"zlib": True, "complevel": 9}
+
+    def repl(m):
+        """Return the "code" group of a category code regex match."""
+        return m.group("code")
+
+    # ###
+    # 1. Read in main tables from the Annex
+    # ###
+    dfs_years = []
+    for year in reversed(inv_conf_per_year):
+        conf_year = inv_conf_per_year[year]
+        print("-" * 60)
+        print(f"Reading year {year}.")
+        print("-" * 60)
+        df_year = None
+        for page, page_def in conf_year["page_defs"].items():
+            print(f"Reading table from page {page}.")
+
+            # read from PDF
+            tables_inventory_original = camelot.read_pdf(
+                str(input_folder / pdf_file),
+                pages=page,
+                table_areas=page_def["area"],
+                columns=page_def["cols"],
+                flavor="stream",
+                split_text=False,
+            )
+            print("Reading complete.")
+
+            df_page = tables_inventory_original[0].df
+
+            # cut rows at the top if needed
+            skip_rows_start = page_def["skip_rows_start"]
+            if skip_rows_start != 0:
+                df_page = df_page[skip_rows_start:]
+
+            # cut rows at the bottom if needed
+            # (the check is required as the slice [:-0] would return no rows at all)
+            skip_rows_end = page_def["skip_rows_end"]
+            if skip_rows_end != 0:
+                df_page = df_page[:-skip_rows_end]
+
+            # stack the tables vertically
+            if df_year is None:
+                df_year = df_page
+            else:
+                df_year = pd.concat(
+                    [df_year, df_page],
+                    axis=0,
+                    join="outer",
+                ).reset_index(drop=True)
+
+        # fix content that spreads across multiple rows
+        for n_rows, rows_to_fix in conf_year.get("rows_to_fix", {}).items():
+            print(f"Merge content for {n_rows=}")
+            df_year = fix_rows(
+                df_year,
+                rows_to_fix=rows_to_fix,
+                col_to_use=0,
+                n_rows=n_rows,
+            )
+
+        # drop the first row matching each configured label
+        # (raises an IndexError if a label is not present in the table)
+        for row in conf_year.get("categories_to_drop", []):
+            row_to_delete = df_year.index[df_year[0] == row][0]
+            df_year = df_year.drop(index=row_to_delete)
+
+        # put the configured header and unit rows on top of the table
+        df_header = pd.DataFrame([conf_year["header"], conf_year["unit"]])
+        skip_rows = conf_year["skip_rows"]
+
+        df_year = pd.concat(
+            [df_header, df_year[skip_rows:]], axis=0, join="outer"
+        ).reset_index(drop=True)
+
+        df_year = pm2.pm2io.nir_add_unit_information(
+            df_year,
+            unit_row=1,
+            entity_row=0,
+            regexp_entity=".*",
+            regexp_unit=".*",
+            default_unit="Gg",
+        )
+
+        print("Added unit information.")
+
+        # set index
+        df_year = df_year.set_index(inv_conf["index_cols"])
+
+        # convert to long format
+        df_year_long = pm2.pm2io.nir_convert_df_to_long(
+            df_year, year, inv_conf["header_long"]
+        )
+
+        # extract from tuple
+        df_year_long["orig_cat_name"] = df_year_long["orig_cat_name"].str[0]
+
+        # prep for conversion to PM2 IF and native format
+        # make a copy of the categories row
+        df_year_long["category"] = df_year_long["orig_cat_name"]
+
+        # first the manual replacements
+        df_year_long["category"] = df_year_long["category"].replace(
+            conf_year["cat_codes_manual"]
+        )
+
+        # Remove dots between letters in category codes. regex=False is given
+        # explicitly: as a regular expression "." would match every character and
+        # the default of the regex argument differs between pandas versions.
+        df_year_long["category"] = df_year_long["category"].str.replace(
+            ".", "", regex=False
+        )
+        # Some categories have a dash between the letters
+        df_year_long["category"] = df_year_long["category"].str.replace(
+            "-", " ", regex=False
+        )
+
+        # then the regex replacements
+        df_year_long["category"] = df_year_long["category"].str.replace(
+            inv_conf["cat_code_regexp"], repl, regex=True
+        )
+
+        df_year_long = df_year_long.reset_index(drop=True)
+
+        # make sure all col headers are str
+        df_year_long.columns = df_year_long.columns.map(str)
+
+        df_year_long = df_year_long.drop(columns=["orig_cat_name"])
+
+        # merge duplicate categories and sum their values
+        if "merge_cats" in conf_year:
+            cat = conf_year["merge_cats"]
+            is_merge_cat = df_year_long["category"] == cat
+            # filter by category to be merged, empty cells become NaN
+            df_temp = df_year_long.loc[is_merge_cat].replace("", np.nan)
+            df_temp["data"] = df_temp["data"].apply(float)
+            # Sum values for duplicate entries. With min_count=1 a group without
+            # any value stays NaN instead of becoming 0, so empty results can be
+            # told apart from sums that really are zero.
+            df_temp = df_temp.groupby(
+                ["entity", "unit", "time", "category"], as_index=False
+            )["data"].sum(min_count=1)
+            # change empty results back to empty strings (real zeros are kept)
+            df_temp = df_temp.replace(np.nan, "")
+            # drop the un-merged rows of the category and put the sums in front
+            df_year_long = pd.concat(
+                [df_temp, df_year_long.loc[~is_merge_cat]],
+                axis=0,
+                join="outer",
+            ).reset_index(drop=True)
+
+        dfs_years.append(df_year_long)
+
+    # ###
+    # 2. Add manually typed tables
+    # ###
+
+    dfs_typed = []
+    for figure_conf in manually_typed.values():
+        df_typed_figure = pd.DataFrame(figure_conf["data"])
+        df_typed_figure["entity"] = figure_conf["entity"]
+        df_typed_figure["unit"] = figure_conf["unit"]
+
+        # adjust column names for wide to long function
+        df_typed_figure = df_typed_figure.rename(columns=wide_to_long_col_replace)
+        df_typed_figure_long = pd.wide_to_long(
+            df_typed_figure, stubnames="data", i="category", j="time"
+        ).reset_index()
+
+        # optionally convert to another unit by dividing by the configured factor
+        unit_conversion = figure_conf.get("unit_conversion")
+        if unit_conversion is not None:
+            df_typed_figure_long["unit"] = unit_conversion["new_unit"]
+            df_typed_figure_long["data"] = (
+                df_typed_figure_long["data"] / unit_conversion["conversion_factor"]
+            )
+
+        dfs_typed.append(df_typed_figure_long)
+
+    # merge main tables from Annex (first) and manually typed tables (second)
+    df_main = pd.concat(
+        [*dfs_years, *dfs_typed],
+        axis=0,
+        join="outer",
+    ).reset_index(drop=True)
+
+    ### convert to interchange format ###
+    print("Converting to interchange format.")
+    df_main_IF = pm2.pm2io.convert_long_dataframe_if(
+        df_main,
+        coords_cols=coords_cols,
+        coords_defaults=coords_defaults,
+        coords_terminologies=coords_terminologies,
+        coords_value_mapping=coords_value_mapping,
+        filter_remove=filter_remove,
+        meta_data=meta_data,
+        convert_str=True,
+        time_format="%Y",
+    )
+
+    # correct individual values
+    for category, entity, year, new_value in values_to_correct:
+        df_main_IF.loc[
+            (df_main_IF[category_column] == category)
+            & (df_main_IF["entity"] == entity),
+            year,
+        ] = new_value
+
+    ### convert to primap2 format ###
+    print("Converting to primap2 format.")
+    data_pm2 = pm2.pm2io.from_interchange_format(df_main_IF)
+
+    # ###
+    # Save raw data to IF and native format.
+    # ###
+
+    data_if = data_pm2.pr.to_interchange_format()
+
+    pm2.pm2io.write_interchange_format(
+        output_folder / (output_filename + coords_terminologies["category"] + "_raw"),
+        data_if,
+    )
+
+    encoding = {var: compression for var in data_pm2.data_vars}
+    data_pm2.pr.to_netcdf(
+        output_folder
+        / (output_filename + coords_terminologies["category"] + "_raw.nc"),
+        encoding=encoding,
+    )
+
+    # ###
+    # Processing
+    # ###
+
+    # create the gas baskets before aggregating the categories
+    data_proc_pm2_gas_baskets = process_data_for_country(
+        data_country=data_pm2,
+        entities_to_ignore=[],
+        gas_baskets=gas_baskets,
+        filter_dims=None,
+        cat_terminology_out=None,
+        category_conversion=None,
+        sectors_out=None,
+        processing_info_country=None,
+    )
+
+    # second pass: apply the country specific processing configuration
+    data_proc_pm2 = process_data_for_country(
+        data_country=data_proc_pm2_gas_baskets,
+        entities_to_ignore=[],
+        gas_baskets=None,
+        filter_dims=None,
+        cat_terminology_out=None,
+        category_conversion=None,
+        sectors_out=None,
+        processing_info_country=country_processing_step1,
+    )
+
+    # ###
+    # save processed data to IF and native format
+    # ###
+
+    terminology_proc = coords_terminologies["category"]
+
+    data_proc_if = data_proc_pm2.pr.to_interchange_format()
+
+    # the output folder has already been created in the configuration section
+    pm2.pm2io.write_interchange_format(
+        output_folder / (output_filename + terminology_proc), data_proc_if
+    )
+
+    encoding = {var: compression for var in data_proc_pm2.data_vars}
+    data_proc_pm2.pr.to_netcdf(
+        output_folder / (output_filename + terminology_proc + ".nc"), encoding=encoding
+    )
+
+    print("Saved processed data.")