22 Commit-ok c6e0a4aa05 ... 34de82b8d3

Szerző SHA1 Üzenet Dátum
  crdanielbusch 34de82b8d3 Merge pull request #102 from JGuetschow/saint-kitts-and-nevis 4 hónapja
  crdanielbusch 8dcf4987e9 Merge branch 'main' into saint-kitts-and-nevis 4 hónapja
  Daniel Busch 7b09b8d8ec [DATALAD RUNCMD] Read data for KNA, BUR1. 4 hónapja
  Daniel Busch 819cd82b58 [DATALAD RUNCMD] Read data for KNA, BUR1. 4 hónapja
  Daniel Busch 470028393a [DATALAD RUNCMD] Update folder mapping for src/unfccc_ghg_data/unfccc_reader 4 hónapja
  Daniel Busch 553f8430b0 downscale 1.B.2 4 hónapja
  Daniel Busch c7e12e9ac7 clean up 4 hónapja
  Johannes Gütschow 073b90d283 Merge pull request #109 from JGuetschow/CCPI_nAI_2024 4 hónapja
  Daniel Busch feaf21b336 updates from code review 4 hónapja
  Daniel Busch ee5d1c1324 downscaling 4 hónapja
  Daniel Busch a1407ec087 downscaling entities, some categories don't work 4 hónapja
  Daniel Busch 80e9351cf6 clean up 5 hónapja
  Daniel Busch aa7933f237 incosistent values 5 hónapja
  Daniel Busch 7f5acc59c7 category aggregation works 5 hónapja
  Daniel Busch ea6198cf20 cat aggregation 5 hónapja
  Daniel Busch b0767d4bef all tables, saved in raw format 5 hónapja
  Daniel Busch 9af5cfe477 energy industries trends 5 hónapja
  Daniel Busch bb46722613 first trend table 5 hónapja
  Daniel Busch 4f632f6c13 sector tables complete 5 hónapja
  Daniel Busch 9304d7b4d4 sector tables energy, ipuu, afolu 5 hónapja
  Daniel Busch 06f52b18bf first sector table 5 hónapja
  Daniel Busch 772e3e3296 first sector table 5 hónapja

+ 1 - 0
extracted_data/UNFCCC/Saint_Kitts_and_Nevis/KNA_BUR1_2023_IPCC2006_PRIMAP.csv

@@ -0,0 +1 @@
+../../../.git/annex/objects/9v/x2/MD5E-s351090--38627ddb9a438ef129e2dcae12446bcc.csv/MD5E-s351090--38627ddb9a438ef129e2dcae12446bcc.csv

+ 1 - 0
extracted_data/UNFCCC/Saint_Kitts_and_Nevis/KNA_BUR1_2023_IPCC2006_PRIMAP.nc

@@ -0,0 +1 @@
+../../../.git/annex/objects/F9/jk/MD5E-s278308--86ce712c212bae25a65853f7b7809828.nc/MD5E-s278308--86ce712c212bae25a65853f7b7809828.nc

+ 24 - 0
extracted_data/UNFCCC/Saint_Kitts_and_Nevis/KNA_BUR1_2023_IPCC2006_PRIMAP.yaml

@@ -0,0 +1,24 @@
+attrs:
+  references: https://unfccc.int/documents/633382
+  rights: ''
+  contact: daniel-busch@climate-resource.de
+  title: Saint Kitts and Nevis. Biennial update report (BUR). BUR1 Processed on 2024-09-09
+    Processed on 2024-09-09
+  comment: Read fom pdf by Daniel Busch Processed on 2024-09-09 Processed on 2024-09-09
+  institution: UNFCCC
+  cat: category (IPCC2006_PRIMAP)
+  area: area (ISO3)
+  scen: scenario (PRIMAP)
+  gwp_context: AR5GWP100
+time_format: '%Y'
+dimensions:
+  '*':
+  - time
+  - scenario (PRIMAP)
+  - provenance
+  - area (ISO3)
+  - source
+  - category (IPCC2006_PRIMAP)
+  - entity
+  - unit
+data_file: KNA_BUR1_2023_IPCC2006_PRIMAP.csv

+ 1 - 0
extracted_data/UNFCCC/Saint_Kitts_and_Nevis/KNA_BUR1_2023_IPCC2006_PRIMAP_raw.csv

@@ -0,0 +1 @@
+../../../.git/annex/objects/Gj/fq/MD5E-s165610--0c8f9de46f98900a9ed7e6b12e17325c.csv/MD5E-s165610--0c8f9de46f98900a9ed7e6b12e17325c.csv

+ 1 - 0
extracted_data/UNFCCC/Saint_Kitts_and_Nevis/KNA_BUR1_2023_IPCC2006_PRIMAP_raw.nc

@@ -0,0 +1 @@
+../../../.git/annex/objects/wk/7V/MD5E-s129950--b38f79387900334f1e9c23ed70a4e798.nc/MD5E-s129950--b38f79387900334f1e9c23ed70a4e798.nc

+ 22 - 0
extracted_data/UNFCCC/Saint_Kitts_and_Nevis/KNA_BUR1_2023_IPCC2006_PRIMAP_raw.yaml

@@ -0,0 +1,22 @@
+attrs:
+  references: https://unfccc.int/documents/633382
+  rights: ''
+  contact: daniel-busch@climate-resource.de
+  title: Saint Kitts and Nevis. Biennial update report (BUR). BUR1
+  comment: Read fom pdf by Daniel Busch
+  institution: UNFCCC
+  cat: category (IPCC2006_PRIMAP)
+  area: area (ISO3)
+  scen: scenario (PRIMAP)
+time_format: '%Y'
+dimensions:
+  '*':
+  - time
+  - scenario (PRIMAP)
+  - provenance
+  - area (ISO3)
+  - source
+  - category (IPCC2006_PRIMAP)
+  - entity
+  - unit
+data_file: KNA_BUR1_2023_IPCC2006_PRIMAP_raw.csv

+ 30 - 0
src/unfccc_ghg_data/unfccc_reader/Saint_Kitts_and_Nevis/__init__.py

@@ -0,0 +1,30 @@
+"""Saint Kitts and Nevis' BURs, NIRs, NCs
+
+Scripts and configurations to read Saint Kitts and Nevis' submissions to the UNFCCC.
+Currently, the following submissions and datasets are available (all datasets
+including DI (read using the DI-reader) and legacy BUR/NIR (no code)):
+
+.. exec_code::
+    :hide_code:
+
+    from unfccc_ghg_data.helper.functions import (get_country_datasets,
+                                                  get_country_submissions)
+    country = 'KNA'
+    # print available submissions
+    print("="*15 + " Available submissions " + "="*15)
+    get_country_submissions(country, True)
+    print("")
+
+    #print available datasets
+    print("="*15 + " Available datasets " + "="*15)
+    get_country_datasets(country, True)
+
+You can also obtain this information running
+
+.. code-block:: bash
+
+    poetry run doit country_info country=KNA
+
+See below for a listing of scripts for BUR/NIR reading including links.
+
+"""

+ 644 - 0
src/unfccc_ghg_data/unfccc_reader/Saint_Kitts_and_Nevis/config_kna_bur1.py

@@ -0,0 +1,644 @@
+"""
+Configuration file to read Saint Kitts and Nevis' BUR 1.
+"""
+
+gwp_to_use = "AR5GWP100"
+
+# primap2 format conversion
+coords_cols = {
+    "category": "category",
+    "entity": "entity",
+    "unit": "unit",
+}
+
+coords_defaults = {
+    "source": "KNA-GHG-Inventory",
+    "provenance": "measured",
+    "area": "KNA",
+    "scenario": "BUR1",
+}
+
+coords_terminologies = {
+    "area": "ISO3",
+    "category": "IPCC2006_PRIMAP",
+    "scenario": "PRIMAP",
+}
+
+coords_value_mapping = {
+    "unit": "PRIMAP1",
+    "category": "PRIMAP1",
+    "entity": {
+        "NMVOCs": "NMVOC",
+        "HFCS": f"HFCS ({gwp_to_use})",
+        "PFCS": f"PFCS ({gwp_to_use})",
+        "SF6": f"SF6 ({gwp_to_use})",
+        "Other halogenated gases with CO2 equivalent conversion factors (1)": f"UnspMixOfHFCs ({gwp_to_use})",
+    },
+}
+
+meta_data = {
+    "references": "https://unfccc.int/documents/633382",
+    "rights": "",  # unknown
+    "contact": "daniel-busch@climate-resource.de",
+    "title": "Saint Kitts and Nevis. Biennial update report (BUR). BUR1",
+    "comment": "Read fom pdf by Daniel Busch",
+    "institution": "UNFCCC",
+}
+
+filter_remove = {
+    "f_memo": {"category": "MEMO"},
+    "f1": {
+        "entity": "Other halogenated gases without CO2 equivalent conversion factors (2)"
+    },
+    "f2": {"entity": "3D2LULUCF"},
+}
+
+conf_general = {
+    "cat_code_regexp": r"^(?P<code>[a-zA-Z0-9\.]{1,11})[\s\.].*",
+}
+
+conf_trend = {
+    "fugitive": {
+        "rows_to_fix": {2: ["1.B.3 - Other emissions from"]},
+        "page_defs": {
+            "125": {
+                "read_params": dict(flavor="lattice"),
+                "skip_rows_start": 1,
+            },
+            "126": {
+                "read_params": dict(
+                    flavor="stream",
+                    table_areas=["72,681,564,638"],
+                    columns=["203,238,272,305,340,370,402,439,469,504,536"],
+                ),
+                "skip_rows_start": 1,
+            },
+        },
+        "entity": f"KYOTOGHG ({gwp_to_use})",
+        "unit": "GgCO2eq",
+        "header": ["orig_category"],
+        "years": [
+            "2008",
+            "2009",
+            "2010",
+            "2011",
+            "2012",
+            "2013",
+            "2014",
+            "2015",
+            "2016",
+            "2017",
+            "2018",
+        ],
+        "extra_columns": [],
+    },
+    "other_sectors": {
+        "page_defs": {
+            "123": {
+                "read_params": dict(flavor="lattice"),
+                "skip_rows_start": 2,
+            },
+        },
+        "entity": f"KYOTOGHG ({gwp_to_use})",
+        "unit": "GgCO2eq",
+        "header": ["orig_category"],
+        "years": [
+            "2008",
+            "2009",
+            "2010",
+            "2011",
+            "2012",
+            "2013",
+            "2014",
+            "2015",
+            "2016",
+            "2017",
+            "2018",
+        ],
+        "extra_columns": [],
+    },
+    "transport_sub": {
+        "page_defs": {
+            "121": {
+                "read_params": dict(flavor="lattice"),
+                "skip_rows_start": 2,
+            },
+            "122": {
+                "read_params": dict(flavor="lattice"),
+                "skip_rows_start": 0,
+            },
+        },
+        "entity": f"KYOTOGHG ({gwp_to_use})",
+        "unit": "GgCO2eq",
+        "header": ["orig_category"],
+        "years": [
+            "2008",
+            "2009",
+            "2010",
+            "2011",
+            "2012",
+            "2013",
+            "2014",
+            "2015",
+            "2016",
+            "2017",
+            "2018",
+        ],
+        "extra_columns": [],
+    },
+    "transport": {
+        "page_defs": {
+            "119": {
+                "read_params": dict(flavor="lattice"),
+                "skip_rows_start": 2,
+            }
+        },
+        "entity": f"KYOTOGHG ({gwp_to_use})",
+        "unit": "GgCO2eq",
+        "header": ["orig_category"],
+        "years": [
+            "2008",
+            "2009",
+            "2010",
+            "2011",
+            "2012",
+            "2013",
+            "2014",
+            "2015",
+            "2016",
+            "2017",
+            "2018",
+        ],
+        "extra_columns": [],
+    },
+    "manufacturing_and_construction": {
+        "page_defs": {
+            "118": {
+                "read_params": dict(flavor="lattice"),
+                "skip_rows_start": 2,
+            }
+        },
+        "entity": f"KYOTOGHG ({gwp_to_use})",
+        "unit": "GgCO2eq",
+        "header": ["orig_category"],
+        "years": [
+            "2008",
+            "2009",
+            "2010",
+            "2011",
+            "2012",
+            "2013",
+            "2014",
+            "2015",
+            "2016",
+            "2017",
+            "2018",
+        ],
+        "extra_columns": [],
+    },
+    "energy_industries": {
+        "entity": f"KYOTOGHG ({gwp_to_use})",
+        "unit": "GgCO2eq",
+        "replace_data_entries": {"NO,NE": "NO"},
+        "cat_codes_manual": {
+            "a. Public electricity and heat production": "1.A.1.a",
+            "b. Petroleum refining": "1.A.1.b",
+            "c. Manufacture of solid fuels": "1.A.1.c",
+        },
+        "header": ["orig_category"],
+        "years": [
+            "2008",
+            "2009",
+            "2010",
+            "2011",
+            "2012",
+            "2013",
+            "2014",
+            "2015",
+            "2016",
+            "2017",
+            "2018",
+        ],
+        "extra_columns": [],
+        "rows_to_fix": {3: ["a. Public electricity and heat"]},
+        "page_defs": {
+            "116": {
+                "read_params": dict(
+                    flavor="stream",
+                    table_areas=["72,426,543,333"],
+                    columns=["199,229,261,293,324,356,386,416,448,480,511"],
+                ),
+                "skip_rows_start": 2,
+            },
+        },
+    },
+    "overview": {
+        # Inconsistencies for table page 11 and page 125 for categories 1.B, 1.B.1
+        "rows_to_drop": ["1B", "1B1"],
+        "fix_single_value": {
+            "cat": "MBIO",
+            "year": "2018",
+            "new_value": "0.17",
+        },
+        "entity": f"KYOTOGHG ({gwp_to_use})",
+        "unit": "GgCO2eq",
+        "replace_data_entries": {"NO,NE": "NO"},
+        "cat_codes_manual": {
+            "Total CO2 Eq. Emissions without  LULUCF": "M.0.EL",
+            "Total CO2 Eq. Emissions with  LULUCF": "0",
+            # "1. Energy": "1. Energy",
+            "A. Fuel Combustion": "1.A",
+            "1.  Energy Industries": "1.A.1",
+            "2.  Man. Ind. & Constr.": "1.A.2",
+            "3.  Transport": "1.A.3",
+            "4.  Other Sectors": "1.A.4",
+            "5.  Other": "1.A.5",
+            "B. Fugitive Emissions from Fuels": "1.B",
+            "1.  Solid Fuels": "1.B.1",
+            "2.  Oil and Natural Gas and other…": "M.1.B.23",
+            # "2.  Industrial Processes": "2.  Industrial Processes",
+            "A.  Mineral Industry": "2.A",
+            "B.  Chemical Industry": "2.B",
+            "C.  Metal Industry": "2.C",
+            "D.  Non-energy products": "2.D",
+            "E.  Electronics industry": "2.E",
+            "F.  Product uses as ODS substitutes": "2.F",
+            "G.  Other product manufacture and": "2.G",
+            "use  H.  Other": "2.H",
+            "3.  Agriculture": "M.AG",
+            "A.  Enteric Fermentation": "3.A.1",
+            "B.  Manure Management": "3.A.2",
+            "C.  Rice Cultivation": "3.C.7",
+            "D.  Agricultural Soils": "3.C.4",
+            "E.  Prescribed Burning of Savannahs": "3.C.1.c",
+            "F.  Field Burning of Agricultural": "3.C.1.b",
+            "Residues  G.  Liming": "3.C.2",
+            "H.  Urea applications": "3.C.3",
+            "I.  Other carbon-containing": "M.3.D.2.AG",
+            "fertilisers  4. Land Use, Land-Use Change and  Forestry": "M.LULUCF",
+            "A. Forest Land": "3.B.1",
+            "B. Cropland": "3.B.2",
+            "C. Grassland": "3.B.3",
+            "D. Wetlands": "3.B.4",
+            "E. Settlements": "3.B.5",
+            "F. Other Land": "3.B.6",
+            "G. Harvested wood products": "3.D.1",
+            "H. Other": "3.D.2.LULUCF",
+            "5. Waste": "4",
+            "A.  Solid Waste Disposal": "4.A",
+            "B.  Biological treatment of solid": "4.B",
+            "waste  C. Incineration and open burning of": "4.C",
+            "D. Waste water treatment and": "4.D",
+            "discharge  E.  Other": "4.E",
+            "6.  Other": "5",
+            "CO2 Emissions from Biomass": "M.BIO",
+        },
+        "drop_cols": [
+            "change to BY",
+            "change to PY",
+        ],
+        "header": ["orig_category"],
+        "years": [
+            "2008",
+            "2009",
+            "2010",
+            "2011",
+            "2012",
+            "2013",
+            "2014",
+            "2015",
+            "2016",
+            "2017",
+            "2018",
+        ],
+        "extra_columns": [
+            "change to BY",
+            "change to PY",
+        ],
+        "split_values": {
+            "cat": "3B2",
+            "keep_value_no": 1,
+        },
+        "page_defs": {
+            "111": {"read_params": dict(flavor="lattice"), "skip_rows_start": 1},
+            "112": {"read_params": dict(flavor="lattice"), "skip_rows_start": 1},
+            "113": {"read_params": dict(flavor="lattice"), "skip_rows_start": 1},
+        },
+    },
+}
+
+conf = {
+    "energy": {
+        "entities": ["CO2", "CH4", "N2O", "NOX", "CO", "NMVOCs", "SO2"],
+        "header": ["orig_category"],
+        "cat_codes_manual": {
+            "Information Items": "MEMO",
+            "CO2 from Biomass Combustion for Energy Production": "MBIO",
+        },
+        "page_defs": {
+            "149": {"skip_rows_start": 2},
+            "150": {"skip_rows_start": 2},
+            "151": {"skip_rows_start": 2},
+            "152": {"skip_rows_start": 2},
+        },
+        "replace_data_entries": {
+            "NO,NE": "NO",
+            "NE,NO": "NO",
+            "NO,IE": "NO",
+        },
+        "unit_mapping": {
+            "CO2": "Gg",
+            "CH4": "Gg",
+            "N2O": "Gg",
+            "NOX": "Gg",
+            "CO": "Gg",
+            "NMVOCs": "Gg",
+            "SO2": "Gg",
+        },
+    },
+    "ipuu": {
+        "entities": [
+            "CO2",
+            "CH4",
+            "N2O",
+            "HFCS",
+            "PFCS",
+            "SF6",
+            "Other halogenated gases with CO2 equivalent conversion factors (1)",
+            "Other halogenated gases without CO2 equivalent conversion factors (2)",
+            "NOX",
+            "CO",
+            "NMVOC",
+            "SO2",
+        ],
+        "header": ["orig_category"],
+        "cat_codes_manual": {
+            "Information Items": "MEMO",
+            "CO2 from Biomass Combustion for Energy Production": "MBIO",
+        },
+        "page_defs": {
+            "153": {"skip_rows_start": 2},
+            "154": {"skip_rows_start": 2},
+            "155": {"skip_rows_start": 2},
+        },
+        "replace_data_entries": {
+            "NO,NE": "NO",
+            "NE,NO": "NO",
+            "NO,IE": "NO",
+        },
+        "unit_mapping": {
+            "CO2": "Gg",
+            "CH4": "Gg",
+            "N2O": "Gg",
+            "HFCS": "GgCO2eq",
+            "PFCS": "GgCO2eq",
+            "SF6": "GgCO2eq",
+            "Other halogenated gases with CO2 equivalent conversion factors (1)": "GgCO2eq",
+            "Other halogenated gases without CO2 equivalent conversion factors (2)": "Gg",
+            "NOX": "Gg",
+            "CO": "Gg",
+            "NMVOC": "Gg",
+            "SO2": "Gg",
+        },
+    },
+    "AFOLU": {
+        "entities": [
+            "CO2",
+            "CH4",
+            "N2O",
+            "NOX",
+            "CO",
+            "NMVOC",
+        ],
+        "header": ["orig_category"],
+        "cat_codes_manual": {
+            "Information Items": "MEMO",
+            "CO2 from Biomass Combustion for Energy Production": "MBIO",
+        },
+        "page_defs": {
+            "156": {"skip_rows_start": 3},
+            "157": {"skip_rows_start": 3},
+            "158": {"skip_rows_start": 3},
+        },
+        "replace_data_entries": {
+            "NO,NA": "NO",
+            "NO,NE": "NO",
+            "NE,NO": "NO",
+            "NO,IE": "NO",
+        },
+        "unit_mapping": {
+            "CO2": "Gg",
+            "CH4": "Gg",
+            "N2O": "Gg",
+            "NOX": "Gg",
+            "CO": "Gg",
+            "NMVOC": "Gg",
+        },
+    },
+    "waste": {
+        "entities": [
+            "CO2",
+            "CH4",
+            "N2O",
+            "NOX",
+            "CO",
+            "NMVOC",
+            "SO2",
+        ],
+        "header": ["orig_category"],
+        "cat_codes_manual": {
+            "Information Items": "MEMO",
+            "CO2 from Biomass Combustion for Energy Production": "MBIO",
+        },
+        "page_defs": {
+            "159": {"skip_rows_start": 2},
+        },
+        "replace_data_entries": {
+            "NO,NA": "NO",
+            "NO,NE": "NO",
+            "NE,NO": "NO",
+            "NO,IE": "NO",
+        },
+        "unit_mapping": {
+            "CO2": "Gg",
+            "CH4": "Gg",
+            "N2O": "Gg",
+            "NOX": "Gg",
+            "CO": "Gg",
+            "NMVOC": "Gg",
+            "SO2": "Gg",
+        },
+    },
+}
+
+fix_values_main = [
+    # numbers don't add up for 3.A
+    ("3A2", "CH4", "0.03"),
+    ("3A2", "N2O", "0"),
+    # numbers don't add up for 1.B
+    ("1B2a", "CO2", "0.002288"),  # value from 1.B nowhere in sub-categories
+    ("1B2aiii", "CO2", "0.002288"),  # value from 1.B.2.a nowhere in sub-categories
+    ("1B2aiii3", "CO2", "0.002288"),  # value from 1.B.2.a.iii nowhere in sub-categories
+]
+
+fix_values_trend = [
+    # Most of the values for (KYOTOGHG (AR5GWP100)) don't match
+    # with the values from the main table.
+    # Replacing with values from main table
+    # energy
+    ("1A3bi", "2018", "64.74"),  # (category, year, new_value)
+    ("1A3bi1", "2018", "64.7"),
+    ("1A3bii", "2018", "12.36"),
+    ("1A3bii1", "2018", "11.07"),
+    ("1A3bii2", "2018", "1.28"),
+    ("1A3biii", "2018", "23.66"),
+    ("1A3biv", "2018", "0.16"),
+    ("1A3c", "2018", "0.17"),
+    ("1B", "2018", "0.002288"),
+    ("1B2", "2018", "0.002288"),
+    ("1B2a", "2018", "0.002288"),
+    ("1B2aiii", "2018", "0.002288"),
+    ("1B2aiii3", "2018", "0.002288"),
+    # agriculture
+    ("3A1", "2018", "5.04"),
+    ("3A2", "2018", "0.84"),
+    ("3C4", "2018", "2.65"),
+    ("MAG", "2018", "8.54"),
+    # lulucf
+    # There are missing numbers in "Forest Land" - 3.B.1 on page 112
+    # I found them as invisible numbers in the row below
+    # but deleted them because I didn't know where they belong.
+    # TODO: leaving it as it is for now, but the numbers could be added upstream.
+    ("3B1", "2008", "-130.02"),
+    ("3B1", "2009", "-130.02"),
+    ("3B1", "2010", "-130.02"),
+    ("3B1", "2011", "-151.6"),
+    ("3B1", "2012", "-151.6"),
+    ("3B1", "2013", "-151.6"),
+    ("3B1", "2014", "-140.34"),
+    ("3B1", "2015", "-140.34"),
+    ("3B1", "2016", "-140.34"),
+    ("3B1", "2017", "-140.34"),
+    ("3B1", "2018", "-140.34"),
+    # waste
+    ("4D", "2018", "12.32"),
+    ("4C", "2018", "0.03"),
+    ("4A", "2018", "45.92"),
+    ("4", "2018", "58.27"),
+]
+
+gas_baskets = {
+    "FGASES (SARGWP100)": ["HFCS (SARGWP100)", "PFCS (SARGWP100)", "SF6", "NF3"],
+    "FGASES (AR4GWP100)": ["HFCS (AR4GWP100)", "PFCS (AR4GWP100)", "SF6", "NF3"],
+    "FGASES (AR5GWP100)": ["HFCS (AR5GWP100)", "PFCS (AR5GWP100)", "SF6", "NF3"],
+    "FGASES (AR6GWP100)": ["HFCS (AR6GWP100)", "PFCS (AR6GWP100)", "SF6", "NF3"],
+    "KYOTOGHG (SARGWP100)": ["CO2", "CH4", "N2O", "FGASES (SARGWP100)"],
+    "KYOTOGHG (AR4GWP100)": ["CO2", "CH4", "N2O", "FGASES (AR4GWP100)"],
+    "KYOTOGHG (AR5GWP100)": ["CO2", "CH4", "N2O", "FGASES (AR5GWP100)"],
+    "KYOTOGHG (AR6GWP100)": ["CO2", "CH4", "N2O", "FGASES (AR6GWP100)"],
+}
+
+country_processing_step1 = {
+    "tolerance": 0.01,
+    "aggregate_cats": {
+        "M.3.D.AG": {"sources": ["M.3.D.2.AG"]},
+        "3.C.1.AG": {"sources": ["3.C.1.b", "3.C.1.c"]},
+        "M.3.C.AG": {
+            "sources": [
+                "3.C.1.AG",
+                "3.C.2",
+                "3.C.3",
+                "3.C.4",
+                "3.C.5",
+                "3.C.6",
+                "3.C.7",
+                "3.C.8",
+            ],
+        },
+        "M.AG.ELV": {
+            "sources": ["M.3.C.AG", "M.3.D.AG"],
+        },
+        "3.A": {"sources": ["3.A.1", "3.A.2"]},
+        "3.B": {"sources": ["3.B.1", "3.B.2", "3.B.3", "3.B.4", "3.B.5", "3.B.6"]},
+        "3.C": {
+            "sources": [
+                "3.C.1",
+                "3.C.2",
+                "3.C.3",
+                "3.C.4",
+                "3.C.5",
+                "3.C.6",
+                "3.C.7",
+                "3.C.8",
+            ]
+        },
+        "3.D": {"sources": ["3.D.1", "3.D.2"]},
+        "M.AG": {"sources": ["3.A", "M.AG.ELV"]},
+        "3.C.1.LU": {"sources": ["3.C.1.a", "3.C.1.d"]},
+        "M.3.D.LU": {"sources": ["3.D.1"]},
+        "M.LULUCF": {"sources": ["3.B", "3.C.1.LU", "M.3.D.LU"]},
+        "M.0.EL": {
+            "sources": ["1", "2", "M.AG", "4"],
+        },
+        "3": {"sources": ["M.AG", "M.LULUCF"]},  # consistency check
+        "0": {"sources": ["1", "2", "3", "4", "5"]},  # consistency check
+    },
+    "basket_copy": {
+        "GWPs_to_add": ["AR4GWP100", "SARGWP100", "AR6GWP100"],
+        "entities": ["HFCS", "PFCS", "UnspMixOfHFCs"],
+        "source_GWP": gwp_to_use,
+    },
+}
+
+country_processing_step2 = {
+    "downscale": {
+        # "sectors": {
+        #     "M.1.B.23": {
+        #         "basket": "M.1.B.23",
+        #         "basket_contents": ["1.B.2", "1.B.3"],
+        #         "entities": ["KYOTOGHG (AR5GWP100)"],
+        #         "dim": f'category ({coords_terminologies["category"]})',
+        #     },
+        # },
+        "entities": {
+            "KYOTO": {
+                "basket": "KYOTOGHG (AR5GWP100)",
+                "basket_contents": [
+                    "CH4",
+                    "CO2",
+                    "N2O",
+                    "HFCS (AR5GWP100)",
+                    "PFCS (AR5GWP100)",
+                    "SF6",
+                ],
+                "sel": {
+                    f'category ({coords_terminologies["category"]})': [
+                        "1",
+                        "1.A",
+                        "1.B",
+                        "1.B.2",
+                        # "1.B.3" # all zero -> doesn't work
+                        # "1.C",  # we don't have trend values for 1.C
+                        # Downscaling currently doesn't work for all zero basket content, see
+                        # https://github.com/pik-primap/primap2/issues/254#issue-2491434285
+                        # "2",  # all zero -> doesn't work
+                        # "2.A",  # all zero -> doesn't work
+                        # "2.B",  # all zero -> doesn't work
+                        # "2.C",  # all zero -> doesn't work
+                        # "2.D",  # all zero -> doesn't work
+                        # "2.E",  # all zero -> doesn't work
+                        # "2.F",  # all zero -> doesn't work
+                        # "2.G",  # all zero -> doesn't work
+                        # "2.H",  # all zero -> doesn't work
+                        "3",
+                        "3.A",
+                        "3.B",
+                        "3.C",
+                        "3.D",
+                        "4",
+                    ]
+                },
+            },
+        },
+    },
+}

+ 395 - 0
src/unfccc_ghg_data/unfccc_reader/Saint_Kitts_and_Nevis/read_KNA_BUR1_from_pdf.py

@@ -0,0 +1,395 @@
+"""
+Read Saint Kitts and Nevis' BUR1 from pdf
+"""
+import camelot
+import pandas as pd
+import primap2 as pm2
+
+from unfccc_ghg_data.helper import (
+    downloaded_data_path,
+    extracted_data_path,
+    fix_rows,
+    process_data_for_country,
+)
+from unfccc_ghg_data.unfccc_reader.Saint_Kitts_and_Nevis.config_kna_bur1 import (
+    conf,
+    conf_general,
+    conf_trend,
+    coords_cols,
+    coords_defaults,
+    coords_terminologies,
+    coords_value_mapping,
+    country_processing_step1,
+    country_processing_step2,
+    filter_remove,
+    fix_values_main,
+    fix_values_trend,
+    gas_baskets,
+    meta_data,
+)
+
+if __name__ == "__main__":
+    # ###
+    # configuration
+    # ###
+
+    input_folder = downloaded_data_path / "UNFCCC" / "Saint_Kitts_and_Nevis" / "BUR1"
+    output_folder = extracted_data_path / "UNFCCC" / "Saint_Kitts_and_Nevis"
+    if not output_folder.exists():
+        output_folder.mkdir()
+
+    pdf_file = "First_BUR_St.Kitts_Nevis.pdf"
+    output_filename = "KNA_BUR1_2023_"
+    compression = dict(zlib=True, complevel=9)
+
+    def repl(m):  # noqa: D103
+        return m.group("code")
+
+    # ###
+    # 1. Read trend tables
+    # ###
+
+    df_trend = None
+    for table in conf_trend.keys():
+        print("-" * 45)
+        print(f"Reading {table} trend table.")
+        df_table = None
+        for page in conf_trend[table]["page_defs"].keys():
+            print(f"Page {page}")
+            tables_inventory_original = camelot.read_pdf(
+                str(input_folder / pdf_file),
+                pages=page,
+                split_text=True,
+                **conf_trend[table]["page_defs"][page]["read_params"],
+            )
+
+            df_page = tables_inventory_original[0].df
+
+            skip_rows_start = conf_trend[table]["page_defs"][page]["skip_rows_start"]
+            if not skip_rows_start == 0:
+                df_page = df_page[skip_rows_start:]
+
+            if df_table is None:
+                # Reset index to avoid pandas' SettingWithCopyWarning
+                df_table = df_page.reset_index(drop=True)
+            else:
+                df_table = pd.concat(
+                    [
+                        df_table,
+                        df_page,
+                    ],
+                    axis=0,
+                    join="outer",
+                ).reset_index(drop=True)
+
+        # fix content that spreads across multiple rows
+        if "rows_to_fix" in conf_trend[table]:
+            for n_rows in conf_trend[table]["rows_to_fix"].keys():
+                print(f"Merge content for {n_rows=}")
+                df_table = fix_rows(
+                    df_table,
+                    rows_to_fix=conf_trend[table]["rows_to_fix"][n_rows],
+                    col_to_use=0,
+                    n_rows=n_rows,
+                )
+
+        df_table.columns = (
+            conf_trend[table]["header"]
+            + conf_trend[table]["years"]
+            + conf_trend[table]["extra_columns"]
+        )
+
+        # drop columns if needed
+        if "drop_cols" in conf_trend[table].keys():
+            df_table = df_table.drop(columns=conf_trend[table]["drop_cols"])
+
+        # category codes from category names
+        df_table["category"] = df_table["orig_category"]
+        # Remove line break characters
+        df_table["category"] = df_table["category"].str.replace("\n", " ")
+        # first the manual replacements
+        if "cat_codes_manual" in conf_trend[table].keys():
+            df_table["category"] = df_table["category"].replace(
+                conf_trend[table]["cat_codes_manual"]
+            )
+        # remove dots from category codes
+        df_table["category"] = df_table["category"].str.replace(".", "")
+        # then the regex replacements
+        df_table["category"] = df_table["category"].str.replace(
+            conf_general["cat_code_regexp"], repl, regex=True
+        )
+        df_table = df_table.drop(columns="orig_category")
+
+        # drop rows if needed
+        if "rows_to_drop" in conf_trend[table].keys():
+            for row in conf_trend[table]["rows_to_drop"]:
+                row_to_delete = df_table.index[df_table["category"] == row][0]
+                df_table = df_table.drop(index=row_to_delete)
+
+        # bring values in right format
+        for year in conf_trend[table]["years"]:
+            if "replace_data_entries" in conf_trend[table].keys():
+                df_table[year] = df_table[year].replace(
+                    conf_trend[table]["replace_data_entries"]
+                )
+            df_table[year] = df_table[year].str.replace("\n", "")
+            df_table[year] = df_table[year].str.replace(",", ".")
+            # There are "invisible" numbers in trend table on page 112, "A. Forest Land"
+            # I'm removing them here, but they actually belong to the above,
+            # which I didn't know when I wrote this code
+            # TODO: Invisible values can be added to row above directly
+            if "split_values" in conf_trend[table].keys():
+                cat = conf_trend[table]["split_values"]["cat"]
+                keep_value_no = conf_trend[table]["split_values"]["keep_value_no"]
+                new_value = (
+                    df_table.loc[df_table["category"] == cat, year]
+                    .item()
+                    .split(" ")[keep_value_no]
+                )
+                df_table.loc[df_table["category"] == cat, year] = new_value
+
+        if "fix_single_value" in conf_trend[table].keys():
+            cat = conf_trend[table]["fix_single_value"]["cat"]
+            year = conf_trend[table]["fix_single_value"]["year"]
+            new_value = conf_trend[table]["fix_single_value"]["new_value"]
+            df_table.loc[df_table["category"] == cat, year] = new_value
+
+        df_table["unit"] = conf_trend[table]["unit"]
+        df_table["entity"] = conf_trend[table]["entity"]
+
+        # stack the tables vertically
+        if df_trend is None:
+            df_trend = df_table.reset_index(drop=True)
+        else:
+            df_trend = pd.concat(
+                [
+                    df_trend,
+                    df_table,
+                ],
+                axis=0,
+                join="outer",
+            ).reset_index(drop=True)
+
+    # some categories present in trend table on page 112 and the following detailed
+    # tables for the sub-categories
+    df_trend = df_trend.drop_duplicates()
+
+    for cat, year, new_value in fix_values_trend:
+        # make sure there is exactly one value that matches the filter
+        # TODO ruff wants to remove the assert statements here
+        assert len(df_trend.loc[df_trend["category"] == cat, year]) == 1  # noqa: S101
+        df_trend.loc[df_trend["category"] == cat, year] = new_value
+
+    df_trend_if = pm2.pm2io.convert_wide_dataframe_if(
+        df_trend,
+        coords_cols=coords_cols,
+        coords_defaults=coords_defaults,
+        coords_terminologies=coords_terminologies,
+        coords_value_mapping=coords_value_mapping,
+        filter_remove=filter_remove,
+        meta_data=meta_data,
+    )
+    #
+    ### convert to primap2 format ###
+    print("Converting to primap2 format.")
+    data_trend_pm2 = pm2.pm2io.from_interchange_format(df_trend_if)
+
+    # ###
+    # 2. Read in main tables
+    # ###
+
+    df_main = None
+    for sector in conf.keys():
+        print("-" * 45)
+        print(f"Reading table for {sector}.")
+
+        df_sector = None
+        for page in conf[sector]["page_defs"].keys():
+            print(f"Page {page}")
+            tables_inventory_original = camelot.read_pdf(
+                str(input_folder / pdf_file),
+                pages=page,
+                flavor="lattice",
+            )
+
+            df_page = tables_inventory_original[0].df
+
+            skip_rows_start = conf[sector]["page_defs"][page]["skip_rows_start"]
+            if not skip_rows_start == 0:
+                df_page = df_page[skip_rows_start:]
+
+            if df_sector is None:
+                # Reset index to avoid pandas' SettingWithCopyWarning
+                df_sector = df_page.reset_index(drop=True)
+            else:
+                df_sector = pd.concat(
+                    [
+                        df_sector,
+                        df_page,
+                    ],
+                    axis=0,
+                    join="outer",
+                ).reset_index(drop=True)
+
+        df_sector.columns = conf[sector]["header"] + conf[sector]["entities"]
+
+        df_sector["category"] = df_sector["orig_category"]
+
+        # Remove line break characters
+        df_sector["category"] = df_sector["category"].str.replace("\n", " ")
+
+        # first the manual replacements
+        df_sector["category"] = df_sector["category"].replace(
+            conf[sector]["cat_codes_manual"]
+        )
+
+        # remove dots from category codes
+        df_sector["category"] = df_sector["category"].str.replace(".", "")
+
+        # then the regex replacements
+        df_sector["category"] = df_sector["category"].str.replace(
+            conf_general["cat_code_regexp"], repl, regex=True
+        )
+
+        df_sector = df_sector.drop(columns="orig_category")
+
+        # bring in long format, so it can be concatenated with other tables
+        df_sector = pd.melt(
+            df_sector,
+            id_vars="category",
+            value_vars=conf[sector]["entities"],
+        )
+
+        # pd.melt always outputs value and variable as column names
+        df_sector = df_sector.rename({"value": "data", "variable": "entity"}, axis=1)
+
+        # clean data column
+        df_sector["data"] = df_sector["data"].replace(
+            conf[sector]["replace_data_entries"]
+        )
+        df_sector["data"] = df_sector["data"].str.replace(",", ".")
+        df_sector["data"] = df_sector["data"].str.replace("\n", "")
+
+        df_sector["unit"] = df_sector["entity"].replace(conf[sector]["unit_mapping"])
+
+        if df_main is None:
+            df_main = df_sector
+        else:
+            df_main = pd.concat(
+                [
+                    df_sector,
+                    df_main,
+                ],
+                axis=0,
+                join="outer",
+            ).reset_index(drop=True)
+
+    # year is the same for all sector tables
+    df_main["time"] = "2018"
+
+    # fix values
+    for cat, ent, new_value in fix_values_main:
+        # Make sure value to replace is found in data frame
+        # TODO ruff wants to remove the assert statements here
+        assert (  # noqa: S101
+            len(
+                df_main.loc[
+                    (df_main["category"] == cat) & (df_main["entity"] == ent), "data"
+                ]
+            )
+            == 1
+        )
+        df_main.loc[
+            (df_main["category"] == cat) & (df_main["entity"] == ent), "data"
+        ] = new_value
+
+    ### convert to interchange format ###
+    print("Converting to interchange format.")
+    df_main_IF = pm2.pm2io.convert_long_dataframe_if(
+        df_main,
+        coords_cols=coords_cols,
+        coords_defaults=coords_defaults,
+        coords_terminologies=coords_terminologies,
+        coords_value_mapping=coords_value_mapping,
+        filter_remove=filter_remove,
+        meta_data=meta_data,
+        convert_str=True,
+        time_format="%Y",
+    )
+
+    ### convert to primap2 format ###
+    print("Converting to primap2 format.")
+    data_main_pm2 = pm2.pm2io.from_interchange_format(df_main_IF)
+
+    # # ###
+    # # Merge tables.
+    # # ###
+
+    print("Merging main and trend table.")
+    data_pm2 = data_main_pm2.pr.merge(data_trend_pm2, tolerance=1)
+
+    # # ###
+    # # Save raw data to IF and native format.
+    # # ###
+
+    data_if = data_pm2.pr.to_interchange_format()
+
+    pm2.pm2io.write_interchange_format(
+        output_folder / (output_filename + coords_terminologies["category"] + "_raw"),
+        data_if,
+    )
+
+    encoding = {var: compression for var in data_pm2.data_vars}
+    data_pm2.pr.to_netcdf(
+        output_folder
+        / (output_filename + coords_terminologies["category"] + "_raw.nc"),
+        encoding=encoding,
+    )
+
+    # # ###
+    # # Processing
+    # # ###
+
+    # create the gas baskets before aggregating the categories
+    data_proc_pm2 = process_data_for_country(
+        data_country=data_pm2,
+        entities_to_ignore=[],
+        gas_baskets=gas_baskets,
+        filter_dims=None,
+        cat_terminology_out=None,
+        category_conversion=None,
+        sectors_out=None,
+        processing_info_country=country_processing_step1,
+    )
+
+    data_proc_pm2 = process_data_for_country(
+        data_country=data_proc_pm2,
+        entities_to_ignore=[],
+        gas_baskets={},
+        filter_dims=None,
+        cat_terminology_out=None,
+        category_conversion=None,
+        sectors_out=None,
+        processing_info_country=country_processing_step2,
+    )
+
+    # # ###
+    # # save processed data to IF and native format
+    # # ###
+
+    terminology_proc = coords_terminologies["category"]
+
+    data_proc_if = data_proc_pm2.pr.to_interchange_format()
+
+    if not output_folder.exists():
+        output_folder.mkdir()
+    pm2.pm2io.write_interchange_format(
+        output_folder / (output_filename + terminology_proc), data_proc_if
+    )
+
+    encoding = {var: compression for var in data_proc_pm2.data_vars}
+    data_proc_pm2.pr.to_netcdf(
+        output_folder / (output_filename + terminology_proc + ".nc"), encoding=encoding
+    )
+
+    print("Saved processed data.")