@@ -4,7 +4,12 @@ Read Guinea's BUR1 from pdf
import camelot
import pandas as pd
import primap2 as pm2
-from config_gin_bur1 import (
+
+from unfccc_ghg_data.helper import downloaded_data_path, extracted_data_path
+from unfccc_ghg_data.helper.functions import process_data_for_country
+from unfccc_ghg_data.helper.functions_temp import find_and_replace_values
+
+from .config_gin_bur1 import (
    coords_cols,
    coords_defaults,
    coords_terminologies,
@@ -22,651 +27,664 @@ from config_gin_bur1 import (
    set_value,
)

-from unfccc_ghg_data.helper import downloaded_data_path, extracted_data_path
-from unfccc_ghg_data.helper.functions import process_data_for_country
-from unfccc_ghg_data.helper.functions_temp import find_and_replace_values
+if __name__ == "__main__":
+    # ###
+    # configuration
+    # ###

-# ###
-# configuration
-# ###
+    input_folder = downloaded_data_path / "UNFCCC" / "Guinea" / "BUR1"
+    output_folder = extracted_data_path / "UNFCCC" / "Guinea"
+    if not output_folder.exists():
+        output_folder.mkdir()

-input_folder = downloaded_data_path / "UNFCCC" / "Guinea" / "BUR1"
-output_folder = extracted_data_path / "UNFCCC" / "Guinea"
-if not output_folder.exists():
-    output_folder.mkdir()
+    pdf_file = "Rapport_IGES-Guinee-BUR1_VF.pdf"
+    output_filename = "GIN_BUR1_2023_"
+    category_column = f"category ({coords_terminologies['category']})"
+    compression = dict(zlib=True, complevel=9)
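+    # netCDF compression settings, applied to every data variable via the
+    # `encoding` argument when the datasets are written below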

-pdf_file = "Rapport_IGES-Guinee-BUR1_VF.pdf"
-output_filename = "GIN_BUR1_2023_"
-category_column = f"category ({coords_terminologies['category']})"
-compression = dict(zlib=True, complevel=9)
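+    # replacement callback for the category-code regex: str.replace() below
+    # calls it for every match of inv_conf["cat_code_regexp"] and keeps only
+    # the named group "code"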
+    def repl(m): # noqa: D103
+        return m.group("code")

+    # ###
+    # 1. Read in main tables
+    # ###

-def repl(m): # noqa: D103
-    return m.group("code")
+    df_main = None
+    for page in inv_conf["pages_to_read"]["main"]:
+        print("-" * 45)
+        print(f"Reading table from page {page}.")

+        tables_inventory_original = camelot.read_pdf(
+            str(input_folder / pdf_file),
+            pages=page,
+            table_areas=page_def_templates[page]["area"],
+            columns=page_def_templates[page]["cols"],
+            flavor="stream",
+            split_text=True,
+        )

-# ###
-# 1. Read in main tables
-# ###
+        print("Reading complete.")

-df_main = None
-for page in inv_conf["pages_to_read"]["main"]:
-    print("-" * 45)
-    print(f"Reading table from page {page}.")
+        df_inventory = tables_inventory_original[0].df.copy()

-    tables_inventory_original = camelot.read_pdf(
-        str(input_folder / pdf_file),
-        pages=page,
-        table_areas=page_def_templates[page]["area"],
-        columns=page_def_templates[page]["cols"],
-        flavor="stream",
-        split_text=True,
-    )
+        # set category names (they moved one row up)
+        if page in set_value["main"].keys():
+            for idx, col, value in set_value["main"][page]:
+                df_inventory.loc[idx, col] = value
+        # delete empty row
+        if page in delete_row["main"].keys():
+            for idx in delete_row["main"][page]:
+                df_inventory = df_inventory.drop(index=idx)

-    print("Reading complete.")
-
-    df_inventory = tables_inventory_original[0].df.copy()
-
-    # set category names (they moved one row up)
-    if page in set_value["main"].keys():
-        for idx, col, value in set_value["main"][page]:
-            df_inventory.loc[idx, col] = value
-    # delete empty row
-    if page in delete_row["main"].keys():
-        for idx in delete_row["main"][page]:
-            df_inventory = df_inventory.drop(index=idx)
-
-    # add header and unit
-    df_header = pd.DataFrame([inv_conf["header"], inv_conf["unit"]])
-    df_inventory = pd.concat(
-        [df_header, df_inventory], axis=0, join="outer"
-    ).reset_index(drop=True)
-    df_inventory = pm2.pm2io.nir_add_unit_information(
-        df_inventory,
-        unit_row=inv_conf["unit_row"],
-        entity_row=inv_conf["entity_row"],
-        regexp_entity=".*",
-        regexp_unit=".*",
-        default_unit="Gg",
-    )
+        # add header and unit
+        df_header = pd.DataFrame([inv_conf["header"], inv_conf["unit"]])
+        df_inventory = pd.concat(
+            [df_header, df_inventory], axis=0, join="outer"
+        ).reset_index(drop=True)
+        df_inventory = pm2.pm2io.nir_add_unit_information(
+            df_inventory,
+            unit_row=inv_conf["unit_row"],
+            entity_row=inv_conf["entity_row"],
+            regexp_entity=".*",
+            regexp_unit=".*",
+            default_unit="Gg",
+        )

-    print("Added unit information.")
+        print("Added unit information.")

-    # set index
-    df_inventory = df_inventory.set_index(inv_conf["index_cols"])
+        # set index
+        df_inventory = df_inventory.set_index(inv_conf["index_cols"])

-    # convert to long format
-    df_inventory_long = pm2.pm2io.nir_convert_df_to_long(
-        df_inventory, inv_conf["year"][page], inv_conf["header_long"]
-    )
+        # convert to long format
+        df_inventory_long = pm2.pm2io.nir_convert_df_to_long(
+            df_inventory, inv_conf["year"][page], inv_conf["header_long"]
+        )

-    # extract category from tuple
-    df_inventory_long["orig_cat_name"] = df_inventory_long["orig_cat_name"].str[0]
+        # extract category from tuple
+        df_inventory_long["orig_cat_name"] = df_inventory_long["orig_cat_name"].str[0]

-    # prep for conversion to PM2 IF and native format
-    df_inventory_long["category"] = df_inventory_long["orig_cat_name"]
+        # prep for conversion to PM2 IF and native format
+        df_inventory_long["category"] = df_inventory_long["orig_cat_name"]

-    df_inventory_long["category"] = df_inventory_long["category"].replace(
-        inv_conf["cat_codes_manual"]["main"]
-    )
+        df_inventory_long["category"] = df_inventory_long["category"].replace(
+            inv_conf["cat_codes_manual"]["main"]
+        )
+
+        df_inventory_long["category"] = df_inventory_long["category"].str.replace(
+            ".", ""
+        )
+
+        # regex replacements
+        df_inventory_long["category"] = df_inventory_long["category"].str.replace(
+            inv_conf["cat_code_regexp"], repl, regex=True
+        )

-    df_inventory_long["category"] = df_inventory_long["category"].str.replace(".", "")
+        df_inventory_long = df_inventory_long.reset_index(drop=True)
+
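+        # the report uses decimal commas; "NE1" is presumably "NE" plus a
+        # footnote marker picked up during PDF extraction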
+        df_inventory_long["data"] = df_inventory_long["data"].str.replace(",", ".")
+        df_inventory_long["data"] = df_inventory_long["data"].str.replace("NE1", "NE")
+
+        # make sure all col headers are str
+        df_inventory_long.columns = df_inventory_long.columns.map(str)
+        df_inventory_long = df_inventory_long.drop(columns=["orig_cat_name"])
+
+        if df_main is None:
+            df_main = df_inventory_long
+        else:
+            df_main = pd.concat(
+                [df_main, df_inventory_long],
+                axis=0,
+                join="outer",
+            ).reset_index(drop=True)
+
+    print("Converting to interchange format.")
+    df_all_IF = pm2.pm2io.convert_long_dataframe_if(
+        df_main,
+        coords_cols=coords_cols,
+        coords_defaults=coords_defaults,
+        coords_terminologies=coords_terminologies,
+        coords_value_mapping=coords_value_mapping["main"],
+        filter_remove=filter_remove,
+        meta_data=meta_data,
+        convert_str=True,
+        time_format="%Y",
+    )

-    # regex replacements
-    df_inventory_long["category"] = df_inventory_long["category"].str.replace(
-        inv_conf["cat_code_regexp"], repl, regex=True
+    df_all_IF = find_and_replace_values(
+        df=df_all_IF, replace_info=replace_info["main"], category_column=category_column
    )

-    df_inventory_long = df_inventory_long.reset_index(drop=True)
+    ### convert to primap2 format ###
+    data_pm2_main = pm2.pm2io.from_interchange_format(df_all_IF)

-    df_inventory_long["data"] = df_inventory_long["data"].str.replace(",", ".")
-    df_inventory_long["data"] = df_inventory_long["data"].str.replace("NE1", "NE")
+    # ###
+    # 2. Read energy sector tables
+    # ###

-    # make sure all col headers are str
-    df_inventory_long.columns = df_inventory_long.columns.map(str)
-    df_inventory_long = df_inventory_long.drop(columns=["orig_cat_name"])
+    df_energy = None
+    for page in inv_conf["pages_to_read"]["energy"]:
+        print("-" * 45)
+        print(f"Reading table from page {page}.")

-    if df_main is None:
-        df_main = df_inventory_long
-    else:
-        df_main = pd.concat(
-            [df_main, df_inventory_long],
+        tables_inventory_original = camelot.read_pdf(
+            str(input_folder / pdf_file), pages=page, flavor="lattice", split_text=True
+        )
+
+        print("Reading complete.")
+
+        df_energy_year = pd.concat(
+            [tables_inventory_original[0].df[2:], tables_inventory_original[1].df[3:]],
            axis=0,
            join="outer",
        ).reset_index(drop=True)

-print("Converting to interchange format.")
-df_all_IF = pm2.pm2io.convert_long_dataframe_if(
-    df_main,
-    coords_cols=coords_cols,
-    coords_defaults=coords_defaults,
-    coords_terminologies=coords_terminologies,
-    coords_value_mapping=coords_value_mapping["main"],
-    filter_remove=filter_remove,
-    meta_data=meta_data,
-    convert_str=True,
-    time_format="%Y",
-)
+        # TODO This step should be done in pm2.pm2io.convert_long_dataframe_if()
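+        # drop rows whose label in the first column is listed in the config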
+        for row in delete_rows_by_category["energy"][page]:
+            row_to_delete = df_energy_year.index[df_energy_year[0] == row][0]
+            df_energy_year = df_energy_year.drop(index=row_to_delete)

-df_all_IF = find_and_replace_values(
-    df=df_all_IF, replace_info=replace_info["main"], category_column=category_column
-)
+        # add header and unit
+        df_header = pd.DataFrame([inv_conf["header_energy"], inv_conf["unit_energy"]])

-### convert to primap2 format ###
-data_pm2_main = pm2.pm2io.from_interchange_format(df_all_IF)
+        df_energy_year = pd.concat(
+            [df_header, df_energy_year], axis=0, join="outer"
+        ).reset_index(drop=True)

-# ###
-# 2. Read energy sector tables
-# ###
+        df_energy_year = pm2.pm2io.nir_add_unit_information(
+            df_energy_year,
+            unit_row=inv_conf["unit_row"],
+            entity_row=inv_conf["entity_row"],
+            regexp_entity=".*",
+            regexp_unit=".*",
+            default_unit="Gg",
+        )

-df_energy = None
-for page in inv_conf["pages_to_read"]["energy"]:
-    print("-" * 45)
-    print(f"Reading table from page {page}.")
+        print("Added unit information.")
+        # set index
+        df_energy_year = df_energy_year.set_index(inv_conf["index_cols"])

-    tables_inventory_original = camelot.read_pdf(
-        str(input_folder / pdf_file), pages=page, flavor="lattice", split_text=True
-    )
+        # convert to long format
+        df_energy_year_long = pm2.pm2io.nir_convert_df_to_long(
+            df_energy_year, inv_conf["year"][page], inv_conf["header_long"]
+        )

-    print("Reading complete.")
-
-    df_energy_year = pd.concat(
-        [tables_inventory_original[0].df[2:], tables_inventory_original[1].df[3:]],
-        axis=0,
-        join="outer",
-    ).reset_index(drop=True)
-
-    # TODO This step should be done in pm2.pm2io.convert_long_dataframe_if()
-    for row in delete_rows_by_category["energy"][page]:
-        row_to_delete = df_energy_year.index[df_energy_year[0] == row][0]
-        df_energy_year = df_energy_year.drop(index=row_to_delete)
-
-    # add header and unit
-    df_header = pd.DataFrame([inv_conf["header_energy"], inv_conf["unit_energy"]])
-
-    df_energy_year = pd.concat(
-        [df_header, df_energy_year], axis=0, join="outer"
-    ).reset_index(drop=True)
-
-    df_energy_year = pm2.pm2io.nir_add_unit_information(
-        df_energy_year,
-        unit_row=inv_conf["unit_row"],
-        entity_row=inv_conf["entity_row"],
-        regexp_entity=".*",
-        regexp_unit=".*",
-        default_unit="Gg",
-    )
+        # extract from tuple
+        df_energy_year_long["orig_cat_name"] = df_energy_year_long["orig_cat_name"].str[
+            0
+        ]

-    print("Added unit information.")
-    # set index
-    df_energy_year = df_energy_year.set_index(inv_conf["index_cols"])
+        # prep for conversion to PM2 IF and native format
+        # make a copy of the categories row
+        df_energy_year_long["category"] = df_energy_year_long["orig_cat_name"]

-    # convert to long format
-    df_energy_year_long = pm2.pm2io.nir_convert_df_to_long(
-        df_energy_year, inv_conf["year"][page], inv_conf["header_long"]
-    )
+        # replace cat names by codes in col "category"
+        # first the manual replacements
+        df_energy_year_long["category"] = df_energy_year_long["category"].str.replace(
+            "\n", ""
+        )
+        df_energy_year_long["category"] = df_energy_year_long["category"].replace(
+            inv_conf["cat_codes_manual"]["energy"]
+        )

-    # extract from tuple
-    df_energy_year_long["orig_cat_name"] = df_energy_year_long["orig_cat_name"].str[0]
+        df_energy_year_long["category"] = df_energy_year_long["category"].str.replace(
+            ".", ""
+        )

-    # prep for conversion to PM2 IF and native format
-    # make a copy of the categories row
-    df_energy_year_long["category"] = df_energy_year_long["orig_cat_name"]
+        # then the regex replacements
+        df_energy_year_long["category"] = df_energy_year_long["category"].str.replace(
+            inv_conf["cat_code_regexp"], repl, regex=True
+        )

-    # replace cat names by codes in col "category"
-    # first the manual replacements
-    df_energy_year_long["category"] = df_energy_year_long["category"].str.replace(
-        "\n", ""
-    )
-    df_energy_year_long["category"] = df_energy_year_long["category"].replace(
-        inv_conf["cat_codes_manual"]["energy"]
-    )
+        df_energy_year_long = df_energy_year_long.reset_index(drop=True)

-    df_energy_year_long["category"] = df_energy_year_long["category"].str.replace(
-        ".", ""
-    )
+        df_energy_year_long["data"] = df_energy_year_long["data"].str.replace(",", ".")
+        df_energy_year_long["data"] = df_energy_year_long["data"].str.replace(
+            "NE1", "NE"
+        )

-    # then the regex replacements
-    df_energy_year_long["category"] = df_energy_year_long["category"].str.replace(
-        inv_conf["cat_code_regexp"], repl, regex=True
+        # make sure all col headers are str
+        df_energy_year_long.columns = df_energy_year_long.columns.map(str)
+        df_energy_year_long = df_energy_year_long.drop(columns=["orig_cat_name"])
+
+        if df_energy is None:
+            df_energy = df_energy_year_long
+        else:
+            df_energy = pd.concat(
+                [df_energy, df_energy_year_long],
+                axis=0,
+                join="outer",
+            ).reset_index(drop=True)
+
+    print("Converting to interchange format.")
+    df_energy_IF = pm2.pm2io.convert_long_dataframe_if(
+        df_energy,
+        coords_cols=coords_cols,
+        coords_defaults=coords_defaults,
+        coords_terminologies=coords_terminologies,
+        coords_value_mapping=coords_value_mapping["energy"],
+        filter_remove=filter_remove,
+        meta_data=meta_data,
+        convert_str=True,
+        time_format="%Y",
    )

-    df_energy_year_long = df_energy_year_long.reset_index(drop=True)
+    ### convert to primap2 format ###
+    data_pm2_energy = pm2.pm2io.from_interchange_format(df_energy_IF)

-    df_energy_year_long["data"] = df_energy_year_long["data"].str.replace(",", ".")
-    df_energy_year_long["data"] = df_energy_year_long["data"].str.replace("NE1", "NE")
+    # ###
+    # 3. Read in afolu table
+    # ###

-    # make sure all col headers are str
-    df_energy_year_long.columns = df_energy_year_long.columns.map(str)
-    df_energy_year_long = df_energy_year_long.drop(columns=["orig_cat_name"])
+    df_afolu = None
+    for page in inv_conf["pages_to_read"]["afolu"]:
+        print("-" * 45)
+        print(f"Reading table from page {page}.")

-    if df_energy is None:
-        df_energy = df_energy_year_long
-    else:
-        df_energy = pd.concat(
-            [df_energy, df_energy_year_long],
-            axis=0,
-            join="outer",
+        tables_inventory_original = camelot.read_pdf(
+            str(input_folder / pdf_file), pages=page, flavor="lattice", split_text=True
+        )
+        print("Reading complete.")
+
+        if page == "127":
+            # table on page 127 has one extra row at the top
+            # and one extra category 3.A.1.j
+            df_afolu_year = tables_inventory_original[0].df[3:]
+            # 3.A.1.a.i to 3.A.1.j exist twice.
+            # Rename duplicate categories in tables.
+            for index, category_name in replace_categories["afolu"]["127"]:
+                df_afolu_year.loc[index, 0] = category_name
+        else:
+            # cut first two lines
+            df_afolu_year = tables_inventory_original[0].df[2:]
+            # On pages 124-126 the categories to be renamed are slightly different
+            for index, category_name in replace_categories["afolu"]["124-126"]:
+                df_afolu_year.loc[index, 0] = category_name
+
+        # add header and unit
+        df_header = pd.DataFrame([inv_conf["header_afolu"], inv_conf["unit_afolu"]])
+
+        df_afolu_year = pd.concat(
+            [df_header, df_afolu_year], axis=0, join="outer"
        ).reset_index(drop=True)

-print("Converting to interchange format.")
-df_energy_IF = pm2.pm2io.convert_long_dataframe_if(
-    df_energy,
-    coords_cols=coords_cols,
-    coords_defaults=coords_defaults,
-    coords_terminologies=coords_terminologies,
-    coords_value_mapping=coords_value_mapping["energy"],
-    filter_remove=filter_remove,
-    meta_data=meta_data,
-    convert_str=True,
-    time_format="%Y",
-)
-
-### convert to primap2 format ###
-data_pm2_energy = pm2.pm2io.from_interchange_format(df_energy_IF)
-
-# ###
-# 3. Read in afolu table
-# ###
+        df_afolu_year = pm2.pm2io.nir_add_unit_information(
+            df_afolu_year,
+            unit_row=inv_conf["unit_row"],
+            entity_row=inv_conf["entity_row"],
+            regexp_entity=".*",
+            regexp_unit=".*",
+            default_unit="Gg",
+        )

-df_afolu = None
-for page in inv_conf["pages_to_read"]["afolu"]:
-    print("-" * 45)
-    print(f"Reading table from page {page}.")
+        print("Added unit information.")

-    tables_inventory_original = camelot.read_pdf(
-        str(input_folder / pdf_file), pages=page, flavor="lattice", split_text=True
-    )
-    print("Reading complete.")
-
-    if page == "127":
-        # table on page 127 has one extra row at the top
-        # and one extra category 3.A.1.j
-        df_afolu_year = tables_inventory_original[0].df[3:]
-        # 3.A.1.a.i to 3.A.1.j exist twice.
-        # Rename duplicate categories in tables.
-        for index, category_name in replace_categories["afolu"]["127"]:
-            df_afolu_year.loc[index, 0] = category_name
-    else:
-        # cut first two lines
-        df_afolu_year = tables_inventory_original[0].df[2:]
-        # On pages 124-126 the wrong categories are slightly different
-        for index, category_name in replace_categories["afolu"]["124-126"]:
-            df_afolu_year.loc[index, 0] = category_name
-
-    # add header and unit
-    df_header = pd.DataFrame([inv_conf["header_afolu"], inv_conf["unit_afolu"]])
-
-    df_afolu_year = pd.concat(
-        [df_header, df_afolu_year], axis=0, join="outer"
-    ).reset_index(drop=True)
-
-    df_afolu_year = pm2.pm2io.nir_add_unit_information(
-        df_afolu_year,
-        unit_row=inv_conf["unit_row"],
-        entity_row=inv_conf["entity_row"],
-        regexp_entity=".*",
-        regexp_unit=".*",
-        default_unit="Gg",
-    )
+        # set index
+        df_afolu_year = df_afolu_year.set_index(inv_conf["index_cols"])

-    print("Added unit information.")
-
-    # set index
-    df_afolu_year = df_afolu_year.set_index(inv_conf["index_cols"])
+        # convert to long format
+        df_afolu_year_long = pm2.pm2io.nir_convert_df_to_long(
+            df_afolu_year, inv_conf["year"][page], inv_conf["header_long"]
+        )

-    # convert to long format
-    df_afolu_year_long = pm2.pm2io.nir_convert_df_to_long(
-        df_afolu_year, inv_conf["year"][page], inv_conf["header_long"]
-    )
+        df_afolu_year_long["orig_cat_name"] = df_afolu_year_long["orig_cat_name"].str[0]

-    df_afolu_year_long["orig_cat_name"] = df_afolu_year_long["orig_cat_name"].str[0]
+        # prep for conversion to PM2 IF and native format
+        # make a copy of the categories row
+        df_afolu_year_long["category"] = df_afolu_year_long["orig_cat_name"]

-    # prep for conversion to PM2 IF and native format
-    # make a copy of the categories row
-    df_afolu_year_long["category"] = df_afolu_year_long["orig_cat_name"]
+        # regex replacements
+        df_afolu_year_long["category"] = df_afolu_year_long["category"].str.replace(
+            inv_conf["cat_code_regexp"], repl, regex=True
+        )

-    # regex replacements
-    df_afolu_year_long["category"] = df_afolu_year_long["category"].str.replace(
-        inv_conf["cat_code_regexp"], repl, regex=True
+        df_afolu_year_long = df_afolu_year_long.reset_index(drop=True)
+
+        df_afolu_year_long["data"] = df_afolu_year_long["data"].str.replace(",", ".")
+        df_afolu_year_long["data"] = df_afolu_year_long["data"].str.replace("NE1", "NE")
+
+        # make sure all col headers are str
+        df_afolu_year_long.columns = df_afolu_year_long.columns.map(str)
+        df_afolu_year_long = df_afolu_year_long.drop(columns=["orig_cat_name"])
+
+        if df_afolu is None:
+            df_afolu = df_afolu_year_long
+        else:
+            df_afolu = pd.concat(
+                [df_afolu, df_afolu_year_long],
+                axis=0,
+                join="outer",
+            ).reset_index(drop=True)
+
+    print("Converting to interchange format.")
+    df_afolu_IF = pm2.pm2io.convert_long_dataframe_if(
+        df_afolu,
+        coords_cols=coords_cols,
+        coords_defaults=coords_defaults,
+        coords_terminologies=coords_terminologies,
+        coords_value_mapping=coords_value_mapping["afolu"],
+        filter_remove=filter_remove,
+        meta_data=meta_data,
+        convert_str=True,
+        time_format="%Y",
    )

-    df_afolu_year_long = df_afolu_year_long.reset_index(drop=True)
-
-    df_afolu_year_long["data"] = df_afolu_year_long["data"].str.replace(",", ".")
-    df_afolu_year_long["data"] = df_afolu_year_long["data"].str.replace("NE1", "NE")
-
-    # make sure all col headers are str
-    df_afolu_year_long.columns = df_afolu_year_long.columns.map(str)
-    df_afolu_year_long = df_afolu_year_long.drop(columns=["orig_cat_name"])
-
-    if df_afolu is None:
-        df_afolu = df_afolu_year_long
-    else:
-        df_afolu = pd.concat(
-            [df_afolu, df_afolu_year_long],
-            axis=0,
-            join="outer",
-        ).reset_index(drop=True)
-
-print("Converting to interchange format.")
-df_afolu_IF = pm2.pm2io.convert_long_dataframe_if(
-    df_afolu,
-    coords_cols=coords_cols,
-    coords_defaults=coords_defaults,
-    coords_terminologies=coords_terminologies,
-    coords_value_mapping=coords_value_mapping["afolu"],
-    filter_remove=filter_remove,
-    meta_data=meta_data,
-    convert_str=True,
-    time_format="%Y",
-)
-
-### convert to primap2 format ###
-data_pm2_afolu = pm2.pm2io.from_interchange_format(df_afolu_IF)
+    ### convert to primap2 format ###
+    data_pm2_afolu = pm2.pm2io.from_interchange_format(df_afolu_IF)

-# ###
-# 4. Read in Waste tables - pages 128, 130
-# ###
+    # ###
+    # 4. Read in Waste tables - pages 128, 130
+    # ###

-# There are three tables for three years on page 128
-# and another table for the last year on page 130
-
-# read the first three tables
-page = inv_conf["pages_to_read"]["waste"][0]
-tables_inventory_original_128 = camelot.read_pdf(
-    str(input_folder / pdf_file), pages=page, flavor="lattice", split_text=True
-)
-
-# read last table
-page = inv_conf["pages_to_read"]["waste"][1]
-tables_inventory_original_130 = camelot.read_pdf(
-    str(input_folder / pdf_file), pages=page, flavor="lattice", split_text=True
-)
+    # There are three tables for three years on page 128
+    # and another table for the last year on page 130

-# combine in a dict
-df_waste_years = {
-    "1990": tables_inventory_original_128[0].df,
-    "2000": tables_inventory_original_128[1].df,
-    "2010": tables_inventory_original_128[2].df,
-    "2019": tables_inventory_original_130[0].df,
-}
-
-df_waste = None
-for year in df_waste_years.keys():
-    print("-" * 45)
-    print(f"Processing table for {year}.")
-
-    df_waste_year = df_waste_years[year][2:]
-
-    # add header and unit
-    df_header = pd.DataFrame([inv_conf["header_waste"], inv_conf["unit_waste"]])
-
-    df_waste_year = pd.concat(
-        [df_header, df_waste_year], axis=0, join="outer"
-    ).reset_index(drop=True)
-
-    df_waste_year = pm2.pm2io.nir_add_unit_information(
-        df_waste_year,
-        unit_row=inv_conf["unit_row"],
-        entity_row=inv_conf["entity_row"],
-        regexp_entity=".*",
-        regexp_unit=".*",
-        default_unit="Gg",
+    # read the first three tables
+    page = inv_conf["pages_to_read"]["waste"][0]
+    tables_inventory_original_128 = camelot.read_pdf(
+        str(input_folder / pdf_file), pages=page, flavor="lattice", split_text=True
    )

-    print("Added unit information.")
-
-    # set index
-    df_waste_year = df_waste_year.set_index(inv_conf["index_cols"])
-
-    # convert to long format
-    df_waste_year_long = pm2.pm2io.nir_convert_df_to_long(
-        df_waste_year, year, inv_conf["header_long"]
+    # read last table
+    page = inv_conf["pages_to_read"]["waste"][1]
+    tables_inventory_original_130 = camelot.read_pdf(
+        str(input_folder / pdf_file), pages=page, flavor="lattice", split_text=True
    )

-    df_waste_year_long["orig_cat_name"] = df_waste_year_long["orig_cat_name"].str[0]
+    # combine in a dict
+    df_waste_years = {
+        "1990": tables_inventory_original_128[0].df,
+        "2000": tables_inventory_original_128[1].df,
+        "2010": tables_inventory_original_128[2].df,
+        "2019": tables_inventory_original_130[0].df,
+    }

-    # prep for conversion to PM2 IF and native format
-    # make a copy of the categories row
-    df_waste_year_long["category"] = df_waste_year_long["orig_cat_name"]
+    df_waste = None
+    for year in df_waste_years.keys():
+        print("-" * 45)
+        print(f"Processing table for {year}.")

-    # regex replacements
-    df_waste_year_long["category"] = df_waste_year_long["category"].str.replace(
-        inv_conf["cat_code_regexp"], repl, regex=True
-    )
+        df_waste_year = df_waste_years[year][2:]

-    df_waste_year_long = df_waste_year_long.reset_index(drop=True)
+        # add header and unit
+        df_header = pd.DataFrame([inv_conf["header_waste"], inv_conf["unit_waste"]])

-    df_waste_year_long["category"] = df_waste_year_long["category"].str.replace(".", "")
-    df_waste_year_long["data"] = df_waste_year_long["data"].str.replace(",", ".")
-    df_waste_year_long["data"] = df_waste_year_long["data"].str.replace("NE1", "NE")
+        df_waste_year = pd.concat(
+            [df_header, df_waste_year], axis=0, join="outer"
+        ).reset_index(drop=True)

-    # make sure all col headers are str
-    df_waste_year_long.columns = df_waste_year_long.columns.map(str)
-    df_waste_year_long = df_waste_year_long.drop(columns=["orig_cat_name"])
+        df_waste_year = pm2.pm2io.nir_add_unit_information(
+            df_waste_year,
+            unit_row=inv_conf["unit_row"],
+            entity_row=inv_conf["entity_row"],
+            regexp_entity=".*",
+            regexp_unit=".*",
+            default_unit="Gg",
+        )

-    if df_waste is None:
-        df_waste = df_waste_year_long
-    else:
-        df_waste = pd.concat(
-            [df_waste, df_waste_year_long],
-            axis=0,
-            join="outer",
-        ).reset_index(drop=True)
+        print("Added unit information.")

-print("Converting to interchange format.")
-df_waste_IF = pm2.pm2io.convert_long_dataframe_if(
-    df_waste,
-    coords_cols=coords_cols,
-    coords_defaults=coords_defaults,
-    coords_terminologies=coords_terminologies,
-    coords_value_mapping=coords_value_mapping["waste"],
-    filter_remove=filter_remove,
-    meta_data=meta_data,
-    convert_str=True,
-    time_format="%Y",
-)
+        # set index
+        df_waste_year = df_waste_year.set_index(inv_conf["index_cols"])

-### convert to primap2 format ###
-data_pm2_waste = pm2.pm2io.from_interchange_format(df_waste_IF)
+        # convert to long format
+        df_waste_year_long = pm2.pm2io.nir_convert_df_to_long(
+            df_waste_year, year, inv_conf["header_long"]
+        )

-# ###
-# 5. Read in trend tables - pages 131 - 137
-# ###
+        df_waste_year_long["orig_cat_name"] = df_waste_year_long["orig_cat_name"].str[0]

-df_trend = None
-pages = inv_conf["pages_to_read"]["trend"]
-entities = inv_conf["entity_for_page"]["trend"]
+        # prep for conversion to PM2 IF and native format
+        # make a copy of the categories row
+        df_waste_year_long["category"] = df_waste_year_long["orig_cat_name"]

-# for this set of tables every page is a different entity
-for page, entity in zip(pages, entities):
-    print("-" * 45)
-    print(f"Reading table for page {page} and entity {entity}.")
+        # regex replacements
+        df_waste_year_long["category"] = df_waste_year_long["category"].str.replace(
+            inv_conf["cat_code_regexp"], repl, regex=True
+        )

-    # First table must be read in with flavor="stream", as
-    # flavor="lattice" raises an error. Maybe camelot issue
-    # see https://github.com/atlanhq/camelot/issues/306,
-    # or because characters in first row almost touch
-    # the table grid.
-    if page == "131":
-        tables_inventory_original = camelot.read_pdf(
-            str(input_folder / pdf_file),
-            pages=page,
-            table_areas=page_def_templates[page]["area"],
-            columns=page_def_templates[page]["cols"],
-            flavor="stream",
-            split_text=True,
+        df_waste_year_long = df_waste_year_long.reset_index(drop=True)
+
+        df_waste_year_long["category"] = df_waste_year_long["category"].str.replace(
+            ".", ""
        )
+        df_waste_year_long["data"] = df_waste_year_long["data"].str.replace(",", ".")
+        df_waste_year_long["data"] = df_waste_year_long["data"].str.replace("NE1", "NE")
+
+        # make sure all col headers are str
+        df_waste_year_long.columns = df_waste_year_long.columns.map(str)
+        df_waste_year_long = df_waste_year_long.drop(columns=["orig_cat_name"])
+
+        if df_waste is None:
+            df_waste = df_waste_year_long
+        else:
+            df_waste = pd.concat(
+                [df_waste, df_waste_year_long],
+                axis=0,
+                join="outer",
+            ).reset_index(drop=True)
+
+    print("Converting to interchange format.")
+    df_waste_IF = pm2.pm2io.convert_long_dataframe_if(
+        df_waste,
+        coords_cols=coords_cols,
+        coords_defaults=coords_defaults,
+        coords_terminologies=coords_terminologies,
+        coords_value_mapping=coords_value_mapping["waste"],
+        filter_remove=filter_remove,
+        meta_data=meta_data,
+        convert_str=True,
+        time_format="%Y",
+    )

-    df_trend_entity = tables_inventory_original[0].df[1:]
+    ### convert to primap2 format ###
+    data_pm2_waste = pm2.pm2io.from_interchange_format(df_waste_IF)
+
+    # ###
+    # 5. Read in trend tables - pages 131 - 137
+    # ###
+
+    df_trend = None
+    pages = inv_conf["pages_to_read"]["trend"]
+    entities = inv_conf["entity_for_page"]["trend"]
+
+    # for this set of tables every page is a different entity
+    for page, entity in zip(pages, entities):
+        print("-" * 45)
+        print(f"Reading table for page {page} and entity {entity}.")
+
+        # The first table must be read in with flavor="stream", as
+        # flavor="lattice" raises an error - possibly a camelot issue
+        # (see https://github.com/atlanhq/camelot/issues/306), or because
+        # characters in the first row almost touch the table grid.
+        if page == "131":
+            tables_inventory_original = camelot.read_pdf(
+                str(input_folder / pdf_file),
+                pages=page,
+                table_areas=page_def_templates[page]["area"],
+                columns=page_def_templates[page]["cols"],
+                flavor="stream",
+                split_text=True,
+            )
+
+            df_trend_entity = tables_inventory_original[0].df[1:]
+
+        else:
+            tables_inventory_original = camelot.read_pdf(
+                str(input_folder / pdf_file),
+                pages=page,
+                flavor="lattice",
+                split_text=True,
+            )
+            df_trend_entity = tables_inventory_original[0].df[3:]
+
+        print("Reading complete.")
+
+        if page in delete_rows_by_category["trend"].keys():
+            for category in delete_rows_by_category["trend"][page]:
+                row_to_delete = df_trend_entity.index[df_trend_entity[0] == category][0]
+                df_trend_entity = df_trend_entity.drop(index=row_to_delete)
+
+        df_trend_entity.columns = inv_conf["header_trend"]
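+        # header_trend appears to hold "orig_cat_name" followed by the year
+        # columns that are cleaned and reshaped below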
+
+        df_trend_entity = df_trend_entity.copy()
+
+        # unit is always Gg
+        df_trend_entity.loc[:, "unit"] = "Gg"
+
+        # only one entity per table
+        df_trend_entity.loc[:, "entity"] = entity
+
+        df_trend_entity.loc[:, "category"] = df_trend_entity["orig_cat_name"]
+
+        df_trend_entity["category"] = df_trend_entity["category"].replace(
+            inv_conf["cat_codes_manual"]["trend"]
+        )

-    else:
-        tables_inventory_original = camelot.read_pdf(
-            str(input_folder / pdf_file), pages=page, flavor="lattice", split_text=True
+        df_trend_entity.loc[:, "category"] = df_trend_entity["category"].str.replace(
+            ".", ""
+        )
+        df_trend_entity.loc[:, "category"] = df_trend_entity["category"].str.replace(
+            "\n", ""
        )
-        df_trend_entity = tables_inventory_original[0].df[3:]

-    print("Reading complete.")
+        df_trend_entity.loc[:, "category"] = df_trend_entity["category"].str.replace(
+            inv_conf["cat_code_regexp"], repl, regex=True
+        )

-    if page in delete_rows_by_category["trend"].keys():
-        for category in delete_rows_by_category["trend"][page]:
-            row_to_delete = df_trend_entity.index[df_trend_entity[0] == category][0]
-            df_trend_entity = df_trend_entity.drop(index=row_to_delete)
+        df_trend_entity = df_trend_entity.reset_index(drop=True)

-    df_trend_entity.columns = inv_conf["header_trend"]
+        print("Created category codes.")

-    df_trend_entity = df_trend_entity.copy()
+        for year in inv_conf["header_trend"][1:]:
+            df_trend_entity.loc[:, year] = df_trend_entity[year].str.replace(",", ".")
+            df_trend_entity.loc[:, year] = df_trend_entity[year].str.replace(
+                "NE1", "NE"
+            )

-    # unit is always Gg
-    df_trend_entity.loc[:, "unit"] = "Gg"
+        # make sure all col headers are str
+        df_trend_entity.columns = df_trend_entity.columns.map(str)

-    # only one entity per table
-    df_trend_entity.loc[:, "entity"] = entity
+        df_trend_entity = df_trend_entity.drop(columns=["orig_cat_name"])

-    df_trend_entity.loc[:, "category"] = df_trend_entity["orig_cat_name"]
+        # TODO better to use pm2.pm2io.convert_wide_dataframe_if
+        df_trend_entity_long = pd.wide_to_long(
+            df_trend_entity, stubnames="data", i="category", j="time"
+        )
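+        # pd.wide_to_long() expects the year columns to be named "data<year>"
+        # (stubname plus numeric suffix)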

-    df_trend_entity["category"] = df_trend_entity["category"].replace(
-        inv_conf["cat_codes_manual"]["trend"]
+        print("Converted to long format.")
+
+        df_trend_entity_long = df_trend_entity_long.reset_index()
+
+        if df_trend is None:
+            df_trend = df_trend_entity_long
+        else:
+            df_trend = pd.concat(
+                [df_trend, df_trend_entity_long],
+                axis=0,
+                join="outer",
+            ).reset_index(drop=True)
+
+    print("Converting to interchange format.")
+
+    df_trend_IF = pm2.pm2io.convert_long_dataframe_if(
+        df_trend,
+        coords_cols=coords_cols,
+        coords_defaults=coords_defaults,
+        coords_terminologies=coords_terminologies,
+        coords_value_mapping=coords_value_mapping["trend"],
+        filter_remove=filter_remove,
+        meta_data=meta_data,
+        convert_str=True,
+        time_format="%Y",
    )

-    df_trend_entity.loc[:, "category"] = df_trend_entity["category"].str.replace(
-        ".", ""
-    )
-    df_trend_entity.loc[:, "category"] = df_trend_entity["category"].str.replace(
-        "\n", ""
+    df_trend_IF = find_and_replace_values(
+        df=df_trend_IF,
+        replace_info=replace_info["trend"],
+        category_column=category_column,
    )

-    df_trend_entity.loc[:, "category"] = df_trend_entity["category"].str.replace(
-        inv_conf["cat_code_regexp"], repl, regex=True
+    ### convert to primap2 format ###
+    data_pm2_trend = pm2.pm2io.from_interchange_format(df_trend_IF)
+
+    # ###
+    # Combine tables
+    # ###
+
+    # merge main and energy
+    # There are discrepancies larger than 0.86 for area category 1.A.2, entity NMVOC,
+    # years 1990, 2000, 2010, 2019
+    # It is assumed the main table has the correct values.
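+    # pr.merge() raises an error when overlapping values differ by more than
+    # the given tolerance, so the tolerances below sit just above the
+    # observed discrepancies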
+    print("Merging main and energy table.")
+    data_pm2 = data_pm2_main.pr.merge(data_pm2_energy, tolerance=1)
+
+    # merge afolu
+    print("Merging afolu table.")
+    data_pm2 = data_pm2.pr.merge(data_pm2_afolu, tolerance=0.11)
+
+    # merge waste
+    # increasing tolerance to merge values for 4.C, 1990, N2O - 0.003 in sector table,
+    # 0.0034 in main table
+    print("Merging waste table.")
+    data_pm2 = data_pm2.pr.merge(data_pm2_waste, tolerance=0.15)
+
+    # merge trend
+    print("Merging trend table.")
+    data_pm2 = data_pm2.pr.merge(data_pm2_trend, tolerance=0.11)
+
+    # convert back to IF to have units in the fixed format
+    # (per year / per a / per annum)
+    data_if = data_pm2.pr.to_interchange_format()
+
+    # ###
+    # Save raw data to IF and native format.
+    # ###
+
+    pm2.pm2io.write_interchange_format(
+        output_folder / (output_filename + coords_terminologies["category"] + "_raw"),
+        data_if,
    )

-    df_trend_entity = df_trend_entity.reset_index(drop=True)
-
-    print("Created category codes.")
-
-    for year in inv_conf["header_trend"][1:]:
-        df_trend_entity.loc[:, year] = df_trend_entity[year].str.replace(",", ".")
-        df_trend_entity.loc[:, year] = df_trend_entity[year].str.replace("NE1", "NE")
-
-    # make sure all col headers are str
-    df_trend_entity.columns = df_trend_entity.columns.map(str)
-
-    df_trend_entity = df_trend_entity.drop(columns=["orig_cat_name"])
-
-    # TODO better to use pm2.pm2io.convert_wide_dataframe_if
-    df_trend_entity_long = pd.wide_to_long(
-        df_trend_entity, stubnames="data", i="category", j="time"
+    encoding = {var: compression for var in data_pm2.data_vars}
+    data_pm2.pr.to_netcdf(
+        output_folder
+        / (output_filename + coords_terminologies["category"] + "_raw.nc"),
+        encoding=encoding,
    )

-    print("Converted to long format.")
-
-    df_trend_entity_long = df_trend_entity_long.reset_index()
-
-    if df_trend is None:
-        df_trend = df_trend_entity_long
-    else:
-        df_trend = pd.concat(
-            [df_trend, df_trend_entity_long],
-            axis=0,
-            join="outer",
-        ).reset_index(drop=True)
-
-print("Converting to interchange format.")
-
-df_trend_IF = pm2.pm2io.convert_long_dataframe_if(
-    df_trend,
-    coords_cols=coords_cols,
-    coords_defaults=coords_defaults,
-    coords_terminologies=coords_terminologies,
-    coords_value_mapping=coords_value_mapping["trend"],
-    filter_remove=filter_remove,
-    meta_data=meta_data,
-    convert_str=True,
-    time_format="%Y",
-)
-
-df_trend_IF = find_and_replace_values(
-    df=df_trend_IF, replace_info=replace_info["trend"], category_column=category_column
-)
-
-### convert to primap2 format ###
-data_pm2_trend = pm2.pm2io.from_interchange_format(df_trend_IF)
-
-# ###
-# Combine tables
-# ###
-
-# merge main and energy
-# There are discrepancies larger than 0.86 for area category 1.A.2, entity NMVOC,
-# years 1990, 2000, 2010, 2019
-# It is assumed the main table has the correct values.
-print("Merging main and energy table.")
-data_pm2 = data_pm2_main.pr.merge(data_pm2_energy, tolerance=1)
-
-# merge afolu
-print("Merging afolu table.")
-data_pm2 = data_pm2.pr.merge(data_pm2_afolu, tolerance=0.11)
-
-# merge waste
-# increasing tolerance to merge values for 4.C, 1990, N2O - 0.003 in sector table, 0.0034 in main table
-print("Merging waste table.")
-data_pm2 = data_pm2.pr.merge(data_pm2_waste, tolerance=0.15)
-
-# merge trend
-print("Merging trend table.")
-data_pm2 = data_pm2.pr.merge(data_pm2_trend, tolerance=0.11)
-
-# convert back to IF to have units in the fixed format ( per year / per a / per annum)
-data_if = data_pm2.pr.to_interchange_format()
-
-# ###
-# Save raw data to IF and native format.
-# ###
-
-pm2.pm2io.write_interchange_format(
-    output_folder / (output_filename + coords_terminologies["category"] + "_raw"),
-    data_if,
-)
-
-encoding = {var: compression for var in data_pm2.data_vars}
-data_pm2.pr.to_netcdf(
-    output_folder / (output_filename + coords_terminologies["category"] + "_raw.nc"),
-    encoding=encoding,
-)
-
-# ###
-# Processing
-# ###
-
-data_proc_pm2 = process_data_for_country(
-    data_country=data_pm2,
-    entities_to_ignore=[],
-    gas_baskets=gas_baskets,
-    filter_dims=None, # leaving this explicit for now
-    cat_terminology_out=None,
-    category_conversion=None,
-    sectors_out=None,
-    processing_info_country=country_processing_step1,
-)
+    # ###
+    # Processing
+    # ###
+
+    data_proc_pm2 = process_data_for_country(
+        data_country=data_pm2,
+        entities_to_ignore=[],
+        gas_baskets=gas_baskets,
+        filter_dims=None, # leaving this explicit for now
+        cat_terminology_out=None,
+        category_conversion=None,
+        sectors_out=None,
+        processing_info_country=country_processing_step1,
+    )

-# ###
-# save processed data to IF and native format
-# ###
+    # ###
+    # save processed data to IF and native format
+    # ###

-terminology_proc = coords_terminologies["category"]
+    terminology_proc = coords_terminologies["category"]

-data_proc_if = data_proc_pm2.pr.to_interchange_format()
+    data_proc_if = data_proc_pm2.pr.to_interchange_format()

-if not output_folder.exists():
-    output_folder.mkdir()
-pm2.pm2io.write_interchange_format(
-    output_folder / (output_filename + terminology_proc), data_proc_if
-)
+    if not output_folder.exists():
+        output_folder.mkdir()
+    pm2.pm2io.write_interchange_format(
+        output_folder / (output_filename + terminology_proc), data_proc_if
+    )

-encoding = {var: compression for var in data_proc_pm2.data_vars}
-data_proc_pm2.pr.to_netcdf(
-    output_folder / (output_filename + terminology_proc + ".nc"), encoding=encoding
-)
+    encoding = {var: compression for var in data_proc_pm2.data_vars}
+    data_proc_pm2.pr.to_netcdf(
+        output_folder / (output_filename + terminology_proc + ".nc"), encoding=encoding
+    )

-print("Saved processed data.")
+    print("Saved processed data.")