Преглед изворног кода

[DATALAD] Recorded changes

Daniel Busch пре 3 месеци
родитељ
комит
162b81a245

+ 61 - 0
dodo.py

@@ -0,0 +1,61 @@
+"""
+Define tasks to download and read the FAO data set.
+"""
+import datalad.api
+
+from src.faostat_data_primap.helper.definitions import domains_and_releases_to_read
+
+
+def get_output_folders(domains_and_releases_to_read):
+    """List ``downloaded_data/<domain>/<release>`` for every pair stored under the "2024" key."""
+    output_folders = []
+    # TODO: remove the hard-coded "2024" key; entries under any other key are never read here
+    for domain, release in domains_and_releases_to_read["2024"]:
+        # TODO: build the path with pathlib.Path instead of string formatting
+        output_folders.append(f"downloaded_data/{domain}/{release}")
+    return output_folders
+
+
+def task_test_basic_target():
+    """
+    Placeholder doit task whose single action is a Python callable that does nothing.
+    """
+
+    def do_nothing():
+        pass
+
+    return {"actions": [do_nothing]}
+
+
+def task_test_download_target():
+    """
+    Test task: run ``python3 scripts/download_all_domains.py`` through ``datalad.api.run``.
+    """
+
+    def datalad_run_download():
+        datalad.api.run(cmd="python3 scripts/download_all_domains.py")
+
+    return {"actions": [datalad_run_download]}
+
+
+def task_read_data():
+    """
+    Run ``python3 scripts/read_data_set.py`` via ``datalad.api.run``, passing the output folders.
+    """
+
+    def read_dataset():
+        output_folders = get_output_folders(domains_and_releases_to_read)
+
+        datalad.api.run(
+            cmd="python3 scripts/read_data_set.py",
+            message="Read data set",
+            outputs=output_folders,
+        )
+
+    return {
+        "actions": [read_dataset],
+        "params": [  # NOTE(review): run_id is declared, but read_dataset() takes no arguments - confirm it is used
+            {"name": "run_id", "long": "run_id", "type": str, "default": "2024"}
+        ],
+        "verbosity": 2,  # presumably doit's "show task output" level - TODO confirm against doit docs
+    }

+ 1 - 1
extracted_data/v2024-11-14/FAOSTAT_Agrifood_system_emissions_v2024-11-14.csv

@@ -1 +1 @@
-../../.git/annex/objects/5f/mq/MD5E-s45951559--903471c4bf5a6616e7a144e21b8e4954.csv/MD5E-s45951559--903471c4bf5a6616e7a144e21b8e4954.csv
+../../.git/annex/objects/fQ/4m/MD5E-s4236841--55a238326c11e125114380c4502d7cf4.csv/MD5E-s4236841--55a238326c11e125114380c4502d7cf4.csv

+ 1 - 1
extracted_data/v2024-11-14/FAOSTAT_Agrifood_system_emissions_v2024-11-14.nc

@@ -1 +1 @@
-../../.git/annex/objects/7x/xk/MD5E-s21114760--2857537fa84990f457054a91ab674169.nc/MD5E-s21114760--2857537fa84990f457054a91ab674169.nc
+../../.git/annex/objects/MV/4K/MD5E-s2854495--45c82b789407d161ea51fde28658367a.nc/MD5E-s2854495--45c82b789407d161ea51fde28658367a.nc

+ 2 - 2
extracted_data/v2024-11-14/FAOSTAT_Agrifood_system_emissions_v2024-11-14.yaml

@@ -13,10 +13,10 @@ time_format: '%Y'
 dimensions:
   '*':
   - time
-  - category (FAOSTAT)
+  - area (ISO3)
   - source
+  - category (FAOSTAT)
   - scenario (FAO)
-  - area (ISO3)
   - entity
   - unit
 data_file: FAOSTAT_Agrifood_system_emissions_v2024-11-14.csv

+ 497 - 1
poetry.lock

@@ -29,6 +29,21 @@ files = [
     {file = "alabaster-0.7.16.tar.gz", hash = "sha256:75a8b99c28a5dad50dd7f8ccdd447a121ddb3892da9e53d1ca5cca3106d58d65"},
 ]
 
+[[package]]
+name = "annexremote"
+version = "1.6.6"
+description = "git annex special remotes made easy"
+optional = false
+python-versions = "*"
+files = [
+    {file = "annexremote-1.6.6-py3-none-any.whl", hash = "sha256:dee4efa33c3bd9514928af5c57c82599ca9dc0a5535121ee234ed1833a98f93e"},
+    {file = "annexremote-1.6.6.tar.gz", hash = "sha256:5f78d0753c0763d95fc4c52050bd6212bb32457d32f6575dc66a83178e0283a7"},
+]
+
+[package.extras]
+doc = ["sphinx"]
+tests = ["coverage", "pytest"]
+
 [[package]]
 name = "appnope"
 version = "0.1.4"
@@ -91,6 +106,21 @@ files = [
 [package.extras]
 dev = ["freezegun (>=1.0,<2.0)", "pytest (>=6.0)", "pytest-cov"]
 
+[[package]]
+name = "backports-tarfile"
+version = "1.2.0"
+description = "Backport of CPython tarfile module"
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "backports.tarfile-1.2.0-py3-none-any.whl", hash = "sha256:77e284d754527b01fb1e6fa8a1afe577858ebe4e9dad8919e34c862cb399bc34"},
+    {file = "backports_tarfile-1.2.0.tar.gz", hash = "sha256:d75e02c268746e1b8144c278978b6e98e85de6ad16f8e4b0844a154557eca991"},
+]
+
+[package.extras]
+docs = ["furo", "jaraco.packaging (>=9.3)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"]
+testing = ["jaraco.test", "pytest (!=8.0.*)", "pytest (>=6,!=8.1.*)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)"]
+
 [[package]]
 name = "beautifulsoup4"
 version = "4.12.3"
@@ -112,6 +142,44 @@ charset-normalizer = ["charset-normalizer"]
 html5lib = ["html5lib"]
 lxml = ["lxml"]
 
+[[package]]
+name = "boto3"
+version = "1.35.72"
+description = "The AWS SDK for Python"
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "boto3-1.35.72-py3-none-any.whl", hash = "sha256:410bb4ec676c57ee9c3c7824b7b1a3721584f18f8ee8ccc8e8ecdf285136b77f"},
+    {file = "boto3-1.35.72.tar.gz", hash = "sha256:f9fc94413a959c388b1654c6687a5193293f3c69f8d0af3b86fd48b4096a23f3"},
+]
+
+[package.dependencies]
+botocore = ">=1.35.72,<1.36.0"
+jmespath = ">=0.7.1,<2.0.0"
+s3transfer = ">=0.10.0,<0.11.0"
+
+[package.extras]
+crt = ["botocore[crt] (>=1.21.0,<2.0a0)"]
+
+[[package]]
+name = "botocore"
+version = "1.35.72"
+description = "Low-level, data-driven core of boto 3."
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "botocore-1.35.72-py3-none-any.whl", hash = "sha256:7412877c3f766a1bfd09236e225ce1f0dc2c35e47949ae423e56e2093c8fa23a"},
+    {file = "botocore-1.35.72.tar.gz", hash = "sha256:6b5fac38ef7cfdbc7781a751e0f78833ccb9149ba815bc238b1dbb75c90fbae5"},
+]
+
+[package.dependencies]
+jmespath = ">=0.7.1,<2.0.0"
+python-dateutil = ">=2.1,<3.0.0"
+urllib3 = {version = ">=1.25.4,<2.2.0 || >2.2.0,<3", markers = "python_version >= \"3.10\""}
+
+[package.extras]
+crt = ["awscrt (==0.22.0)"]
+
 [[package]]
 name = "certifi"
 version = "2024.8.30"
@@ -213,6 +281,17 @@ files = [
     {file = "cfgv-3.4.0.tar.gz", hash = "sha256:e52591d4c5f5dead8e0f673fb16db7949d2cfb3f7da4582893288f0ded8fe560"},
 ]
 
+[[package]]
+name = "chardet"
+version = "5.2.0"
+description = "Universal encoding detector for Python 3"
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "chardet-5.2.0-py3-none-any.whl", hash = "sha256:e1cf59446890a00105fe7b7912492ea04b6e6f06d4b742b2c788469e34c82970"},
+    {file = "chardet-5.2.0.tar.gz", hash = "sha256:1b3b6ff479a8c414bc3fa2c0852995695c4a026dcd6d0633b2dd092ca39c1cf7"},
+]
+
 [[package]]
 name = "charset-normalizer"
 version = "3.4.0"
@@ -341,6 +420,17 @@ files = [
 [package.dependencies]
 colorama = {version = "*", markers = "platform_system == \"Windows\""}
 
+[[package]]
+name = "cloudpickle"
+version = "3.1.0"
+description = "Pickler class to extend the standard pickle.Pickler functionality"
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "cloudpickle-3.1.0-py3-none-any.whl", hash = "sha256:fe11acda67f61aaaec473e3afe030feb131d78a43461b718185363384f1ba12e"},
+    {file = "cloudpickle-3.1.0.tar.gz", hash = "sha256:81a929b6e3c7335c863c771d673d105f02efdb89dfaba0c90495d1c64796601b"},
+]
+
 [[package]]
 name = "colorama"
 version = "0.4.6"
@@ -519,6 +609,57 @@ tomli = {version = "*", optional = true, markers = "python_full_version <= \"3.1
 [package.extras]
 toml = ["tomli"]
 
+[[package]]
+name = "cryptography"
+version = "44.0.0"
+description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers."
+optional = false
+python-versions = "!=3.9.0,!=3.9.1,>=3.7"
+files = [
+    {file = "cryptography-44.0.0-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:84111ad4ff3f6253820e6d3e58be2cc2a00adb29335d4cacb5ab4d4d34f2a123"},
+    {file = "cryptography-44.0.0-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b15492a11f9e1b62ba9d73c210e2416724633167de94607ec6069ef724fad092"},
+    {file = "cryptography-44.0.0-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:831c3c4d0774e488fdc83a1923b49b9957d33287de923d58ebd3cec47a0ae43f"},
+    {file = "cryptography-44.0.0-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:761817a3377ef15ac23cd7834715081791d4ec77f9297ee694ca1ee9c2c7e5eb"},
+    {file = "cryptography-44.0.0-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:3c672a53c0fb4725a29c303be906d3c1fa99c32f58abe008a82705f9ee96f40b"},
+    {file = "cryptography-44.0.0-cp37-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:4ac4c9f37eba52cb6fbeaf5b59c152ea976726b865bd4cf87883a7e7006cc543"},
+    {file = "cryptography-44.0.0-cp37-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:60eb32934076fa07e4316b7b2742fa52cbb190b42c2df2863dbc4230a0a9b385"},
+    {file = "cryptography-44.0.0-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:ed3534eb1090483c96178fcb0f8893719d96d5274dfde98aa6add34614e97c8e"},
+    {file = "cryptography-44.0.0-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:f3f6fdfa89ee2d9d496e2c087cebef9d4fcbb0ad63c40e821b39f74bf48d9c5e"},
+    {file = "cryptography-44.0.0-cp37-abi3-win32.whl", hash = "sha256:eb33480f1bad5b78233b0ad3e1b0be21e8ef1da745d8d2aecbb20671658b9053"},
+    {file = "cryptography-44.0.0-cp37-abi3-win_amd64.whl", hash = "sha256:abc998e0c0eee3c8a1904221d3f67dcfa76422b23620173e28c11d3e626c21bd"},
+    {file = "cryptography-44.0.0-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:660cb7312a08bc38be15b696462fa7cc7cd85c3ed9c576e81f4dc4d8b2b31591"},
+    {file = "cryptography-44.0.0-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1923cb251c04be85eec9fda837661c67c1049063305d6be5721643c22dd4e2b7"},
+    {file = "cryptography-44.0.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:404fdc66ee5f83a1388be54300ae978b2efd538018de18556dde92575e05defc"},
+    {file = "cryptography-44.0.0-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:c5eb858beed7835e5ad1faba59e865109f3e52b3783b9ac21e7e47dc5554e289"},
+    {file = "cryptography-44.0.0-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:f53c2c87e0fb4b0c00fa9571082a057e37690a8f12233306161c8f4b819960b7"},
+    {file = "cryptography-44.0.0-cp39-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:9e6fc8a08e116fb7c7dd1f040074c9d7b51d74a8ea40d4df2fc7aa08b76b9e6c"},
+    {file = "cryptography-44.0.0-cp39-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:9abcc2e083cbe8dde89124a47e5e53ec38751f0d7dfd36801008f316a127d7ba"},
+    {file = "cryptography-44.0.0-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:d2436114e46b36d00f8b72ff57e598978b37399d2786fd39793c36c6d5cb1c64"},
+    {file = "cryptography-44.0.0-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:a01956ddfa0a6790d594f5b34fc1bfa6098aca434696a03cfdbe469b8ed79285"},
+    {file = "cryptography-44.0.0-cp39-abi3-win32.whl", hash = "sha256:eca27345e1214d1b9f9490d200f9db5a874479be914199194e746c893788d417"},
+    {file = "cryptography-44.0.0-cp39-abi3-win_amd64.whl", hash = "sha256:708ee5f1bafe76d041b53a4f95eb28cdeb8d18da17e597d46d7833ee59b97ede"},
+    {file = "cryptography-44.0.0-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:37d76e6863da3774cd9db5b409a9ecfd2c71c981c38788d3fcfaf177f447b731"},
+    {file = "cryptography-44.0.0-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:f677e1268c4e23420c3acade68fac427fffcb8d19d7df95ed7ad17cdef8404f4"},
+    {file = "cryptography-44.0.0-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:f5e7cb1e5e56ca0933b4873c0220a78b773b24d40d186b6738080b73d3d0a756"},
+    {file = "cryptography-44.0.0-pp310-pypy310_pp73-manylinux_2_34_aarch64.whl", hash = "sha256:8b3e6eae66cf54701ee7d9c83c30ac0a1e3fa17be486033000f2a73a12ab507c"},
+    {file = "cryptography-44.0.0-pp310-pypy310_pp73-manylinux_2_34_x86_64.whl", hash = "sha256:be4ce505894d15d5c5037167ffb7f0ae90b7be6f2a98f9a5c3442395501c32fa"},
+    {file = "cryptography-44.0.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:62901fb618f74d7d81bf408c8719e9ec14d863086efe4185afd07c352aee1d2c"},
+    {file = "cryptography-44.0.0.tar.gz", hash = "sha256:cd4e834f340b4293430701e772ec543b0fbe6c2dea510a5286fe0acabe153a02"},
+]
+
+[package.dependencies]
+cffi = {version = ">=1.12", markers = "platform_python_implementation != \"PyPy\""}
+
+[package.extras]
+docs = ["sphinx (>=5.3.0)", "sphinx-rtd-theme (>=3.0.0)"]
+docstest = ["pyenchant (>=3)", "readme-renderer (>=30.0)", "sphinxcontrib-spelling (>=7.3.1)"]
+nox = ["nox (>=2024.4.15)", "nox[uv] (>=2024.3.2)"]
+pep8test = ["check-sdist", "click (>=8.0.1)", "mypy (>=1.4)", "ruff (>=0.3.6)"]
+sdist = ["build (>=1.0.0)"]
+ssh = ["bcrypt (>=3.1.5)"]
+test = ["certifi (>=2024)", "cryptography-vectors (==44.0.0)", "pretend (>=0.7)", "pytest (>=7.4.0)", "pytest-benchmark (>=4.0)", "pytest-cov (>=2.10.1)", "pytest-xdist (>=3.5.0)"]
+test-randomorder = ["pytest-randomly"]
+
 [[package]]
 name = "cycler"
 version = "0.12.1"
@@ -534,6 +675,51 @@ files = [
 docs = ["ipython", "matplotlib", "numpydoc", "sphinx"]
 tests = ["pytest", "pytest-cov", "pytest-xdist"]
 
+[[package]]
+name = "datalad"
+version = "1.1.4"
+description = "data distribution geared toward scientific datasets"
+optional = false
+python-versions = ">=3.9"
+files = [
+    {file = "datalad-1.1.4-py3-none-any.whl", hash = "sha256:07a95efe456c82e7a8ca8e15a85bac3e588bc189abd4ddbace00f47d259ddaa9"},
+    {file = "datalad-1.1.4.tar.gz", hash = "sha256:db9286a4baf74d53d23c3bfb5928452197d57f36abed8ba0af334a3dd038166b"},
+]
+
+[package.dependencies]
+annexremote = "*"
+boto3 = "*"
+chardet = ">=3.0.4"
+colorama = {version = "*", markers = "platform_system == \"Windows\""}
+distro = "*"
+fasteners = ">=0.14"
+humanize = "*"
+iso8601 = "*"
+keyring = ">=20.0,<23.9.0 || >23.9.0"
+"keyrings.alt" = "*"
+looseversion = "*"
+msgpack = "*"
+packaging = "*"
+patool = ">=1.7"
+platformdirs = "*"
+python-gitlab = "*"
+requests = ">=1.2"
+tqdm = ">=4.32.0"
+typing-extensions = {version = ">=4.0.0", markers = "python_version < \"3.11\""}
+
+[package.extras]
+core = ["annexremote", "chardet (>=3.0.4)", "colorama", "distro", "fasteners (>=0.14)", "humanize", "importlib-metadata (>=3.6)", "iso8601", "looseversion", "packaging", "patool (>=1.7)", "platformdirs", "tqdm (>=4.32.0)", "typing-extensions (>=4.0.0)"]
+devel = ["BeautifulSoup4", "annexremote", "argcomplete (>=1.12.3)", "asv", "boto3", "chardet (>=3.0.4)", "colorama", "coverage (!=7.6.5)", "distro", "duecredit", "fasteners (>=0.14)", "gprof2dot", "httpretty (>=0.9.4)", "humanize", "importlib-metadata (>=3.6)", "iso8601", "keyring (>=20.0,!=23.9.0)", "keyrings.alt", "looseversion", "msgpack", "mypy", "packaging", "patool (>=1.7)", "platformdirs", "psutil", "pypandoc", "pyperclip", "pytest (>=7.0)", "pytest-cov", "pytest-fail-slow (>=0.2,<1.0)", "pytest-xdist", "python-dateutil", "python-gitlab", "requests (>=1.2)", "requests-ftp", "scriv", "sphinx (>=4.3.0)", "sphinx-autodoc-typehints", "sphinx-rtd-theme (>=0.5.1)", "tqdm (>=4.32.0)", "types-python-dateutil", "types-requests", "typing-extensions (>=4.0.0)", "vcrpy"]
+devel-docs = ["pypandoc", "sphinx (>=4.3.0)", "sphinx-autodoc-typehints", "sphinx-rtd-theme (>=0.5.1)"]
+devel-utils = ["asv", "coverage (!=7.6.5)", "gprof2dot", "psutil", "pytest-xdist", "scriv"]
+downloaders = ["boto3", "keyring (>=20.0,!=23.9.0)", "keyrings.alt", "msgpack", "requests (>=1.2)"]
+downloaders-extra = ["requests-ftp"]
+duecredit = ["duecredit"]
+full = ["BeautifulSoup4", "annexremote", "argcomplete (>=1.12.3)", "boto3", "chardet (>=3.0.4)", "colorama", "distro", "duecredit", "fasteners (>=0.14)", "httpretty (>=0.9.4)", "humanize", "importlib-metadata (>=3.6)", "iso8601", "keyring (>=20.0,!=23.9.0)", "keyrings.alt", "looseversion", "msgpack", "mypy", "packaging", "patool (>=1.7)", "platformdirs", "pyperclip", "pytest (>=7.0)", "pytest-cov", "pytest-fail-slow (>=0.2,<1.0)", "python-dateutil", "python-gitlab", "requests (>=1.2)", "requests-ftp", "tqdm (>=4.32.0)", "types-python-dateutil", "types-requests", "typing-extensions (>=4.0.0)", "vcrpy"]
+misc = ["argcomplete (>=1.12.3)", "pyperclip", "python-dateutil"]
+publish = ["python-gitlab"]
+tests = ["BeautifulSoup4", "httpretty (>=0.9.4)", "mypy", "pytest (>=7.0)", "pytest-cov", "pytest-fail-slow (>=0.2,<1.0)", "types-python-dateutil", "types-requests", "vcrpy"]
+
 [[package]]
 name = "debugpy"
 version = "1.8.8"
@@ -591,6 +777,17 @@ files = [
     {file = "distlib-0.3.9.tar.gz", hash = "sha256:a60f20dea646b8a33f3e7772f74dc0b2d0772d2837ee1342a00645c81edf9403"},
 ]
 
+[[package]]
+name = "distro"
+version = "1.9.0"
+description = "Distro - an OS platform information API"
+optional = false
+python-versions = ">=3.6"
+files = [
+    {file = "distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2"},
+    {file = "distro-1.9.0.tar.gz", hash = "sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed"},
+]
+
 [[package]]
 name = "docutils"
 version = "0.19"
@@ -602,6 +799,24 @@ files = [
     {file = "docutils-0.19.tar.gz", hash = "sha256:33995a6753c30b7f577febfc2c50411fec6aac7f7ffeb7c4cfe5991072dcf9e6"},
 ]
 
+[[package]]
+name = "doit"
+version = "0.36.0"
+description = "doit - Automation Tool"
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "doit-0.36.0-py3-none-any.whl", hash = "sha256:ebc285f6666871b5300091c26eafdff3de968a6bd60ea35dd1e3fc6f2e32479a"},
+    {file = "doit-0.36.0.tar.gz", hash = "sha256:71d07ccc9514cb22fe59d98999577665eaab57e16f644d04336ae0b4bae234bc"},
+]
+
+[package.dependencies]
+cloudpickle = "*"
+importlib-metadata = ">=4.4"
+
+[package.extras]
+toml = ["tomli"]
+
 [[package]]
 name = "et-xmlfile"
 version = "2.0.0"
@@ -641,6 +856,17 @@ files = [
 [package.extras]
 tests = ["asttokens (>=2.1.0)", "coverage", "coverage-enable-subprocess", "ipython", "littleutils", "pytest", "rich"]
 
+[[package]]
+name = "fasteners"
+version = "0.19"
+description = "A python package that provides useful locks"
+optional = false
+python-versions = ">=3.6"
+files = [
+    {file = "fasteners-0.19-py3-none-any.whl", hash = "sha256:758819cb5d94cdedf4e836988b74de396ceacb8e2794d21f82d131fd9ee77237"},
+    {file = "fasteners-0.19.tar.gz", hash = "sha256:b4f37c3ac52d8a445af3a66bce57b33b5e90b97c696b7b984f530cf8f0ded09c"},
+]
+
 [[package]]
 name = "fastjsonschema"
 version = "2.20.0"
@@ -942,6 +1168,20 @@ files = [
 [package.dependencies]
 numpy = ">=1.19.3"
 
+[[package]]
+name = "humanize"
+version = "4.11.0"
+description = "Python humanize utilities"
+optional = false
+python-versions = ">=3.9"
+files = [
+    {file = "humanize-4.11.0-py3-none-any.whl", hash = "sha256:b53caaec8532bcb2fff70c8826f904c35943f8cecaca29d272d9df38092736c0"},
+    {file = "humanize-4.11.0.tar.gz", hash = "sha256:e66f36020a2d5a974c504bd2555cf770621dbdbb6d82f94a6857c0b1ea2608be"},
+]
+
+[package.extras]
+tests = ["freezegun", "pytest", "pytest-cov"]
+
 [[package]]
 name = "identify"
 version = "2.6.2"
@@ -1104,6 +1344,75 @@ qtconsole = ["qtconsole"]
 test = ["packaging", "pickleshare", "pytest", "pytest-asyncio (<0.22)", "testpath"]
 test-extra = ["curio", "ipython[test]", "matplotlib (!=3.2.0)", "nbformat", "numpy (>=1.23)", "pandas", "trio"]
 
+[[package]]
+name = "iso8601"
+version = "2.1.0"
+description = "Simple module to parse ISO 8601 dates"
+optional = false
+python-versions = ">=3.7,<4.0"
+files = [
+    {file = "iso8601-2.1.0-py3-none-any.whl", hash = "sha256:aac4145c4dcb66ad8b648a02830f5e2ff6c24af20f4f482689be402db2429242"},
+    {file = "iso8601-2.1.0.tar.gz", hash = "sha256:6b1d3829ee8921c4301998c909f7829fa9ed3cbdac0d3b16af2d743aed1ba8df"},
+]
+
+[[package]]
+name = "jaraco-classes"
+version = "3.4.0"
+description = "Utility functions for Python class constructs"
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "jaraco.classes-3.4.0-py3-none-any.whl", hash = "sha256:f662826b6bed8cace05e7ff873ce0f9283b5c924470fe664fff1c2f00f581790"},
+    {file = "jaraco.classes-3.4.0.tar.gz", hash = "sha256:47a024b51d0239c0dd8c8540c6c7f484be3b8fcf0b2d85c13825780d3b3f3acd"},
+]
+
+[package.dependencies]
+more-itertools = "*"
+
+[package.extras]
+docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"]
+testing = ["pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy", "pytest-ruff (>=0.2.1)"]
+
+[[package]]
+name = "jaraco-context"
+version = "6.0.1"
+description = "Useful decorators and context managers"
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "jaraco.context-6.0.1-py3-none-any.whl", hash = "sha256:f797fc481b490edb305122c9181830a3a5b76d84ef6d1aef2fb9b47ab956f9e4"},
+    {file = "jaraco_context-6.0.1.tar.gz", hash = "sha256:9bae4ea555cf0b14938dc0aee7c9f32ed303aa20a3b73e7dc80111628792d1b3"},
+]
+
+[package.dependencies]
+"backports.tarfile" = {version = "*", markers = "python_version < \"3.12\""}
+
+[package.extras]
+doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"]
+test = ["portend", "pytest (>=6,!=8.1.*)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy", "pytest-ruff (>=0.2.1)"]
+
+[[package]]
+name = "jaraco-functools"
+version = "4.1.0"
+description = "Functools like those found in stdlib"
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "jaraco.functools-4.1.0-py3-none-any.whl", hash = "sha256:ad159f13428bc4acbf5541ad6dec511f91573b90fba04df61dafa2a1231cf649"},
+    {file = "jaraco_functools-4.1.0.tar.gz", hash = "sha256:70f7e0e2ae076498e212562325e805204fc092d7b4c17e0e86c959e249701a9d"},
+]
+
+[package.dependencies]
+more-itertools = "*"
+
+[package.extras]
+check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1)"]
+cover = ["pytest-cov"]
+doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"]
+enabler = ["pytest-enabler (>=2.2)"]
+test = ["jaraco.classes", "pytest (>=6,!=8.1.*)"]
+type = ["pytest-mypy"]
+
 [[package]]
 name = "jedi"
 version = "0.19.2"
@@ -1123,6 +1432,21 @@ docs = ["Jinja2 (==2.11.3)", "MarkupSafe (==1.1.1)", "Pygments (==2.8.1)", "alab
 qa = ["flake8 (==5.0.4)", "mypy (==0.971)", "types-setuptools (==67.2.0.1)"]
 testing = ["Django", "attrs", "colorama", "docopt", "pytest (<9.0.0)"]
 
+[[package]]
+name = "jeepney"
+version = "0.8.0"
+description = "Low-level, pure Python DBus protocol wrapper."
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "jeepney-0.8.0-py3-none-any.whl", hash = "sha256:c0a454ad016ca575060802ee4d590dd912e35c122fa04e70306de3d076cce755"},
+    {file = "jeepney-0.8.0.tar.gz", hash = "sha256:5efe48d255973902f6badc3ce55e2aa6c5c3b3bc642059ef3a91247bcfcc5806"},
+]
+
+[package.extras]
+test = ["async-timeout", "pytest", "pytest-asyncio (>=0.17)", "pytest-trio", "testpath", "trio"]
+trio = ["async_generator", "trio"]
+
 [[package]]
 name = "jinja2"
 version = "3.1.4"
@@ -1140,6 +1464,17 @@ MarkupSafe = ">=2.0"
 [package.extras]
 i18n = ["Babel (>=2.7)"]
 
+[[package]]
+name = "jmespath"
+version = "1.0.1"
+description = "JSON Matching Expressions"
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "jmespath-1.0.1-py3-none-any.whl", hash = "sha256:02e2e4cc71b5bcab88332eebf907519190dd9e6e82107fa7f83b1003a6252980"},
+    {file = "jmespath-1.0.1.tar.gz", hash = "sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe"},
+]
+
 [[package]]
 name = "jsonschema"
 version = "4.23.0"
@@ -1273,6 +1608,54 @@ test-functional = ["pytest", "pytest-randomly", "pytest-xdist"]
 test-integration = ["ipykernel", "jupyter-server (!=2.11)", "nbconvert", "pytest", "pytest-randomly", "pytest-xdist"]
 test-ui = ["calysto-bash"]
 
+[[package]]
+name = "keyring"
+version = "25.5.0"
+description = "Store and access your passwords safely."
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "keyring-25.5.0-py3-none-any.whl", hash = "sha256:e67f8ac32b04be4714b42fe84ce7dad9c40985b9ca827c592cc303e7c26d9741"},
+    {file = "keyring-25.5.0.tar.gz", hash = "sha256:4c753b3ec91717fe713c4edd522d625889d8973a349b0e582622f49766de58e6"},
+]
+
+[package.dependencies]
+importlib-metadata = {version = ">=4.11.4", markers = "python_version < \"3.12\""}
+"jaraco.classes" = "*"
+"jaraco.context" = "*"
+"jaraco.functools" = "*"
+jeepney = {version = ">=0.4.2", markers = "sys_platform == \"linux\""}
+pywin32-ctypes = {version = ">=0.2.0", markers = "sys_platform == \"win32\""}
+SecretStorage = {version = ">=3.2", markers = "sys_platform == \"linux\""}
+
+[package.extras]
+check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1)"]
+completion = ["shtab (>=1.1.0)"]
+cover = ["pytest-cov"]
+doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"]
+enabler = ["pytest-enabler (>=2.2)"]
+test = ["pyfakefs", "pytest (>=6,!=8.1.*)"]
+type = ["pygobject-stubs", "pytest-mypy", "shtab", "types-pywin32"]
+
+[[package]]
+name = "keyrings-alt"
+version = "5.0.2"
+description = "Alternate keyring implementations"
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "keyrings.alt-5.0.2-py3-none-any.whl", hash = "sha256:6be74693192f3f37bbb752bfac9b86e6177076b17d2ac12a390f1d6abff8ac7c"},
+    {file = "keyrings_alt-5.0.2.tar.gz", hash = "sha256:8f097ebe9dc8b185106502b8cdb066c926d2180e13b4689fd4771a3eab7d69fb"},
+]
+
+[package.dependencies]
+"jaraco.classes" = "*"
+"jaraco.context" = "*"
+
+[package.extras]
+doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"]
+test = ["gdata", "keyring (>=20)", "pycryptodome", "pycryptodomex", "pytest (>=6,!=8.1.*)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy", "pytest-ruff (>=0.2.1)", "python-keyczar"]
+
 [[package]]
 name = "kiwisolver"
 version = "1.4.7"
@@ -1459,6 +1842,17 @@ win32-setctime = {version = ">=1.0.0", markers = "sys_platform == \"win32\""}
 [package.extras]
 dev = ["Sphinx (==7.2.5)", "colorama (==0.4.5)", "colorama (==0.4.6)", "exceptiongroup (==1.1.3)", "freezegun (==1.1.0)", "freezegun (==1.2.2)", "mypy (==v0.910)", "mypy (==v0.971)", "mypy (==v1.4.1)", "mypy (==v1.5.1)", "pre-commit (==3.4.0)", "pytest (==6.1.2)", "pytest (==7.4.0)", "pytest-cov (==2.12.1)", "pytest-cov (==4.1.0)", "pytest-mypy-plugins (==1.9.3)", "pytest-mypy-plugins (==3.0.0)", "sphinx-autobuild (==2021.3.14)", "sphinx-rtd-theme (==1.3.0)", "tox (==3.27.1)", "tox (==4.11.0)"]
 
+[[package]]
+name = "looseversion"
+version = "1.3.0"
+description = "Version numbering for anarchists and software realists"
+optional = false
+python-versions = "*"
+files = [
+    {file = "looseversion-1.3.0-py2.py3-none-any.whl", hash = "sha256:781ef477b45946fc03dd4c84ea87734b21137ecda0e1e122bcb3c8d16d2a56e0"},
+    {file = "looseversion-1.3.0.tar.gz", hash = "sha256:ebde65f3f6bb9531a81016c6fef3eb95a61181adc47b7f949e9c0ea47911669e"},
+]
+
 [[package]]
 name = "markdown-it-py"
 version = "2.2.0"
@@ -1660,6 +2054,17 @@ files = [
     {file = "mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba"},
 ]
 
+[[package]]
+name = "more-itertools"
+version = "10.5.0"
+description = "More routines for operating on iterables, beyond itertools"
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "more-itertools-10.5.0.tar.gz", hash = "sha256:5482bfef7849c25dc3c6dd53a6173ae4795da2a41a80faea6700d9f5846c5da6"},
+    {file = "more_itertools-10.5.0-py3-none-any.whl", hash = "sha256:037b0d3203ce90cca8ab1defbbdac29d5f993fc20131f3664dc8d6acfa872aef"},
+]
+
 [[package]]
 name = "msgpack"
 version = "1.1.0"
@@ -2188,6 +2593,20 @@ files = [
 qa = ["flake8 (==5.0.4)", "mypy (==0.971)", "types-setuptools (==67.2.0.1)"]
 testing = ["docopt", "pytest"]
 
+[[package]]
+name = "patool"
+version = "3.1.0"
+description = "portable archive file manager"
+optional = false
+python-versions = ">=3.10"
+files = [
+    {file = "patool-3.1.0-py2.py3-none-any.whl", hash = "sha256:401a918bdbf65434fd59c038bdb2c15ff7185675aedddb4494330c3e8e4fe80d"},
+    {file = "patool-3.1.0.tar.gz", hash = "sha256:417ed1ff7b9c979ce8a10114ed9bc280b08e3af3df3072e761303a3e00aaba04"},
+]
+
+[package.extras]
+argcompletion = ["argcomplete (==3.5.1)"]
+
 [[package]]
 name = "pexpect"
 version = "4.9.0"
@@ -2651,6 +3070,26 @@ files = [
 [package.extras]
 cli = ["click (>=5.0)"]
 
+[[package]]
+name = "python-gitlab"
+version = "5.1.0"
+description = "A python wrapper for the GitLab API"
+optional = false
+python-versions = ">=3.9.0"
+files = [
+    {file = "python_gitlab-5.1.0-py3-none-any.whl", hash = "sha256:c30cf547392ce66daaaf020839cfb6c15a91b26e2e7054d1b3f1b92e8dd65e7d"},
+    {file = "python_gitlab-5.1.0.tar.gz", hash = "sha256:d5a10dae8328f32fb9214bd3f9dc199b4930cd496f81c9be42a0f8ff338aeb35"},
+]
+
+[package.dependencies]
+requests = ">=2.32.0"
+requests-toolbelt = ">=1.0.0"
+
+[package.extras]
+autocompletion = ["argcomplete (>=1.10.0,<3)"]
+graphql = ["gql[httpx] (>=3.5.0,<4)"]
+yaml = ["PyYaml (>=6.0.1)"]
+
 [[package]]
 name = "pytz"
 version = "2024.2"
@@ -2689,6 +3128,17 @@ files = [
     {file = "pywin32-308-cp39-cp39-win_amd64.whl", hash = "sha256:71b3322d949b4cc20776436a9c9ba0eeedcbc9c650daa536df63f0ff111bb920"},
 ]
 
+[[package]]
+name = "pywin32-ctypes"
+version = "0.2.3"
+description = "A (partial) reimplementation of pywin32 using ctypes/cffi"
+optional = false
+python-versions = ">=3.6"
+files = [
+    {file = "pywin32-ctypes-0.2.3.tar.gz", hash = "sha256:d162dc04946d704503b2edc4d55f3dba5c1d539ead017afa00142c38b9885755"},
+    {file = "pywin32_ctypes-0.2.3-py3-none-any.whl", hash = "sha256:8a1513379d709975552d202d942d9837758905c8d01eb82b8bcc30918929e7b8"},
+]
+
 [[package]]
 name = "pyyaml"
 version = "6.0.2"
@@ -2908,6 +3358,20 @@ urllib3 = ">=1.21.1,<3"
 socks = ["PySocks (>=1.5.6,!=1.5.7)"]
 use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"]
 
+[[package]]
+name = "requests-toolbelt"
+version = "1.0.0"
+description = "A utility belt for advanced users of python-requests"
+optional = false
+python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
+files = [
+    {file = "requests-toolbelt-1.0.0.tar.gz", hash = "sha256:7681a0a3d047012b5bdc0ee37d7f8f07ebe76ab08caeccfc3921ce23c88d5bc6"},
+    {file = "requests_toolbelt-1.0.0-py2.py3-none-any.whl", hash = "sha256:cccfdd665f0a24fcf4726e690f65639d272bb0637b9b92dfd91a5568ccf6bd06"},
+]
+
+[package.dependencies]
+requests = ">=2.0.1,<3.0.0"
+
 [[package]]
 name = "rpds-py"
 version = "0.21.0"
@@ -3101,6 +3565,23 @@ files = [
     {file = "ruff-0.1.15.tar.gz", hash = "sha256:f6dfa8c1b21c913c326919056c390966648b680966febcb796cc9d1aaab8564e"},
 ]
 
+[[package]]
+name = "s3transfer"
+version = "0.10.4"
+description = "An Amazon S3 Transfer Manager"
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "s3transfer-0.10.4-py3-none-any.whl", hash = "sha256:244a76a24355363a68164241438de1b72f8781664920260c48465896b712a41e"},
+    {file = "s3transfer-0.10.4.tar.gz", hash = "sha256:29edc09801743c21eb5ecbc617a152df41d3c287f67b615f73e5f750583666a7"},
+]
+
+[package.dependencies]
+botocore = ">=1.33.2,<2.0a.0"
+
+[package.extras]
+crt = ["botocore[crt] (>=1.33.2,<2.0a.0)"]
+
 [[package]]
 name = "scipy"
 version = "1.14.1"
@@ -3151,6 +3632,21 @@ dev = ["cython-lint (>=0.12.2)", "doit (>=0.36.0)", "mypy (==1.10.0)", "pycodest
 doc = ["jupyterlite-pyodide-kernel", "jupyterlite-sphinx (>=0.13.1)", "jupytext", "matplotlib (>=3.5)", "myst-nb", "numpydoc", "pooch", "pydata-sphinx-theme (>=0.15.2)", "sphinx (>=5.0.0,<=7.3.7)", "sphinx-design (>=0.4.0)"]
 test = ["Cython", "array-api-strict (>=2.0)", "asv", "gmpy2", "hypothesis (>=6.30)", "meson", "mpmath", "ninja", "pooch", "pytest", "pytest-cov", "pytest-timeout", "pytest-xdist", "scikit-umfpack", "threadpoolctl"]
 
+[[package]]
+name = "secretstorage"
+version = "3.3.3"
+description = "Python bindings to FreeDesktop.org Secret Service API"
+optional = false
+python-versions = ">=3.6"
+files = [
+    {file = "SecretStorage-3.3.3-py3-none-any.whl", hash = "sha256:f356e6628222568e3af06f2eba8df495efa13b3b63081dafd4f7d9a7b7bc9f99"},
+    {file = "SecretStorage-3.3.3.tar.gz", hash = "sha256:2403533ef369eca6d2ba81718576c5e0f564d5cca1b58f73a8b23e7d4eeebd77"},
+]
+
+[package.dependencies]
+cryptography = ">=2.0"
+jeepney = ">=0.6"
+
 [[package]]
 name = "selenium"
 version = "4.26.1"
@@ -3959,4 +4455,4 @@ type = ["pytest-mypy"]
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.10,<3.13"
-content-hash = "ba984aa6c4d56e62124fc30db68f1f9f7140a6fe95b7f50d471b30bbb9009f22"
+content-hash = "75b95a9cbc4a5d95ef30465ed470a2cf944ae0c7fcd9a32884c72544318ad507"

+ 2 - 0
pyproject.toml

@@ -19,6 +19,8 @@ pandas = "^2.2.3"
 pycountry = "^24.6.1"
 primap2 = "^0.11.2"
 pandas-stubs = "^2.2.3.241009"
+datalad = "^1.1.4"
+doit = "^0.36.0"
 
 
 [tool.poetry.group.tests.dependencies]

+ 120 - 3
requirements.txt

@@ -1,13 +1,25 @@
+annexremote==1.6.6 ; python_version >= "3.10" and python_version < "3.13" \
+    --hash=sha256:5f78d0753c0763d95fc4c52050bd6212bb32457d32f6575dc66a83178e0283a7 \
+    --hash=sha256:dee4efa33c3bd9514928af5c57c82599ca9dc0a5535121ee234ed1833a98f93e
 attrs==24.2.0 ; python_version >= "3.10" and python_version < "3.13" \
     --hash=sha256:5cfb1b9148b5b086569baec03f20d7b6bf3bcacc9a42bebf87ffaaca362f6346 \
     --hash=sha256:81921eb96de3191c8258c199618104dd27ac608d9366f5e35d011eae1867ede2
+backports-tarfile==1.2.0 ; python_version >= "3.10" and python_version < "3.12" \
+    --hash=sha256:77e284d754527b01fb1e6fa8a1afe577858ebe4e9dad8919e34c862cb399bc34 \
+    --hash=sha256:d75e02c268746e1b8144c278978b6e98e85de6ad16f8e4b0844a154557eca991
 beautifulsoup4==4.12.3 ; python_version >= "3.10" and python_version < "3.13" \
     --hash=sha256:74e3d1928edc070d21748185c46e3fb33490f22f52a3addee9aee0f4f7781051 \
     --hash=sha256:b80878c9f40111313e55da8ba20bdba06d8fa3969fc68304167741bbf9e082ed
+boto3==1.35.72 ; python_version >= "3.10" and python_version < "3.13" \
+    --hash=sha256:410bb4ec676c57ee9c3c7824b7b1a3721584f18f8ee8ccc8e8ecdf285136b77f \
+    --hash=sha256:f9fc94413a959c388b1654c6687a5193293f3c69f8d0af3b86fd48b4096a23f3
+botocore==1.35.72 ; python_version >= "3.10" and python_version < "3.13" \
+    --hash=sha256:6b5fac38ef7cfdbc7781a751e0f78833ccb9149ba815bc238b1dbb75c90fbae5 \
+    --hash=sha256:7412877c3f766a1bfd09236e225ce1f0dc2c35e47949ae423e56e2093c8fa23a
 certifi==2024.8.30 ; python_version >= "3.10" and python_version < "3.13" \
     --hash=sha256:922820b53db7a7257ffbda3f597266d435245903d80737e34f8a45ff3e3230d8 \
     --hash=sha256:bec941d2aa8195e248a60b31ff9f0558284cf01a52591ceda73ea9afffd69fd9
-cffi==1.17.1 ; os_name == "nt" and implementation_name != "pypy" and python_version >= "3.10" and python_version < "3.13" \
+cffi==1.17.1 ; python_version >= "3.10" and python_version < "3.13" and os_name == "nt" and implementation_name != "pypy" or python_version >= "3.10" and python_version < "3.13" and sys_platform == "linux" and platform_python_implementation != "PyPy" \
     --hash=sha256:045d61c734659cc045141be4bae381a41d89b741f795af1dd018bfb532fd0df8 \
     --hash=sha256:0984a4925a435b1da406122d4d7968dd861c1385afe3b45ba82b750f229811e2 \
     --hash=sha256:0e2b1fac190ae3ebfe37b979cc1ce69c81f4e4fe5746bb401dca63a9062cdaf1 \
@@ -75,6 +87,9 @@ cffi==1.17.1 ; os_name == "nt" and implementation_name != "pypy" and python_vers
     --hash=sha256:f79fc4fc25f1c8698ff97788206bb3c2598949bfe0fef03d299eb1b5356ada99 \
     --hash=sha256:f7f5baafcc48261359e14bcd6d9bff6d4b28d9103847c9e136694cb0501aef87 \
     --hash=sha256:fc48c783f9c87e60831201f2cce7f3b2e4846bf4d8728eabe54d60700b318a0b
+chardet==5.2.0 ; python_version >= "3.10" and python_version < "3.13" \
+    --hash=sha256:1b3b6ff479a8c414bc3fa2c0852995695c4a026dcd6d0633b2dd092ca39c1cf7 \
+    --hash=sha256:e1cf59446890a00105fe7b7912492ea04b6e6f06d4b742b2c788469e34c82970
 charset-normalizer==3.4.0 ; python_version >= "3.10" and python_version < "3.13" \
     --hash=sha256:0099d79bdfcf5c1f0c2c72f91516702ebf8b0b8ddd8905f97a8aecf49712c621 \
     --hash=sha256:0713f3adb9d03d49d365b70b84775d0a0d18e4ab08d12bc46baa6132ba78aaf6 \
@@ -181,7 +196,10 @@ charset-normalizer==3.4.0 ; python_version >= "3.10" and python_version < "3.13"
     --hash=sha256:f606a1881d2663630ea5b8ce2efe2111740df4b687bd78b34a8131baa007f79b \
     --hash=sha256:fe9f97feb71aa9896b81973a7bbada8c49501dc73e58a10fcef6663af95e5079 \
     --hash=sha256:ffc519621dce0c767e96b9c53f09c5d215578e10b02c285809f76509a3931482
-colorama==0.4.6 ; python_version >= "3.10" and python_version < "3.13" and (sys_platform == "win32" or platform_system == "Windows") \
+cloudpickle==3.1.0 ; python_version >= "3.10" and python_version < "3.13" \
+    --hash=sha256:81a929b6e3c7335c863c771d673d105f02efdb89dfaba0c90495d1c64796601b \
+    --hash=sha256:fe11acda67f61aaaec473e3afe030feb131d78a43461b718185363384f1ba12e
+colorama==0.4.6 ; python_version >= "3.10" and python_version < "3.13" and (platform_system == "Windows" or sys_platform == "win32") \
     --hash=sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44 \
     --hash=sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6
 contourpy==1.3.1 ; python_version >= "3.10" and python_version < "3.13" \
@@ -239,15 +257,57 @@ contourpy==1.3.1 ; python_version >= "3.10" and python_version < "3.13" \
     --hash=sha256:ece6df05e2c41bd46776fbc712e0996f7c94e0d0543af1656956d150c4ca7c81 \
     --hash=sha256:efa874e87e4a647fd2e4f514d5e91c7d493697127beb95e77d2f7561f6905bd9 \
     --hash=sha256:f611e628ef06670df83fce17805c344710ca5cde01edfdc72751311da8585375
+cryptography==44.0.0 ; python_version >= "3.10" and python_version < "3.13" and sys_platform == "linux" \
+    --hash=sha256:1923cb251c04be85eec9fda837661c67c1049063305d6be5721643c22dd4e2b7 \
+    --hash=sha256:37d76e6863da3774cd9db5b409a9ecfd2c71c981c38788d3fcfaf177f447b731 \
+    --hash=sha256:3c672a53c0fb4725a29c303be906d3c1fa99c32f58abe008a82705f9ee96f40b \
+    --hash=sha256:404fdc66ee5f83a1388be54300ae978b2efd538018de18556dde92575e05defc \
+    --hash=sha256:4ac4c9f37eba52cb6fbeaf5b59c152ea976726b865bd4cf87883a7e7006cc543 \
+    --hash=sha256:60eb32934076fa07e4316b7b2742fa52cbb190b42c2df2863dbc4230a0a9b385 \
+    --hash=sha256:62901fb618f74d7d81bf408c8719e9ec14d863086efe4185afd07c352aee1d2c \
+    --hash=sha256:660cb7312a08bc38be15b696462fa7cc7cd85c3ed9c576e81f4dc4d8b2b31591 \
+    --hash=sha256:708ee5f1bafe76d041b53a4f95eb28cdeb8d18da17e597d46d7833ee59b97ede \
+    --hash=sha256:761817a3377ef15ac23cd7834715081791d4ec77f9297ee694ca1ee9c2c7e5eb \
+    --hash=sha256:831c3c4d0774e488fdc83a1923b49b9957d33287de923d58ebd3cec47a0ae43f \
+    --hash=sha256:84111ad4ff3f6253820e6d3e58be2cc2a00adb29335d4cacb5ab4d4d34f2a123 \
+    --hash=sha256:8b3e6eae66cf54701ee7d9c83c30ac0a1e3fa17be486033000f2a73a12ab507c \
+    --hash=sha256:9abcc2e083cbe8dde89124a47e5e53ec38751f0d7dfd36801008f316a127d7ba \
+    --hash=sha256:9e6fc8a08e116fb7c7dd1f040074c9d7b51d74a8ea40d4df2fc7aa08b76b9e6c \
+    --hash=sha256:a01956ddfa0a6790d594f5b34fc1bfa6098aca434696a03cfdbe469b8ed79285 \
+    --hash=sha256:abc998e0c0eee3c8a1904221d3f67dcfa76422b23620173e28c11d3e626c21bd \
+    --hash=sha256:b15492a11f9e1b62ba9d73c210e2416724633167de94607ec6069ef724fad092 \
+    --hash=sha256:be4ce505894d15d5c5037167ffb7f0ae90b7be6f2a98f9a5c3442395501c32fa \
+    --hash=sha256:c5eb858beed7835e5ad1faba59e865109f3e52b3783b9ac21e7e47dc5554e289 \
+    --hash=sha256:cd4e834f340b4293430701e772ec543b0fbe6c2dea510a5286fe0acabe153a02 \
+    --hash=sha256:d2436114e46b36d00f8b72ff57e598978b37399d2786fd39793c36c6d5cb1c64 \
+    --hash=sha256:eb33480f1bad5b78233b0ad3e1b0be21e8ef1da745d8d2aecbb20671658b9053 \
+    --hash=sha256:eca27345e1214d1b9f9490d200f9db5a874479be914199194e746c893788d417 \
+    --hash=sha256:ed3534eb1090483c96178fcb0f8893719d96d5274dfde98aa6add34614e97c8e \
+    --hash=sha256:f3f6fdfa89ee2d9d496e2c087cebef9d4fcbb0ad63c40e821b39f74bf48d9c5e \
+    --hash=sha256:f53c2c87e0fb4b0c00fa9571082a057e37690a8f12233306161c8f4b819960b7 \
+    --hash=sha256:f5e7cb1e5e56ca0933b4873c0220a78b773b24d40d186b6738080b73d3d0a756 \
+    --hash=sha256:f677e1268c4e23420c3acade68fac427fffcb8d19d7df95ed7ad17cdef8404f4
 cycler==0.12.1 ; python_version >= "3.10" and python_version < "3.13" \
     --hash=sha256:85cef7cff222d8644161529808465972e51340599459b8ac3ccbac5a854e0d30 \
     --hash=sha256:88bb128f02ba341da8ef447245a9e138fae777f6a23943da4540077d3601eb1c
+datalad==1.1.4 ; python_version >= "3.10" and python_version < "3.13" \
+    --hash=sha256:07a95efe456c82e7a8ca8e15a85bac3e588bc189abd4ddbace00f47d259ddaa9 \
+    --hash=sha256:db9286a4baf74d53d23c3bfb5928452197d57f36abed8ba0af334a3dd038166b
+distro==1.9.0 ; python_version >= "3.10" and python_version < "3.13" \
+    --hash=sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed \
+    --hash=sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2
+doit==0.36.0 ; python_version >= "3.10" and python_version < "3.13" \
+    --hash=sha256:71d07ccc9514cb22fe59d98999577665eaab57e16f644d04336ae0b4bae234bc \
+    --hash=sha256:ebc285f6666871b5300091c26eafdff3de968a6bd60ea35dd1e3fc6f2e32479a
 et-xmlfile==2.0.0 ; python_version >= "3.10" and python_version < "3.13" \
     --hash=sha256:7a91720bc756843502c3b7504c77b8fe44217c85c537d85037f0f536151b2caa \
     --hash=sha256:dab3f4764309081ce75662649be815c4c9081e88f0837825f90fd28317d4da54
 exceptiongroup==1.2.2 ; python_version >= "3.10" and python_version < "3.11" \
     --hash=sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b \
     --hash=sha256:47c2edf7c6738fafb49fd34290706d1a1a2f4d1c6df275526b62cbb4aa5393cc
+fasteners==0.19 ; python_version >= "3.10" and python_version < "3.13" \
+    --hash=sha256:758819cb5d94cdedf4e836988b74de396ceacb8e2794d21f82d131fd9ee77237 \
+    --hash=sha256:b4f37c3ac52d8a445af3a66bce57b33b5e90b97c696b7b984f530cf8f0ded09c
 flexcache==0.3 ; python_version >= "3.10" and python_version < "3.13" \
     --hash=sha256:18743bd5a0621bfe2cf8d519e4c3bfdf57a269c15d1ced3fb4b64e0ff4600656 \
     --hash=sha256:d43c9fea82336af6e0115e308d9d33a185390b8346a017564611f1466dcd2e32
@@ -341,9 +401,39 @@ h5py==3.12.1 ; python_version >= "3.10" and python_version < "3.13" \
     --hash=sha256:d2b8dd64f127d8b324f5d2cd1c0fd6f68af69084e9e47d27efeb9e28e685af3e \
     --hash=sha256:d3e465aee0ec353949f0f46bf6c6f9790a2006af896cee7c178a8c3e5090aa32 \
     --hash=sha256:e4d51919110a030913201422fb07987db4338eba5ec8c5a15d6fab8e03d443fc
+humanize==4.11.0 ; python_version >= "3.10" and python_version < "3.13" \
+    --hash=sha256:b53caaec8532bcb2fff70c8826f904c35943f8cecaca29d272d9df38092736c0 \
+    --hash=sha256:e66f36020a2d5a974c504bd2555cf770621dbdbb6d82f94a6857c0b1ea2608be
 idna==3.10 ; python_version >= "3.10" and python_version < "3.13" \
     --hash=sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9 \
     --hash=sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3
+importlib-metadata==8.5.0 ; python_version >= "3.10" and python_version < "3.13" \
+    --hash=sha256:45e54197d28b7a7f1559e60b95e7c567032b602131fbd588f1497f47880aa68b \
+    --hash=sha256:71522656f0abace1d072b9e5481a48f07c138e00f079c38c8f883823f9c26bd7
+iso8601==2.1.0 ; python_version >= "3.10" and python_version < "3.13" \
+    --hash=sha256:6b1d3829ee8921c4301998c909f7829fa9ed3cbdac0d3b16af2d743aed1ba8df \
+    --hash=sha256:aac4145c4dcb66ad8b648a02830f5e2ff6c24af20f4f482689be402db2429242
+jaraco-classes==3.4.0 ; python_version >= "3.10" and python_version < "3.13" \
+    --hash=sha256:47a024b51d0239c0dd8c8540c6c7f484be3b8fcf0b2d85c13825780d3b3f3acd \
+    --hash=sha256:f662826b6bed8cace05e7ff873ce0f9283b5c924470fe664fff1c2f00f581790
+jaraco-context==6.0.1 ; python_version >= "3.10" and python_version < "3.13" \
+    --hash=sha256:9bae4ea555cf0b14938dc0aee7c9f32ed303aa20a3b73e7dc80111628792d1b3 \
+    --hash=sha256:f797fc481b490edb305122c9181830a3a5b76d84ef6d1aef2fb9b47ab956f9e4
+jaraco-functools==4.1.0 ; python_version >= "3.10" and python_version < "3.13" \
+    --hash=sha256:70f7e0e2ae076498e212562325e805204fc092d7b4c17e0e86c959e249701a9d \
+    --hash=sha256:ad159f13428bc4acbf5541ad6dec511f91573b90fba04df61dafa2a1231cf649
+jeepney==0.8.0 ; python_version >= "3.10" and python_version < "3.13" and sys_platform == "linux" \
+    --hash=sha256:5efe48d255973902f6badc3ce55e2aa6c5c3b3bc642059ef3a91247bcfcc5806 \
+    --hash=sha256:c0a454ad016ca575060802ee4d590dd912e35c122fa04e70306de3d076cce755
+jmespath==1.0.1 ; python_version >= "3.10" and python_version < "3.13" \
+    --hash=sha256:02e2e4cc71b5bcab88332eebf907519190dd9e6e82107fa7f83b1003a6252980 \
+    --hash=sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe
+keyring==25.5.0 ; python_version >= "3.10" and python_version < "3.13" \
+    --hash=sha256:4c753b3ec91717fe713c4edd522d625889d8973a349b0e582622f49766de58e6 \
+    --hash=sha256:e67f8ac32b04be4714b42fe84ce7dad9c40985b9ca827c592cc303e7c26d9741
+keyrings-alt==5.0.2 ; python_version >= "3.10" and python_version < "3.13" \
+    --hash=sha256:6be74693192f3f37bbb752bfac9b86e6177076b17d2ac12a390f1d6abff8ac7c \
+    --hash=sha256:8f097ebe9dc8b185106502b8cdb066c926d2180e13b4689fd4771a3eab7d69fb
 kiwisolver==1.4.7 ; python_version >= "3.10" and python_version < "3.13" \
     --hash=sha256:073a36c8273647592ea332e816e75ef8da5c303236ec0167196793eb1e34657a \
     --hash=sha256:08471d4d86cbaec61f86b217dd938a83d85e03785f51121e791a6e6689a3be95 \
@@ -484,6 +574,9 @@ llvmlite==0.43.0 ; python_version >= "3.10" and python_version < "3.13" \
 loguru==0.7.2 ; python_version >= "3.10" and python_version < "3.13" \
     --hash=sha256:003d71e3d3ed35f0f8984898359d65b79e5b21943f78af86aa5491210429b8eb \
     --hash=sha256:e671a53522515f34fd406340ee968cb9ecafbc4b36c679da03c18fd8d0bd51ac
+looseversion==1.3.0 ; python_version >= "3.10" and python_version < "3.13" \
+    --hash=sha256:781ef477b45946fc03dd4c84ea87734b21137ecda0e1e122bcb3c8d16d2a56e0 \
+    --hash=sha256:ebde65f3f6bb9531a81016c6fef3eb95a61181adc47b7f949e9c0ea47911669e
 matplotlib==3.9.2 ; python_version >= "3.10" and python_version < "3.13" \
     --hash=sha256:039082812cacd6c6bec8e17a9c1e6baca230d4116d522e81e1f63a74d01d2e21 \
     --hash=sha256:03ba9c1299c920964e8d3857ba27173b4dbb51ca4bab47ffc2c2ba0eb5e2cbc5 \
@@ -525,6 +618,9 @@ matplotlib==3.9.2 ; python_version >= "3.10" and python_version < "3.13" \
     --hash=sha256:f32c7410c7f246838a77d6d1eff0c0f87f3cb0e7c4247aebea71a6d5a68cab49 \
     --hash=sha256:f6ee45bc4245533111ced13f1f2cace1e7f89d1c793390392a80c139d6cf0e6c \
     --hash=sha256:f7c0410f181a531ec4e93bbc27692f2c71a15c2da16766f5ba9761e7ae518413
+more-itertools==10.5.0 ; python_version >= "3.10" and python_version < "3.13" \
+    --hash=sha256:037b0d3203ce90cca8ab1defbbdac29d5f993fc20131f3664dc8d6acfa872aef \
+    --hash=sha256:5482bfef7849c25dc3c6dd53a6173ae4795da2a41a80faea6700d9f5846c5da6
 msgpack==1.1.0 ; python_version >= "3.10" and python_version < "3.13" \
     --hash=sha256:06f5fd2f6bb2a7914922d935d3b8bb4a7fff3a9a91cfce6d06c13bc42bec975b \
     --hash=sha256:071603e2f0771c45ad9bc65719291c568d4edf120b44eb36324dcb02a13bfddf \
@@ -710,6 +806,9 @@ pandas==2.2.3 ; python_version >= "3.10" and python_version < "3.13" \
     --hash=sha256:f00d1345d84d8c86a63e476bb4955e46458b304b9575dcf71102b5c705320015 \
     --hash=sha256:f3a255b2c19987fbbe62a9dfd6cff7ff2aa9ccab3fc75218fd4b7530f01efa24 \
     --hash=sha256:fffb8ae78d8af97f849404f21411c95062db1496aeb3e56f146f0355c9989319
+patool==3.1.0 ; python_version >= "3.10" and python_version < "3.13" \
+    --hash=sha256:401a918bdbf65434fd59c038bdb2c15ff7185675aedddb4494330c3e8e4fe80d \
+    --hash=sha256:417ed1ff7b9c979ce8a10114ed9bc280b08e3af3df3072e761303a3e00aaba04
 pillow==11.0.0 ; python_version >= "3.10" and python_version < "3.13" \
     --hash=sha256:00177a63030d612148e659b55ba99527803288cea7c75fb05766ab7981a8c1b7 \
     --hash=sha256:006bcdd307cc47ba43e924099a038cbf9591062e6c50e570819743f5607404f5 \
@@ -801,7 +900,7 @@ primap2==0.11.2 ; python_version >= "3.10" and python_version < "3.13" \
 pycountry==24.6.1 ; python_version >= "3.10" and python_version < "3.13" \
     --hash=sha256:b61b3faccea67f87d10c1f2b0fc0be714409e8fcdcc1315613174f6466c10221 \
     --hash=sha256:f1a4fb391cd7214f8eefd39556d740adcc233c778a27f8942c8dca351d6ce06f
-pycparser==2.22 ; os_name == "nt" and implementation_name != "pypy" and python_version >= "3.10" and python_version < "3.13" \
+pycparser==2.22 ; python_version >= "3.10" and python_version < "3.13" and os_name == "nt" and implementation_name != "pypy" or python_version >= "3.10" and python_version < "3.13" and sys_platform == "linux" and platform_python_implementation != "PyPy" \
     --hash=sha256:491c8be9c040f5390f5bf44a5b07752bd07f56edf992381b05c701439eec10f6 \
     --hash=sha256:c3702b6d3dd8c7abc1afa565d7e63d53a1d0bd86cdc24edd75470f4de499cfcc
 pyparsing==3.2.0 ; python_version >= "3.10" and python_version < "3.13" \
@@ -817,9 +916,18 @@ python-dateutil==2.9.0.post0 ; python_version >= "3.10" and python_version < "3.
 python-dotenv==1.0.1 ; python_version >= "3.10" and python_version < "3.13" \
     --hash=sha256:e324ee90a023d808f1959c46bcbc04446a10ced277783dc6ee09987c37ec10ca \
     --hash=sha256:f7b63ef50f1b690dddf550d03497b66d609393b40b564ed0d674909a68ebf16a
+python-gitlab==5.1.0 ; python_version >= "3.10" and python_version < "3.13" \
+    --hash=sha256:c30cf547392ce66daaaf020839cfb6c15a91b26e2e7054d1b3f1b92e8dd65e7d \
+    --hash=sha256:d5a10dae8328f32fb9214bd3f9dc199b4930cd496f81c9be42a0f8ff338aeb35
 pytz==2024.2 ; python_version >= "3.10" and python_version < "3.13" \
     --hash=sha256:2aa355083c50a0f93fa581709deac0c9ad65cca8a9e9beac660adcbd493c798a \
     --hash=sha256:31c7c1817eb7fae7ca4b8c7ee50c72f93aa2dd863de768e1ef4245d426aa0725
+pywin32-ctypes==0.2.3 ; python_version >= "3.10" and python_version < "3.13" and sys_platform == "win32" \
+    --hash=sha256:8a1513379d709975552d202d942d9837758905c8d01eb82b8bcc30918929e7b8 \
+    --hash=sha256:d162dc04946d704503b2edc4d55f3dba5c1d539ead017afa00142c38b9885755
+requests-toolbelt==1.0.0 ; python_version >= "3.10" and python_version < "3.13" \
+    --hash=sha256:7681a0a3d047012b5bdc0ee37d7f8f07ebe76ab08caeccfc3921ce23c88d5bc6 \
+    --hash=sha256:cccfdd665f0a24fcf4726e690f65639d272bb0637b9b92dfd91a5568ccf6bd06
 requests==2.32.3 ; python_version >= "3.10" and python_version < "3.13" \
     --hash=sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760 \
     --hash=sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6
@@ -868,6 +976,9 @@ ruamel-yaml-clib==0.2.12 ; platform_python_implementation == "CPython" and pytho
 ruamel-yaml==0.18.6 ; python_version >= "3.10" and python_version < "3.13" \
     --hash=sha256:57b53ba33def16c4f3d807c0ccbc00f8a6081827e81ba2491691b76882d0c636 \
     --hash=sha256:8b27e6a217e786c6fbe5634d8f3f11bc63e0f80f6a5890f28863d9c45aac311b
+s3transfer==0.10.4 ; python_version >= "3.10" and python_version < "3.13" \
+    --hash=sha256:244a76a24355363a68164241438de1b72f8781664920260c48465896b712a41e \
+    --hash=sha256:29edc09801743c21eb5ecbc617a152df41d3c287f67b615f73e5f750583666a7
 scipy==1.14.1 ; python_version >= "3.10" and python_version < "3.13" \
     --hash=sha256:0c2f95de3b04e26f5f3ad5bb05e74ba7f68b837133a4492414b3afd79dfe540e \
     --hash=sha256:1729560c906963fc8389f6aac023739ff3983e727b1a4d87696b7bf108316a79 \
@@ -902,6 +1013,9 @@ scipy==1.14.1 ; python_version >= "3.10" and python_version < "3.13" \
     --hash=sha256:eb58ca0abd96911932f688528977858681a59d61a7ce908ffd355957f7025cfc \
     --hash=sha256:edaf02b82cd7639db00dbff629995ef185c8df4c3ffa71a5562a595765a06ce1 \
     --hash=sha256:fef8c87f8abfb884dac04e97824b61299880c43f4ce675dd2cbeadd3c9b466d2
+secretstorage==3.3.3 ; python_version >= "3.10" and python_version < "3.13" and sys_platform == "linux" \
+    --hash=sha256:2403533ef369eca6d2ba81718576c5e0f564d5cca1b58f73a8b23e7d4eeebd77 \
+    --hash=sha256:f356e6628222568e3af06f2eba8df495efa13b3b63081dafd4f7d9a7b7bc9f99
 selenium==4.26.1 ; python_version >= "3.10" and python_version < "3.13" \
     --hash=sha256:1db3f3a0cd5bb07624fa8a3905a6fdde1595a42185a0617077c361dc53d104fb \
     --hash=sha256:7640f3f08ae7f4e450f895678e8a10a55eb4e4ca18311ed675ecc4684b96b683
@@ -968,3 +1082,6 @@ wsproto==1.2.0 ; python_version >= "3.10" and python_version < "3.13" \
 xarray==2024.10.0 ; python_version >= "3.10" and python_version < "3.13" \
     --hash=sha256:ae1d38cb44a0324dfb61e492394158ae22389bf7de9f3c174309c17376df63a0 \
     --hash=sha256:e369e2bac430e418c2448e5b96f07da4635f98c1319aa23cfeb3fbcb9a01d2e0
+zipp==3.21.0 ; python_version >= "3.10" and python_version < "3.13" \
+    --hash=sha256:2c9958f6430a2040341a52eb608ed6dd93ef4392e02ffe219417c1b28b5dd1f4 \
+    --hash=sha256:ac1bbe05fd2991f160ebce24ffbac5f6d11d83dc90891255885223d42b3cd931

+ 31 - 0
scripts/read_data_set.py

@@ -0,0 +1,31 @@
+"""Read selected domains and versions."""
+from pathlib import Path
+
+import click
+
+from faostat_data_primap.helper.definitions import domains_and_releases_to_read
+from faostat_data_primap.helper.paths import (
+    extracted_data_path,
+)
+from faostat_data_primap.read import (
+    read_data,
+)
+
+
+@click.command()
+@click.option("--run_id", default="2024", help="Configuration to run")
+@click.option("--save_path", default=None, help="Where to save data in root directory.")
+def run(run_id, save_path):
+    """
+    Read the domains configured for ``run_id``; reject an unknown ``run_id``.
+    """
+    if run_id not in domains_and_releases_to_read:
+        raise click.BadParameter(f"unknown run id {run_id!r}", param_hint="--run_id")
+    # an empty or missing --save_path falls back to extracted_data_path
+    target_dir = Path(save_path) if save_path else extracted_data_path
+    selection = domains_and_releases_to_read[run_id]
+    read_data(domains_and_releases_to_read=selection, save_path=target_dir)
+
+
+if __name__ == "__main__":
+    run()

+ 14 - 0
src/faostat_data_primap/helper/definitions.py

@@ -422,3 +422,17 @@ config_to_if = {
         "institution": ("Food and Agriculture Organization of the United Nations"),
     },
 }
+
+# TODO: decide whether this configuration should live in its own module.
+# Maps a run id to the (domain, release) pairs that are read for that run.
+domains_and_releases_to_read = {
+    "2024": (
+        # ("farm_gate_agriculture_energy", "2024-11-14"),
+        # ("farm_gate_emissions_crops", "2024-11-14"),
+        # ("farm_gate_livestock", "2024-11-14"),
+        # ("land_use_drained_organic_soils", "2024-11-14"),
+        # ("land_use_fires", "2024-11-14"),
+        # ("land_use_forests", "2024-11-14"),
+        ("pre_post_agricultural_production", "2024-11-14"),
+    )
+}

+ 40 - 18
src/faostat_data_primap/read.py

@@ -60,26 +60,18 @@ def get_latest_release(domain_path: pathlib.Path) -> str:
     return sorted(all_releases, reverse=True)[0]
 
 
-def read_latest_data(
-    downloaded_data_path: pathlib.Path = downloaded_data_path,
-    save_path: pathlib.Path = extracted_data_path,
-) -> None:
+def read_data(domains_and_releases_to_read, save_path) -> None:
     """
-    Read and save the latest data
+    Read specified domains and releases.
 
-    Converts downloaded data into interchange format and primap2 native format
-    and saves the files in the extracted_data directory.
+    Parameters
+    ----------
+    domains_and_releases_to_read
+    save_path
 
     """
-    domains = get_all_domains(downloaded_data_path)
-
-    files_to_read = []
-    for domain in domains:
-        domain_path = downloaded_data_path / domain
-        files_to_read.append((domain, get_latest_release(domain_path)))
-
     df_list = []
-    for domain, release in files_to_read:
+    for domain, release in domains_and_releases_to_read:
         read_config = read_config_all[domain][release]
 
         print(f"Read {read_config['filename']}")
@@ -129,8 +121,11 @@ def read_latest_data(
 
     df_all = pd.concat(df_list, axis=0, join="outer", ignore_index=True)
 
-    # sometimes Source is empty
-    df_all["Source"] = df_all["Source"].fillna("unknown")
+    # some domains don't have Source column or values are empty
+    if "Source" not in df_all.columns:
+        df_all["Source"] = "unknown"
+    else:
+        df_all["Source"] = df_all["Source"].fillna("unknown")
 
     # Remove the "Y" prefix for the years columns
     df_all = df_all.rename(columns=lambda x: x.lstrip("Y") if x.startswith("Y") else x)
@@ -139,7 +134,9 @@ def read_latest_data(
     df_all["Unit"] = df_all["entity"] + " * " + df_all["Unit"] + "/ year"
     df_all["Unit"] = df_all["Unit"].replace(read_config_all["replace_units"])
 
-    date_last_updated = sorted([i[1] for i in files_to_read], reverse=True)[0]
+    date_last_updated = sorted(
+        [i[1] for i in domains_and_releases_to_read], reverse=True
+    )[0]
     release_name = f"v{date_last_updated}"
 
     data_if = pm2.pm2io.convert_wide_dataframe_if(
@@ -171,6 +168,7 @@ def read_latest_data(
     if not output_folder.exists():
         output_folder.mkdir()
 
+    print(f"Writing primap2 file to {output_folder / (output_filename + '.csv')}")
     pm2.pm2io.write_interchange_format(
         output_folder / (output_filename + ".csv"),
         data_if,
@@ -178,8 +176,32 @@ def read_latest_data(
 
     compression = dict(zlib=True, complevel=9)
     encoding = {var: compression for var in data_pm2.data_vars}
+    print(f"Writing netcdf file to {output_folder / (output_filename + '.nc')}")
     data_pm2.pr.to_netcdf(output_folder / (output_filename + ".nc"), encoding=encoding)
 
     # next steps
     # convert to IPCC2006_PRIMAP categories
     # save final version
+
+
+def read_latest_data(
+    downloaded_data_path: pathlib.Path = downloaded_data_path,
+    save_path: pathlib.Path = extracted_data_path,
+) -> None:
+    """
+    Read the most recent release of every downloaded domain and save it.
+
+    Pairs each domain from ``get_all_domains`` with its latest release.
+
+    Parameters
+    ----------
+    downloaded_data_path
+        Root path passed to ``get_all_domains``; domains are looked up below it.
+    save_path
+        Passed through to ``read_data`` unchanged.
+    """
+    latest_per_domain = [
+        (name, get_latest_release(downloaded_data_path / name))
+        for name in get_all_domains(downloaded_data_path)
+    ]
+    read_data(domains_and_releases_to_read=latest_per_domain, save_path=save_path)

+ 41 - 0
tests/integration/test_read_data_set.py

@@ -0,0 +1,41 @@
+import os
+import subprocess
+from pathlib import Path
+
+
+def test_read_data_set(tmp_path):
+    """
+    Run ``scripts/read_data_set.py`` in a subprocess and inspect its output.
+
+    The script is expected to create one release folder in ``tmp_path`` that
+    contains a .yaml, a .csv and a .nc file.
+    """
+    repo_root = Path(__file__).parent.parent.parent
+    script_path = repo_root / "scripts" / "read_data_set.py"
+
+    # equivalent to: poetry run python3 <script> --save_path <tmp_path> --run_id 2024
+    cli_options = ["--save_path", str(tmp_path), "--run_id", "2024"]
+    command = ["poetry", "run", "python3", str(script_path), *cli_options]
+
+    result = subprocess.run(  # noqa: S603
+        command, capture_output=True, text=True, check=False
+    )
+
+    # surface the script's stderr if it exited with an error
+    assert result.returncode == 0, f"Script failed: {result.stderr}"
+
+    # exactly one folder is created; its name starts with "v" followed by
+    # the release date, which changes with each release
+    created = os.listdir(tmp_path)
+    assert len(created) == 1
+    (release_name,) = created
+    assert release_name.startswith("v")
+
+    # the release folder holds exactly three files ...
+    written = os.listdir(tmp_path / release_name)
+    assert len(written) == 3
+
+    # ... one each with the extensions .yaml, .csv and .nc
+    found_extensions = {name.split(".")[-1] for name in written}
+    expected_extensions = {"nc", "csv", "yaml"}
+    assert expected_extensions == found_extensions

+ 28 - 0
tests/unit/test_read_data.py

@@ -0,0 +1,28 @@
+# from src.faostat_data_primap.helper.definitions import domains_and_releases_to_read
+import os
+
+from src.faostat_data_primap.read import read_data
+
+
+def test_read_data_one_domain(tmp_path):
+    """
+    Read one domain directly through ``read_data`` and check what is written.
+    """
+    selection = (("farm_gate_agriculture_energy", "2024-11-14"),)
+    read_data(domains_and_releases_to_read=selection, save_path=tmp_path)
+
+    # exactly one folder is created; its name starts with "v" followed by
+    # the release date, which changes with each release
+    created = os.listdir(tmp_path)
+    assert len(created) == 1
+    (release_name,) = created
+    assert release_name.startswith("v")
+
+    # the release folder holds exactly three files ...
+    written = os.listdir(tmp_path / release_name)
+    assert len(written) == 3
+
+    # ... one each with the extensions .yaml, .csv and .nc
+    found_extensions = {name.split(".")[-1] for name in written}
+    expected_extensions = {"nc", "csv", "yaml"}
+    assert expected_extensions == found_extensions