Browse Source

Merge pull request #5 from primap-community/doit

Switch to doit
crdanielbusch 2 months ago
parent
commit
239167ba6a

+ 1 - 0
.gitattributes

@@ -3,3 +3,4 @@
 * annex.largefiles=((mimeencoding=binary)and(largerthan=0))
 *.csv annex.largefiles=anything
 *.zip annex.largefiles=anything
+*.pdf annex.largefiles=anything

+ 0 - 10
Makefile

@@ -77,13 +77,3 @@ virtual-environment:  ## update virtual environment, create a new one if it does
 	poetry config virtualenvs.in-project true
 	poetry install --all-extras
 	poetry run pre-commit install
-
-.PHONY: download_all_domains-environment
-download_all_domains:
-	# downloads and stages (datalad save) all available data
-	datalad run poetry run python3 scripts/download_all_domains.py
-
-.PHONY: read_latest_data
-download_all_domains:
-	# reads and stages (datalad save) the latest data for each domain
-	datalad run poetry run python3 scripts/read_all_domains.py

+ 58 - 0
dodo.py

@@ -0,0 +1,58 @@
+"""
+Define tasks to download and read the FAO data set.
+"""
+import datalad.api
+
+
+def task_download():
+    """
+    Run scripts/download_all_domains.py via datalad.api.run (outputs: downloaded_data).
+    """
+
+    def datalad_run_download():  # the single entry in this task's "actions" list
+        datalad.api.run(
+            cmd="python3 scripts/download_all_domains.py",
+            outputs="downloaded_data",  # path declared to datalad as the command's output
+        )
+
+    return {"actions": [datalad_run_download]}
+
+
+def task_read():
+    """
+    Run scripts/read_data_set.py via datalad.api.run with save_path and run_id options.
+    """
+
+    def read_dataset(save_path, run_id):  # presumably filled from "params" below; confirm
+        print(f"Reading dataset for {save_path=} and {run_id=}")
+        cmd = (
+            f"python3 scripts/read_data_set.py "
+            f"--save_path {save_path} --run_id {run_id}"  # values are interpolated unquoted
+        )
+
+        datalad.api.run(
+            cmd=cmd,
+            message="Read data set",
+            outputs=f"{save_path}",  # the save path is declared as the command's output
+        )
+
+    return {
+        "actions": [read_dataset],
+        "params": [  # command-line options of this task, each with a string default
+            {
+                "name": "save_path",
+                "short": "s",
+                "long": "save_path",
+                "default": "extracted_data",
+                "help": "Path to save the data.",
+            },
+            {
+                "name": "run_id",
+                "long": "run_id",
+                "short": "r",
+                "default": "2024",
+                "help": "Run identifier.",
+            },
+        ],
+        "verbosity": 2,  # NOTE(review): presumably set so the print() above is shown; confirm
+    }

+ 1 - 1
downloaded_data/farm_gate_agriculture_energy/2024-11-14/GN_2023Oct_Final.pdf

@@ -1 +1 @@
-/Users/danielbusch/Documents/FAOSTAT_data_primap/downloaded_data/farm_gate_agriculture_energy/2023-12-13/GN_2023Oct_Final.pdf
+../../../.git/annex/objects/JK/1V/MD5E-s289407--8bb0d371bcfb2958de3106da9663edab.pdf/MD5E-s289407--8bb0d371bcfb2958de3106da9663edab.pdf

+ 1 - 1
downloaded_data/pre_post_agricultural_production/2024-11-14/README_Methodological_Note.pdf

@@ -1 +1 @@
-/Users/danielbusch/Documents/FAOSTAT_data_primap/downloaded_data/pre_post_agricultural_production/2023-11-09/README_Methodological_Note.pdf
+../../../.git/annex/objects/28/j9/MD5E-s950199--b540bc153f62f6e3c17d79fdde2802f6.pdf/MD5E-s950199--b540bc153f62f6e3c17d79fdde2802f6.pdf

+ 1 - 1
extracted_data/v2024-11-14/FAOSTAT_Agrifood_system_emissions_v2024-11-14.csv

@@ -1 +1 @@
-../../.git/annex/objects/5f/mq/MD5E-s45951559--903471c4bf5a6616e7a144e21b8e4954.csv/MD5E-s45951559--903471c4bf5a6616e7a144e21b8e4954.csv
+../../.git/annex/objects/fQ/4m/MD5E-s4236841--55a238326c11e125114380c4502d7cf4.csv/MD5E-s4236841--55a238326c11e125114380c4502d7cf4.csv

+ 1 - 1
extracted_data/v2024-11-14/FAOSTAT_Agrifood_system_emissions_v2024-11-14.nc

@@ -1 +1 @@
-../../.git/annex/objects/7x/xk/MD5E-s21114760--2857537fa84990f457054a91ab674169.nc/MD5E-s21114760--2857537fa84990f457054a91ab674169.nc
+../../.git/annex/objects/FX/kw/MD5E-s2607829--fa792e6284b6dc663c97a0c587210e07.nc/MD5E-s2607829--fa792e6284b6dc663c97a0c587210e07.nc

+ 2 - 2
extracted_data/v2024-11-14/FAOSTAT_Agrifood_system_emissions_v2024-11-14.yaml

@@ -13,10 +13,10 @@ time_format: '%Y'
 dimensions:
   '*':
   - time
-  - category (FAOSTAT)
   - source
-  - scenario (FAO)
+  - category (FAOSTAT)
   - area (ISO3)
+  - scenario (FAO)
   - entity
   - unit
 data_file: FAOSTAT_Agrifood_system_emissions_v2024-11-14.csv

+ 497 - 1
poetry.lock

@@ -29,6 +29,21 @@ files = [
     {file = "alabaster-0.7.16.tar.gz", hash = "sha256:75a8b99c28a5dad50dd7f8ccdd447a121ddb3892da9e53d1ca5cca3106d58d65"},
 ]
 
+[[package]]
+name = "annexremote"
+version = "1.6.6"
+description = "git annex special remotes made easy"
+optional = false
+python-versions = "*"
+files = [
+    {file = "annexremote-1.6.6-py3-none-any.whl", hash = "sha256:dee4efa33c3bd9514928af5c57c82599ca9dc0a5535121ee234ed1833a98f93e"},
+    {file = "annexremote-1.6.6.tar.gz", hash = "sha256:5f78d0753c0763d95fc4c52050bd6212bb32457d32f6575dc66a83178e0283a7"},
+]
+
+[package.extras]
+doc = ["sphinx"]
+tests = ["coverage", "pytest"]
+
 [[package]]
 name = "appnope"
 version = "0.1.4"
@@ -91,6 +106,21 @@ files = [
 [package.extras]
 dev = ["freezegun (>=1.0,<2.0)", "pytest (>=6.0)", "pytest-cov"]
 
+[[package]]
+name = "backports-tarfile"
+version = "1.2.0"
+description = "Backport of CPython tarfile module"
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "backports.tarfile-1.2.0-py3-none-any.whl", hash = "sha256:77e284d754527b01fb1e6fa8a1afe577858ebe4e9dad8919e34c862cb399bc34"},
+    {file = "backports_tarfile-1.2.0.tar.gz", hash = "sha256:d75e02c268746e1b8144c278978b6e98e85de6ad16f8e4b0844a154557eca991"},
+]
+
+[package.extras]
+docs = ["furo", "jaraco.packaging (>=9.3)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"]
+testing = ["jaraco.test", "pytest (!=8.0.*)", "pytest (>=6,!=8.1.*)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)"]
+
 [[package]]
 name = "beautifulsoup4"
 version = "4.12.3"
@@ -112,6 +142,44 @@ charset-normalizer = ["charset-normalizer"]
 html5lib = ["html5lib"]
 lxml = ["lxml"]
 
+[[package]]
+name = "boto3"
+version = "1.35.72"
+description = "The AWS SDK for Python"
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "boto3-1.35.72-py3-none-any.whl", hash = "sha256:410bb4ec676c57ee9c3c7824b7b1a3721584f18f8ee8ccc8e8ecdf285136b77f"},
+    {file = "boto3-1.35.72.tar.gz", hash = "sha256:f9fc94413a959c388b1654c6687a5193293f3c69f8d0af3b86fd48b4096a23f3"},
+]
+
+[package.dependencies]
+botocore = ">=1.35.72,<1.36.0"
+jmespath = ">=0.7.1,<2.0.0"
+s3transfer = ">=0.10.0,<0.11.0"
+
+[package.extras]
+crt = ["botocore[crt] (>=1.21.0,<2.0a0)"]
+
+[[package]]
+name = "botocore"
+version = "1.35.72"
+description = "Low-level, data-driven core of boto 3."
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "botocore-1.35.72-py3-none-any.whl", hash = "sha256:7412877c3f766a1bfd09236e225ce1f0dc2c35e47949ae423e56e2093c8fa23a"},
+    {file = "botocore-1.35.72.tar.gz", hash = "sha256:6b5fac38ef7cfdbc7781a751e0f78833ccb9149ba815bc238b1dbb75c90fbae5"},
+]
+
+[package.dependencies]
+jmespath = ">=0.7.1,<2.0.0"
+python-dateutil = ">=2.1,<3.0.0"
+urllib3 = {version = ">=1.25.4,<2.2.0 || >2.2.0,<3", markers = "python_version >= \"3.10\""}
+
+[package.extras]
+crt = ["awscrt (==0.22.0)"]
+
 [[package]]
 name = "certifi"
 version = "2024.8.30"
@@ -213,6 +281,17 @@ files = [
     {file = "cfgv-3.4.0.tar.gz", hash = "sha256:e52591d4c5f5dead8e0f673fb16db7949d2cfb3f7da4582893288f0ded8fe560"},
 ]
 
+[[package]]
+name = "chardet"
+version = "5.2.0"
+description = "Universal encoding detector for Python 3"
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "chardet-5.2.0-py3-none-any.whl", hash = "sha256:e1cf59446890a00105fe7b7912492ea04b6e6f06d4b742b2c788469e34c82970"},
+    {file = "chardet-5.2.0.tar.gz", hash = "sha256:1b3b6ff479a8c414bc3fa2c0852995695c4a026dcd6d0633b2dd092ca39c1cf7"},
+]
+
 [[package]]
 name = "charset-normalizer"
 version = "3.4.0"
@@ -341,6 +420,17 @@ files = [
 [package.dependencies]
 colorama = {version = "*", markers = "platform_system == \"Windows\""}
 
+[[package]]
+name = "cloudpickle"
+version = "3.1.0"
+description = "Pickler class to extend the standard pickle.Pickler functionality"
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "cloudpickle-3.1.0-py3-none-any.whl", hash = "sha256:fe11acda67f61aaaec473e3afe030feb131d78a43461b718185363384f1ba12e"},
+    {file = "cloudpickle-3.1.0.tar.gz", hash = "sha256:81a929b6e3c7335c863c771d673d105f02efdb89dfaba0c90495d1c64796601b"},
+]
+
 [[package]]
 name = "colorama"
 version = "0.4.6"
@@ -519,6 +609,57 @@ tomli = {version = "*", optional = true, markers = "python_full_version <= \"3.1
 [package.extras]
 toml = ["tomli"]
 
+[[package]]
+name = "cryptography"
+version = "44.0.0"
+description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers."
+optional = false
+python-versions = "!=3.9.0,!=3.9.1,>=3.7"
+files = [
+    {file = "cryptography-44.0.0-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:84111ad4ff3f6253820e6d3e58be2cc2a00adb29335d4cacb5ab4d4d34f2a123"},
+    {file = "cryptography-44.0.0-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b15492a11f9e1b62ba9d73c210e2416724633167de94607ec6069ef724fad092"},
+    {file = "cryptography-44.0.0-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:831c3c4d0774e488fdc83a1923b49b9957d33287de923d58ebd3cec47a0ae43f"},
+    {file = "cryptography-44.0.0-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:761817a3377ef15ac23cd7834715081791d4ec77f9297ee694ca1ee9c2c7e5eb"},
+    {file = "cryptography-44.0.0-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:3c672a53c0fb4725a29c303be906d3c1fa99c32f58abe008a82705f9ee96f40b"},
+    {file = "cryptography-44.0.0-cp37-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:4ac4c9f37eba52cb6fbeaf5b59c152ea976726b865bd4cf87883a7e7006cc543"},
+    {file = "cryptography-44.0.0-cp37-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:60eb32934076fa07e4316b7b2742fa52cbb190b42c2df2863dbc4230a0a9b385"},
+    {file = "cryptography-44.0.0-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:ed3534eb1090483c96178fcb0f8893719d96d5274dfde98aa6add34614e97c8e"},
+    {file = "cryptography-44.0.0-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:f3f6fdfa89ee2d9d496e2c087cebef9d4fcbb0ad63c40e821b39f74bf48d9c5e"},
+    {file = "cryptography-44.0.0-cp37-abi3-win32.whl", hash = "sha256:eb33480f1bad5b78233b0ad3e1b0be21e8ef1da745d8d2aecbb20671658b9053"},
+    {file = "cryptography-44.0.0-cp37-abi3-win_amd64.whl", hash = "sha256:abc998e0c0eee3c8a1904221d3f67dcfa76422b23620173e28c11d3e626c21bd"},
+    {file = "cryptography-44.0.0-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:660cb7312a08bc38be15b696462fa7cc7cd85c3ed9c576e81f4dc4d8b2b31591"},
+    {file = "cryptography-44.0.0-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1923cb251c04be85eec9fda837661c67c1049063305d6be5721643c22dd4e2b7"},
+    {file = "cryptography-44.0.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:404fdc66ee5f83a1388be54300ae978b2efd538018de18556dde92575e05defc"},
+    {file = "cryptography-44.0.0-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:c5eb858beed7835e5ad1faba59e865109f3e52b3783b9ac21e7e47dc5554e289"},
+    {file = "cryptography-44.0.0-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:f53c2c87e0fb4b0c00fa9571082a057e37690a8f12233306161c8f4b819960b7"},
+    {file = "cryptography-44.0.0-cp39-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:9e6fc8a08e116fb7c7dd1f040074c9d7b51d74a8ea40d4df2fc7aa08b76b9e6c"},
+    {file = "cryptography-44.0.0-cp39-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:9abcc2e083cbe8dde89124a47e5e53ec38751f0d7dfd36801008f316a127d7ba"},
+    {file = "cryptography-44.0.0-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:d2436114e46b36d00f8b72ff57e598978b37399d2786fd39793c36c6d5cb1c64"},
+    {file = "cryptography-44.0.0-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:a01956ddfa0a6790d594f5b34fc1bfa6098aca434696a03cfdbe469b8ed79285"},
+    {file = "cryptography-44.0.0-cp39-abi3-win32.whl", hash = "sha256:eca27345e1214d1b9f9490d200f9db5a874479be914199194e746c893788d417"},
+    {file = "cryptography-44.0.0-cp39-abi3-win_amd64.whl", hash = "sha256:708ee5f1bafe76d041b53a4f95eb28cdeb8d18da17e597d46d7833ee59b97ede"},
+    {file = "cryptography-44.0.0-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:37d76e6863da3774cd9db5b409a9ecfd2c71c981c38788d3fcfaf177f447b731"},
+    {file = "cryptography-44.0.0-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:f677e1268c4e23420c3acade68fac427fffcb8d19d7df95ed7ad17cdef8404f4"},
+    {file = "cryptography-44.0.0-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:f5e7cb1e5e56ca0933b4873c0220a78b773b24d40d186b6738080b73d3d0a756"},
+    {file = "cryptography-44.0.0-pp310-pypy310_pp73-manylinux_2_34_aarch64.whl", hash = "sha256:8b3e6eae66cf54701ee7d9c83c30ac0a1e3fa17be486033000f2a73a12ab507c"},
+    {file = "cryptography-44.0.0-pp310-pypy310_pp73-manylinux_2_34_x86_64.whl", hash = "sha256:be4ce505894d15d5c5037167ffb7f0ae90b7be6f2a98f9a5c3442395501c32fa"},
+    {file = "cryptography-44.0.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:62901fb618f74d7d81bf408c8719e9ec14d863086efe4185afd07c352aee1d2c"},
+    {file = "cryptography-44.0.0.tar.gz", hash = "sha256:cd4e834f340b4293430701e772ec543b0fbe6c2dea510a5286fe0acabe153a02"},
+]
+
+[package.dependencies]
+cffi = {version = ">=1.12", markers = "platform_python_implementation != \"PyPy\""}
+
+[package.extras]
+docs = ["sphinx (>=5.3.0)", "sphinx-rtd-theme (>=3.0.0)"]
+docstest = ["pyenchant (>=3)", "readme-renderer (>=30.0)", "sphinxcontrib-spelling (>=7.3.1)"]
+nox = ["nox (>=2024.4.15)", "nox[uv] (>=2024.3.2)"]
+pep8test = ["check-sdist", "click (>=8.0.1)", "mypy (>=1.4)", "ruff (>=0.3.6)"]
+sdist = ["build (>=1.0.0)"]
+ssh = ["bcrypt (>=3.1.5)"]
+test = ["certifi (>=2024)", "cryptography-vectors (==44.0.0)", "pretend (>=0.7)", "pytest (>=7.4.0)", "pytest-benchmark (>=4.0)", "pytest-cov (>=2.10.1)", "pytest-xdist (>=3.5.0)"]
+test-randomorder = ["pytest-randomly"]
+
 [[package]]
 name = "cycler"
 version = "0.12.1"
@@ -534,6 +675,51 @@ files = [
 docs = ["ipython", "matplotlib", "numpydoc", "sphinx"]
 tests = ["pytest", "pytest-cov", "pytest-xdist"]
 
+[[package]]
+name = "datalad"
+version = "1.1.4"
+description = "data distribution geared toward scientific datasets"
+optional = false
+python-versions = ">=3.9"
+files = [
+    {file = "datalad-1.1.4-py3-none-any.whl", hash = "sha256:07a95efe456c82e7a8ca8e15a85bac3e588bc189abd4ddbace00f47d259ddaa9"},
+    {file = "datalad-1.1.4.tar.gz", hash = "sha256:db9286a4baf74d53d23c3bfb5928452197d57f36abed8ba0af334a3dd038166b"},
+]
+
+[package.dependencies]
+annexremote = "*"
+boto3 = "*"
+chardet = ">=3.0.4"
+colorama = {version = "*", markers = "platform_system == \"Windows\""}
+distro = "*"
+fasteners = ">=0.14"
+humanize = "*"
+iso8601 = "*"
+keyring = ">=20.0,<23.9.0 || >23.9.0"
+"keyrings.alt" = "*"
+looseversion = "*"
+msgpack = "*"
+packaging = "*"
+patool = ">=1.7"
+platformdirs = "*"
+python-gitlab = "*"
+requests = ">=1.2"
+tqdm = ">=4.32.0"
+typing-extensions = {version = ">=4.0.0", markers = "python_version < \"3.11\""}
+
+[package.extras]
+core = ["annexremote", "chardet (>=3.0.4)", "colorama", "distro", "fasteners (>=0.14)", "humanize", "importlib-metadata (>=3.6)", "iso8601", "looseversion", "packaging", "patool (>=1.7)", "platformdirs", "tqdm (>=4.32.0)", "typing-extensions (>=4.0.0)"]
+devel = ["BeautifulSoup4", "annexremote", "argcomplete (>=1.12.3)", "asv", "boto3", "chardet (>=3.0.4)", "colorama", "coverage (!=7.6.5)", "distro", "duecredit", "fasteners (>=0.14)", "gprof2dot", "httpretty (>=0.9.4)", "humanize", "importlib-metadata (>=3.6)", "iso8601", "keyring (>=20.0,!=23.9.0)", "keyrings.alt", "looseversion", "msgpack", "mypy", "packaging", "patool (>=1.7)", "platformdirs", "psutil", "pypandoc", "pyperclip", "pytest (>=7.0)", "pytest-cov", "pytest-fail-slow (>=0.2,<1.0)", "pytest-xdist", "python-dateutil", "python-gitlab", "requests (>=1.2)", "requests-ftp", "scriv", "sphinx (>=4.3.0)", "sphinx-autodoc-typehints", "sphinx-rtd-theme (>=0.5.1)", "tqdm (>=4.32.0)", "types-python-dateutil", "types-requests", "typing-extensions (>=4.0.0)", "vcrpy"]
+devel-docs = ["pypandoc", "sphinx (>=4.3.0)", "sphinx-autodoc-typehints", "sphinx-rtd-theme (>=0.5.1)"]
+devel-utils = ["asv", "coverage (!=7.6.5)", "gprof2dot", "psutil", "pytest-xdist", "scriv"]
+downloaders = ["boto3", "keyring (>=20.0,!=23.9.0)", "keyrings.alt", "msgpack", "requests (>=1.2)"]
+downloaders-extra = ["requests-ftp"]
+duecredit = ["duecredit"]
+full = ["BeautifulSoup4", "annexremote", "argcomplete (>=1.12.3)", "boto3", "chardet (>=3.0.4)", "colorama", "distro", "duecredit", "fasteners (>=0.14)", "httpretty (>=0.9.4)", "humanize", "importlib-metadata (>=3.6)", "iso8601", "keyring (>=20.0,!=23.9.0)", "keyrings.alt", "looseversion", "msgpack", "mypy", "packaging", "patool (>=1.7)", "platformdirs", "pyperclip", "pytest (>=7.0)", "pytest-cov", "pytest-fail-slow (>=0.2,<1.0)", "python-dateutil", "python-gitlab", "requests (>=1.2)", "requests-ftp", "tqdm (>=4.32.0)", "types-python-dateutil", "types-requests", "typing-extensions (>=4.0.0)", "vcrpy"]
+misc = ["argcomplete (>=1.12.3)", "pyperclip", "python-dateutil"]
+publish = ["python-gitlab"]
+tests = ["BeautifulSoup4", "httpretty (>=0.9.4)", "mypy", "pytest (>=7.0)", "pytest-cov", "pytest-fail-slow (>=0.2,<1.0)", "types-python-dateutil", "types-requests", "vcrpy"]
+
 [[package]]
 name = "debugpy"
 version = "1.8.8"
@@ -591,6 +777,17 @@ files = [
     {file = "distlib-0.3.9.tar.gz", hash = "sha256:a60f20dea646b8a33f3e7772f74dc0b2d0772d2837ee1342a00645c81edf9403"},
 ]
 
+[[package]]
+name = "distro"
+version = "1.9.0"
+description = "Distro - an OS platform information API"
+optional = false
+python-versions = ">=3.6"
+files = [
+    {file = "distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2"},
+    {file = "distro-1.9.0.tar.gz", hash = "sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed"},
+]
+
 [[package]]
 name = "docutils"
 version = "0.19"
@@ -602,6 +799,24 @@ files = [
     {file = "docutils-0.19.tar.gz", hash = "sha256:33995a6753c30b7f577febfc2c50411fec6aac7f7ffeb7c4cfe5991072dcf9e6"},
 ]
 
+[[package]]
+name = "doit"
+version = "0.36.0"
+description = "doit - Automation Tool"
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "doit-0.36.0-py3-none-any.whl", hash = "sha256:ebc285f6666871b5300091c26eafdff3de968a6bd60ea35dd1e3fc6f2e32479a"},
+    {file = "doit-0.36.0.tar.gz", hash = "sha256:71d07ccc9514cb22fe59d98999577665eaab57e16f644d04336ae0b4bae234bc"},
+]
+
+[package.dependencies]
+cloudpickle = "*"
+importlib-metadata = ">=4.4"
+
+[package.extras]
+toml = ["tomli"]
+
 [[package]]
 name = "et-xmlfile"
 version = "2.0.0"
@@ -641,6 +856,17 @@ files = [
 [package.extras]
 tests = ["asttokens (>=2.1.0)", "coverage", "coverage-enable-subprocess", "ipython", "littleutils", "pytest", "rich"]
 
+[[package]]
+name = "fasteners"
+version = "0.19"
+description = "A python package that provides useful locks"
+optional = false
+python-versions = ">=3.6"
+files = [
+    {file = "fasteners-0.19-py3-none-any.whl", hash = "sha256:758819cb5d94cdedf4e836988b74de396ceacb8e2794d21f82d131fd9ee77237"},
+    {file = "fasteners-0.19.tar.gz", hash = "sha256:b4f37c3ac52d8a445af3a66bce57b33b5e90b97c696b7b984f530cf8f0ded09c"},
+]
+
 [[package]]
 name = "fastjsonschema"
 version = "2.20.0"
@@ -942,6 +1168,20 @@ files = [
 [package.dependencies]
 numpy = ">=1.19.3"
 
+[[package]]
+name = "humanize"
+version = "4.11.0"
+description = "Python humanize utilities"
+optional = false
+python-versions = ">=3.9"
+files = [
+    {file = "humanize-4.11.0-py3-none-any.whl", hash = "sha256:b53caaec8532bcb2fff70c8826f904c35943f8cecaca29d272d9df38092736c0"},
+    {file = "humanize-4.11.0.tar.gz", hash = "sha256:e66f36020a2d5a974c504bd2555cf770621dbdbb6d82f94a6857c0b1ea2608be"},
+]
+
+[package.extras]
+tests = ["freezegun", "pytest", "pytest-cov"]
+
 [[package]]
 name = "identify"
 version = "2.6.2"
@@ -1104,6 +1344,75 @@ qtconsole = ["qtconsole"]
 test = ["packaging", "pickleshare", "pytest", "pytest-asyncio (<0.22)", "testpath"]
 test-extra = ["curio", "ipython[test]", "matplotlib (!=3.2.0)", "nbformat", "numpy (>=1.23)", "pandas", "trio"]
 
+[[package]]
+name = "iso8601"
+version = "2.1.0"
+description = "Simple module to parse ISO 8601 dates"
+optional = false
+python-versions = ">=3.7,<4.0"
+files = [
+    {file = "iso8601-2.1.0-py3-none-any.whl", hash = "sha256:aac4145c4dcb66ad8b648a02830f5e2ff6c24af20f4f482689be402db2429242"},
+    {file = "iso8601-2.1.0.tar.gz", hash = "sha256:6b1d3829ee8921c4301998c909f7829fa9ed3cbdac0d3b16af2d743aed1ba8df"},
+]
+
+[[package]]
+name = "jaraco-classes"
+version = "3.4.0"
+description = "Utility functions for Python class constructs"
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "jaraco.classes-3.4.0-py3-none-any.whl", hash = "sha256:f662826b6bed8cace05e7ff873ce0f9283b5c924470fe664fff1c2f00f581790"},
+    {file = "jaraco.classes-3.4.0.tar.gz", hash = "sha256:47a024b51d0239c0dd8c8540c6c7f484be3b8fcf0b2d85c13825780d3b3f3acd"},
+]
+
+[package.dependencies]
+more-itertools = "*"
+
+[package.extras]
+docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"]
+testing = ["pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy", "pytest-ruff (>=0.2.1)"]
+
+[[package]]
+name = "jaraco-context"
+version = "6.0.1"
+description = "Useful decorators and context managers"
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "jaraco.context-6.0.1-py3-none-any.whl", hash = "sha256:f797fc481b490edb305122c9181830a3a5b76d84ef6d1aef2fb9b47ab956f9e4"},
+    {file = "jaraco_context-6.0.1.tar.gz", hash = "sha256:9bae4ea555cf0b14938dc0aee7c9f32ed303aa20a3b73e7dc80111628792d1b3"},
+]
+
+[package.dependencies]
+"backports.tarfile" = {version = "*", markers = "python_version < \"3.12\""}
+
+[package.extras]
+doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"]
+test = ["portend", "pytest (>=6,!=8.1.*)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy", "pytest-ruff (>=0.2.1)"]
+
+[[package]]
+name = "jaraco-functools"
+version = "4.1.0"
+description = "Functools like those found in stdlib"
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "jaraco.functools-4.1.0-py3-none-any.whl", hash = "sha256:ad159f13428bc4acbf5541ad6dec511f91573b90fba04df61dafa2a1231cf649"},
+    {file = "jaraco_functools-4.1.0.tar.gz", hash = "sha256:70f7e0e2ae076498e212562325e805204fc092d7b4c17e0e86c959e249701a9d"},
+]
+
+[package.dependencies]
+more-itertools = "*"
+
+[package.extras]
+check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1)"]
+cover = ["pytest-cov"]
+doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"]
+enabler = ["pytest-enabler (>=2.2)"]
+test = ["jaraco.classes", "pytest (>=6,!=8.1.*)"]
+type = ["pytest-mypy"]
+
 [[package]]
 name = "jedi"
 version = "0.19.2"
@@ -1123,6 +1432,21 @@ docs = ["Jinja2 (==2.11.3)", "MarkupSafe (==1.1.1)", "Pygments (==2.8.1)", "alab
 qa = ["flake8 (==5.0.4)", "mypy (==0.971)", "types-setuptools (==67.2.0.1)"]
 testing = ["Django", "attrs", "colorama", "docopt", "pytest (<9.0.0)"]
 
+[[package]]
+name = "jeepney"
+version = "0.8.0"
+description = "Low-level, pure Python DBus protocol wrapper."
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "jeepney-0.8.0-py3-none-any.whl", hash = "sha256:c0a454ad016ca575060802ee4d590dd912e35c122fa04e70306de3d076cce755"},
+    {file = "jeepney-0.8.0.tar.gz", hash = "sha256:5efe48d255973902f6badc3ce55e2aa6c5c3b3bc642059ef3a91247bcfcc5806"},
+]
+
+[package.extras]
+test = ["async-timeout", "pytest", "pytest-asyncio (>=0.17)", "pytest-trio", "testpath", "trio"]
+trio = ["async_generator", "trio"]
+
 [[package]]
 name = "jinja2"
 version = "3.1.4"
@@ -1140,6 +1464,17 @@ MarkupSafe = ">=2.0"
 [package.extras]
 i18n = ["Babel (>=2.7)"]
 
+[[package]]
+name = "jmespath"
+version = "1.0.1"
+description = "JSON Matching Expressions"
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "jmespath-1.0.1-py3-none-any.whl", hash = "sha256:02e2e4cc71b5bcab88332eebf907519190dd9e6e82107fa7f83b1003a6252980"},
+    {file = "jmespath-1.0.1.tar.gz", hash = "sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe"},
+]
+
 [[package]]
 name = "jsonschema"
 version = "4.23.0"
@@ -1273,6 +1608,54 @@ test-functional = ["pytest", "pytest-randomly", "pytest-xdist"]
 test-integration = ["ipykernel", "jupyter-server (!=2.11)", "nbconvert", "pytest", "pytest-randomly", "pytest-xdist"]
 test-ui = ["calysto-bash"]
 
+[[package]]
+name = "keyring"
+version = "25.5.0"
+description = "Store and access your passwords safely."
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "keyring-25.5.0-py3-none-any.whl", hash = "sha256:e67f8ac32b04be4714b42fe84ce7dad9c40985b9ca827c592cc303e7c26d9741"},
+    {file = "keyring-25.5.0.tar.gz", hash = "sha256:4c753b3ec91717fe713c4edd522d625889d8973a349b0e582622f49766de58e6"},
+]
+
+[package.dependencies]
+importlib-metadata = {version = ">=4.11.4", markers = "python_version < \"3.12\""}
+"jaraco.classes" = "*"
+"jaraco.context" = "*"
+"jaraco.functools" = "*"
+jeepney = {version = ">=0.4.2", markers = "sys_platform == \"linux\""}
+pywin32-ctypes = {version = ">=0.2.0", markers = "sys_platform == \"win32\""}
+SecretStorage = {version = ">=3.2", markers = "sys_platform == \"linux\""}
+
+[package.extras]
+check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1)"]
+completion = ["shtab (>=1.1.0)"]
+cover = ["pytest-cov"]
+doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"]
+enabler = ["pytest-enabler (>=2.2)"]
+test = ["pyfakefs", "pytest (>=6,!=8.1.*)"]
+type = ["pygobject-stubs", "pytest-mypy", "shtab", "types-pywin32"]
+
+[[package]]
+name = "keyrings-alt"
+version = "5.0.2"
+description = "Alternate keyring implementations"
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "keyrings.alt-5.0.2-py3-none-any.whl", hash = "sha256:6be74693192f3f37bbb752bfac9b86e6177076b17d2ac12a390f1d6abff8ac7c"},
+    {file = "keyrings_alt-5.0.2.tar.gz", hash = "sha256:8f097ebe9dc8b185106502b8cdb066c926d2180e13b4689fd4771a3eab7d69fb"},
+]
+
+[package.dependencies]
+"jaraco.classes" = "*"
+"jaraco.context" = "*"
+
+[package.extras]
+doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"]
+test = ["gdata", "keyring (>=20)", "pycryptodome", "pycryptodomex", "pytest (>=6,!=8.1.*)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy", "pytest-ruff (>=0.2.1)", "python-keyczar"]
+
 [[package]]
 name = "kiwisolver"
 version = "1.4.7"
@@ -1459,6 +1842,17 @@ win32-setctime = {version = ">=1.0.0", markers = "sys_platform == \"win32\""}
 [package.extras]
 dev = ["Sphinx (==7.2.5)", "colorama (==0.4.5)", "colorama (==0.4.6)", "exceptiongroup (==1.1.3)", "freezegun (==1.1.0)", "freezegun (==1.2.2)", "mypy (==v0.910)", "mypy (==v0.971)", "mypy (==v1.4.1)", "mypy (==v1.5.1)", "pre-commit (==3.4.0)", "pytest (==6.1.2)", "pytest (==7.4.0)", "pytest-cov (==2.12.1)", "pytest-cov (==4.1.0)", "pytest-mypy-plugins (==1.9.3)", "pytest-mypy-plugins (==3.0.0)", "sphinx-autobuild (==2021.3.14)", "sphinx-rtd-theme (==1.3.0)", "tox (==3.27.1)", "tox (==4.11.0)"]
 
+[[package]]
+name = "looseversion"
+version = "1.3.0"
+description = "Version numbering for anarchists and software realists"
+optional = false
+python-versions = "*"
+files = [
+    {file = "looseversion-1.3.0-py2.py3-none-any.whl", hash = "sha256:781ef477b45946fc03dd4c84ea87734b21137ecda0e1e122bcb3c8d16d2a56e0"},
+    {file = "looseversion-1.3.0.tar.gz", hash = "sha256:ebde65f3f6bb9531a81016c6fef3eb95a61181adc47b7f949e9c0ea47911669e"},
+]
+
 [[package]]
 name = "markdown-it-py"
 version = "2.2.0"
@@ -1660,6 +2054,17 @@ files = [
     {file = "mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba"},
 ]
 
+[[package]]
+name = "more-itertools"
+version = "10.5.0"
+description = "More routines for operating on iterables, beyond itertools"
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "more-itertools-10.5.0.tar.gz", hash = "sha256:5482bfef7849c25dc3c6dd53a6173ae4795da2a41a80faea6700d9f5846c5da6"},
+    {file = "more_itertools-10.5.0-py3-none-any.whl", hash = "sha256:037b0d3203ce90cca8ab1defbbdac29d5f993fc20131f3664dc8d6acfa872aef"},
+]
+
 [[package]]
 name = "msgpack"
 version = "1.1.0"
@@ -2188,6 +2593,20 @@ files = [
 qa = ["flake8 (==5.0.4)", "mypy (==0.971)", "types-setuptools (==67.2.0.1)"]
 testing = ["docopt", "pytest"]
 
+[[package]]
+name = "patool"
+version = "3.1.0"
+description = "portable archive file manager"
+optional = false
+python-versions = ">=3.10"
+files = [
+    {file = "patool-3.1.0-py2.py3-none-any.whl", hash = "sha256:401a918bdbf65434fd59c038bdb2c15ff7185675aedddb4494330c3e8e4fe80d"},
+    {file = "patool-3.1.0.tar.gz", hash = "sha256:417ed1ff7b9c979ce8a10114ed9bc280b08e3af3df3072e761303a3e00aaba04"},
+]
+
+[package.extras]
+argcompletion = ["argcomplete (==3.5.1)"]
+
 [[package]]
 name = "pexpect"
 version = "4.9.0"
@@ -2651,6 +3070,26 @@ files = [
 [package.extras]
 cli = ["click (>=5.0)"]
 
+[[package]]
+name = "python-gitlab"
+version = "5.1.0"
+description = "A python wrapper for the GitLab API"
+optional = false
+python-versions = ">=3.9.0"
+files = [
+    {file = "python_gitlab-5.1.0-py3-none-any.whl", hash = "sha256:c30cf547392ce66daaaf020839cfb6c15a91b26e2e7054d1b3f1b92e8dd65e7d"},
+    {file = "python_gitlab-5.1.0.tar.gz", hash = "sha256:d5a10dae8328f32fb9214bd3f9dc199b4930cd496f81c9be42a0f8ff338aeb35"},
+]
+
+[package.dependencies]
+requests = ">=2.32.0"
+requests-toolbelt = ">=1.0.0"
+
+[package.extras]
+autocompletion = ["argcomplete (>=1.10.0,<3)"]
+graphql = ["gql[httpx] (>=3.5.0,<4)"]
+yaml = ["PyYaml (>=6.0.1)"]
+
 [[package]]
 name = "pytz"
 version = "2024.2"
@@ -2689,6 +3128,17 @@ files = [
     {file = "pywin32-308-cp39-cp39-win_amd64.whl", hash = "sha256:71b3322d949b4cc20776436a9c9ba0eeedcbc9c650daa536df63f0ff111bb920"},
 ]
 
+[[package]]
+name = "pywin32-ctypes"
+version = "0.2.3"
+description = "A (partial) reimplementation of pywin32 using ctypes/cffi"
+optional = false
+python-versions = ">=3.6"
+files = [
+    {file = "pywin32-ctypes-0.2.3.tar.gz", hash = "sha256:d162dc04946d704503b2edc4d55f3dba5c1d539ead017afa00142c38b9885755"},
+    {file = "pywin32_ctypes-0.2.3-py3-none-any.whl", hash = "sha256:8a1513379d709975552d202d942d9837758905c8d01eb82b8bcc30918929e7b8"},
+]
+
 [[package]]
 name = "pyyaml"
 version = "6.0.2"
@@ -2908,6 +3358,20 @@ urllib3 = ">=1.21.1,<3"
 socks = ["PySocks (>=1.5.6,!=1.5.7)"]
 use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"]
 
+[[package]]
+name = "requests-toolbelt"
+version = "1.0.0"
+description = "A utility belt for advanced users of python-requests"
+optional = false
+python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
+files = [
+    {file = "requests-toolbelt-1.0.0.tar.gz", hash = "sha256:7681a0a3d047012b5bdc0ee37d7f8f07ebe76ab08caeccfc3921ce23c88d5bc6"},
+    {file = "requests_toolbelt-1.0.0-py2.py3-none-any.whl", hash = "sha256:cccfdd665f0a24fcf4726e690f65639d272bb0637b9b92dfd91a5568ccf6bd06"},
+]
+
+[package.dependencies]
+requests = ">=2.0.1,<3.0.0"
+
 [[package]]
 name = "rpds-py"
 version = "0.21.0"
@@ -3101,6 +3565,23 @@ files = [
     {file = "ruff-0.1.15.tar.gz", hash = "sha256:f6dfa8c1b21c913c326919056c390966648b680966febcb796cc9d1aaab8564e"},
 ]
 
+[[package]]
+name = "s3transfer"
+version = "0.10.4"
+description = "An Amazon S3 Transfer Manager"
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "s3transfer-0.10.4-py3-none-any.whl", hash = "sha256:244a76a24355363a68164241438de1b72f8781664920260c48465896b712a41e"},
+    {file = "s3transfer-0.10.4.tar.gz", hash = "sha256:29edc09801743c21eb5ecbc617a152df41d3c287f67b615f73e5f750583666a7"},
+]
+
+[package.dependencies]
+botocore = ">=1.33.2,<2.0a.0"
+
+[package.extras]
+crt = ["botocore[crt] (>=1.33.2,<2.0a.0)"]
+
 [[package]]
 name = "scipy"
 version = "1.14.1"
@@ -3151,6 +3632,21 @@ dev = ["cython-lint (>=0.12.2)", "doit (>=0.36.0)", "mypy (==1.10.0)", "pycodest
 doc = ["jupyterlite-pyodide-kernel", "jupyterlite-sphinx (>=0.13.1)", "jupytext", "matplotlib (>=3.5)", "myst-nb", "numpydoc", "pooch", "pydata-sphinx-theme (>=0.15.2)", "sphinx (>=5.0.0,<=7.3.7)", "sphinx-design (>=0.4.0)"]
 test = ["Cython", "array-api-strict (>=2.0)", "asv", "gmpy2", "hypothesis (>=6.30)", "meson", "mpmath", "ninja", "pooch", "pytest", "pytest-cov", "pytest-timeout", "pytest-xdist", "scikit-umfpack", "threadpoolctl"]
 
+[[package]]
+name = "secretstorage"
+version = "3.3.3"
+description = "Python bindings to FreeDesktop.org Secret Service API"
+optional = false
+python-versions = ">=3.6"
+files = [
+    {file = "SecretStorage-3.3.3-py3-none-any.whl", hash = "sha256:f356e6628222568e3af06f2eba8df495efa13b3b63081dafd4f7d9a7b7bc9f99"},
+    {file = "SecretStorage-3.3.3.tar.gz", hash = "sha256:2403533ef369eca6d2ba81718576c5e0f564d5cca1b58f73a8b23e7d4eeebd77"},
+]
+
+[package.dependencies]
+cryptography = ">=2.0"
+jeepney = ">=0.6"
+
 [[package]]
 name = "selenium"
 version = "4.26.1"
@@ -3959,4 +4455,4 @@ type = ["pytest-mypy"]
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.10,<3.13"
-content-hash = "ba984aa6c4d56e62124fc30db68f1f9f7140a6fe95b7f50d471b30bbb9009f22"
+content-hash = "75b95a9cbc4a5d95ef30465ed470a2cf944ae0c7fcd9a32884c72544318ad507"

+ 5 - 0
pyproject.toml

@@ -19,6 +19,8 @@ pandas = "^2.2.3"
 pycountry = "^24.6.1"
 primap2 = "^0.11.2"
 pandas-stubs = "^2.2.3.241009"
+datalad = "^1.1.4"
+doit = "^0.36.0"
 
 
 [tool.poetry.group.tests.dependencies]
@@ -204,6 +206,9 @@ authorized_licenses = [
     "python software foundation license",
     "zpl 2.1",
     'CMU License (MIT-CMU)',
+    'GNU General Public License v3 (GPLv3)',
+    'GNU Lesser General Public License v3 (LGPLv3)',
+
 ]
 # This starting list is relatively conservative. Depending on the project, it
 # may make sense to move some of these into the authorized list

+ 120 - 3
requirements.txt

@@ -1,13 +1,25 @@
+annexremote==1.6.6 ; python_version >= "3.10" and python_version < "3.13" \
+    --hash=sha256:5f78d0753c0763d95fc4c52050bd6212bb32457d32f6575dc66a83178e0283a7 \
+    --hash=sha256:dee4efa33c3bd9514928af5c57c82599ca9dc0a5535121ee234ed1833a98f93e
 attrs==24.2.0 ; python_version >= "3.10" and python_version < "3.13" \
     --hash=sha256:5cfb1b9148b5b086569baec03f20d7b6bf3bcacc9a42bebf87ffaaca362f6346 \
     --hash=sha256:81921eb96de3191c8258c199618104dd27ac608d9366f5e35d011eae1867ede2
+backports-tarfile==1.2.0 ; python_version >= "3.10" and python_version < "3.12" \
+    --hash=sha256:77e284d754527b01fb1e6fa8a1afe577858ebe4e9dad8919e34c862cb399bc34 \
+    --hash=sha256:d75e02c268746e1b8144c278978b6e98e85de6ad16f8e4b0844a154557eca991
 beautifulsoup4==4.12.3 ; python_version >= "3.10" and python_version < "3.13" \
     --hash=sha256:74e3d1928edc070d21748185c46e3fb33490f22f52a3addee9aee0f4f7781051 \
     --hash=sha256:b80878c9f40111313e55da8ba20bdba06d8fa3969fc68304167741bbf9e082ed
+boto3==1.35.72 ; python_version >= "3.10" and python_version < "3.13" \
+    --hash=sha256:410bb4ec676c57ee9c3c7824b7b1a3721584f18f8ee8ccc8e8ecdf285136b77f \
+    --hash=sha256:f9fc94413a959c388b1654c6687a5193293f3c69f8d0af3b86fd48b4096a23f3
+botocore==1.35.72 ; python_version >= "3.10" and python_version < "3.13" \
+    --hash=sha256:6b5fac38ef7cfdbc7781a751e0f78833ccb9149ba815bc238b1dbb75c90fbae5 \
+    --hash=sha256:7412877c3f766a1bfd09236e225ce1f0dc2c35e47949ae423e56e2093c8fa23a
 certifi==2024.8.30 ; python_version >= "3.10" and python_version < "3.13" \
     --hash=sha256:922820b53db7a7257ffbda3f597266d435245903d80737e34f8a45ff3e3230d8 \
     --hash=sha256:bec941d2aa8195e248a60b31ff9f0558284cf01a52591ceda73ea9afffd69fd9
-cffi==1.17.1 ; os_name == "nt" and implementation_name != "pypy" and python_version >= "3.10" and python_version < "3.13" \
+cffi==1.17.1 ; python_version >= "3.10" and python_version < "3.13" and os_name == "nt" and implementation_name != "pypy" or python_version >= "3.10" and python_version < "3.13" and sys_platform == "linux" and platform_python_implementation != "PyPy" \
     --hash=sha256:045d61c734659cc045141be4bae381a41d89b741f795af1dd018bfb532fd0df8 \
     --hash=sha256:0984a4925a435b1da406122d4d7968dd861c1385afe3b45ba82b750f229811e2 \
     --hash=sha256:0e2b1fac190ae3ebfe37b979cc1ce69c81f4e4fe5746bb401dca63a9062cdaf1 \
@@ -75,6 +87,9 @@ cffi==1.17.1 ; os_name == "nt" and implementation_name != "pypy" and python_vers
     --hash=sha256:f79fc4fc25f1c8698ff97788206bb3c2598949bfe0fef03d299eb1b5356ada99 \
     --hash=sha256:f7f5baafcc48261359e14bcd6d9bff6d4b28d9103847c9e136694cb0501aef87 \
     --hash=sha256:fc48c783f9c87e60831201f2cce7f3b2e4846bf4d8728eabe54d60700b318a0b
+chardet==5.2.0 ; python_version >= "3.10" and python_version < "3.13" \
+    --hash=sha256:1b3b6ff479a8c414bc3fa2c0852995695c4a026dcd6d0633b2dd092ca39c1cf7 \
+    --hash=sha256:e1cf59446890a00105fe7b7912492ea04b6e6f06d4b742b2c788469e34c82970
 charset-normalizer==3.4.0 ; python_version >= "3.10" and python_version < "3.13" \
     --hash=sha256:0099d79bdfcf5c1f0c2c72f91516702ebf8b0b8ddd8905f97a8aecf49712c621 \
     --hash=sha256:0713f3adb9d03d49d365b70b84775d0a0d18e4ab08d12bc46baa6132ba78aaf6 \
@@ -181,7 +196,10 @@ charset-normalizer==3.4.0 ; python_version >= "3.10" and python_version < "3.13"
     --hash=sha256:f606a1881d2663630ea5b8ce2efe2111740df4b687bd78b34a8131baa007f79b \
     --hash=sha256:fe9f97feb71aa9896b81973a7bbada8c49501dc73e58a10fcef6663af95e5079 \
     --hash=sha256:ffc519621dce0c767e96b9c53f09c5d215578e10b02c285809f76509a3931482
-colorama==0.4.6 ; python_version >= "3.10" and python_version < "3.13" and (sys_platform == "win32" or platform_system == "Windows") \
+cloudpickle==3.1.0 ; python_version >= "3.10" and python_version < "3.13" \
+    --hash=sha256:81a929b6e3c7335c863c771d673d105f02efdb89dfaba0c90495d1c64796601b \
+    --hash=sha256:fe11acda67f61aaaec473e3afe030feb131d78a43461b718185363384f1ba12e
+colorama==0.4.6 ; python_version >= "3.10" and python_version < "3.13" and (platform_system == "Windows" or sys_platform == "win32") \
     --hash=sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44 \
     --hash=sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6
 contourpy==1.3.1 ; python_version >= "3.10" and python_version < "3.13" \
@@ -239,15 +257,57 @@ contourpy==1.3.1 ; python_version >= "3.10" and python_version < "3.13" \
     --hash=sha256:ece6df05e2c41bd46776fbc712e0996f7c94e0d0543af1656956d150c4ca7c81 \
     --hash=sha256:efa874e87e4a647fd2e4f514d5e91c7d493697127beb95e77d2f7561f6905bd9 \
     --hash=sha256:f611e628ef06670df83fce17805c344710ca5cde01edfdc72751311da8585375
+cryptography==44.0.0 ; python_version >= "3.10" and python_version < "3.13" and sys_platform == "linux" \
+    --hash=sha256:1923cb251c04be85eec9fda837661c67c1049063305d6be5721643c22dd4e2b7 \
+    --hash=sha256:37d76e6863da3774cd9db5b409a9ecfd2c71c981c38788d3fcfaf177f447b731 \
+    --hash=sha256:3c672a53c0fb4725a29c303be906d3c1fa99c32f58abe008a82705f9ee96f40b \
+    --hash=sha256:404fdc66ee5f83a1388be54300ae978b2efd538018de18556dde92575e05defc \
+    --hash=sha256:4ac4c9f37eba52cb6fbeaf5b59c152ea976726b865bd4cf87883a7e7006cc543 \
+    --hash=sha256:60eb32934076fa07e4316b7b2742fa52cbb190b42c2df2863dbc4230a0a9b385 \
+    --hash=sha256:62901fb618f74d7d81bf408c8719e9ec14d863086efe4185afd07c352aee1d2c \
+    --hash=sha256:660cb7312a08bc38be15b696462fa7cc7cd85c3ed9c576e81f4dc4d8b2b31591 \
+    --hash=sha256:708ee5f1bafe76d041b53a4f95eb28cdeb8d18da17e597d46d7833ee59b97ede \
+    --hash=sha256:761817a3377ef15ac23cd7834715081791d4ec77f9297ee694ca1ee9c2c7e5eb \
+    --hash=sha256:831c3c4d0774e488fdc83a1923b49b9957d33287de923d58ebd3cec47a0ae43f \
+    --hash=sha256:84111ad4ff3f6253820e6d3e58be2cc2a00adb29335d4cacb5ab4d4d34f2a123 \
+    --hash=sha256:8b3e6eae66cf54701ee7d9c83c30ac0a1e3fa17be486033000f2a73a12ab507c \
+    --hash=sha256:9abcc2e083cbe8dde89124a47e5e53ec38751f0d7dfd36801008f316a127d7ba \
+    --hash=sha256:9e6fc8a08e116fb7c7dd1f040074c9d7b51d74a8ea40d4df2fc7aa08b76b9e6c \
+    --hash=sha256:a01956ddfa0a6790d594f5b34fc1bfa6098aca434696a03cfdbe469b8ed79285 \
+    --hash=sha256:abc998e0c0eee3c8a1904221d3f67dcfa76422b23620173e28c11d3e626c21bd \
+    --hash=sha256:b15492a11f9e1b62ba9d73c210e2416724633167de94607ec6069ef724fad092 \
+    --hash=sha256:be4ce505894d15d5c5037167ffb7f0ae90b7be6f2a98f9a5c3442395501c32fa \
+    --hash=sha256:c5eb858beed7835e5ad1faba59e865109f3e52b3783b9ac21e7e47dc5554e289 \
+    --hash=sha256:cd4e834f340b4293430701e772ec543b0fbe6c2dea510a5286fe0acabe153a02 \
+    --hash=sha256:d2436114e46b36d00f8b72ff57e598978b37399d2786fd39793c36c6d5cb1c64 \
+    --hash=sha256:eb33480f1bad5b78233b0ad3e1b0be21e8ef1da745d8d2aecbb20671658b9053 \
+    --hash=sha256:eca27345e1214d1b9f9490d200f9db5a874479be914199194e746c893788d417 \
+    --hash=sha256:ed3534eb1090483c96178fcb0f8893719d96d5274dfde98aa6add34614e97c8e \
+    --hash=sha256:f3f6fdfa89ee2d9d496e2c087cebef9d4fcbb0ad63c40e821b39f74bf48d9c5e \
+    --hash=sha256:f53c2c87e0fb4b0c00fa9571082a057e37690a8f12233306161c8f4b819960b7 \
+    --hash=sha256:f5e7cb1e5e56ca0933b4873c0220a78b773b24d40d186b6738080b73d3d0a756 \
+    --hash=sha256:f677e1268c4e23420c3acade68fac427fffcb8d19d7df95ed7ad17cdef8404f4
 cycler==0.12.1 ; python_version >= "3.10" and python_version < "3.13" \
     --hash=sha256:85cef7cff222d8644161529808465972e51340599459b8ac3ccbac5a854e0d30 \
     --hash=sha256:88bb128f02ba341da8ef447245a9e138fae777f6a23943da4540077d3601eb1c
+datalad==1.1.4 ; python_version >= "3.10" and python_version < "3.13" \
+    --hash=sha256:07a95efe456c82e7a8ca8e15a85bac3e588bc189abd4ddbace00f47d259ddaa9 \
+    --hash=sha256:db9286a4baf74d53d23c3bfb5928452197d57f36abed8ba0af334a3dd038166b
+distro==1.9.0 ; python_version >= "3.10" and python_version < "3.13" \
+    --hash=sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed \
+    --hash=sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2
+doit==0.36.0 ; python_version >= "3.10" and python_version < "3.13" \
+    --hash=sha256:71d07ccc9514cb22fe59d98999577665eaab57e16f644d04336ae0b4bae234bc \
+    --hash=sha256:ebc285f6666871b5300091c26eafdff3de968a6bd60ea35dd1e3fc6f2e32479a
 et-xmlfile==2.0.0 ; python_version >= "3.10" and python_version < "3.13" \
     --hash=sha256:7a91720bc756843502c3b7504c77b8fe44217c85c537d85037f0f536151b2caa \
     --hash=sha256:dab3f4764309081ce75662649be815c4c9081e88f0837825f90fd28317d4da54
 exceptiongroup==1.2.2 ; python_version >= "3.10" and python_version < "3.11" \
     --hash=sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b \
     --hash=sha256:47c2edf7c6738fafb49fd34290706d1a1a2f4d1c6df275526b62cbb4aa5393cc
+fasteners==0.19 ; python_version >= "3.10" and python_version < "3.13" \
+    --hash=sha256:758819cb5d94cdedf4e836988b74de396ceacb8e2794d21f82d131fd9ee77237 \
+    --hash=sha256:b4f37c3ac52d8a445af3a66bce57b33b5e90b97c696b7b984f530cf8f0ded09c
 flexcache==0.3 ; python_version >= "3.10" and python_version < "3.13" \
     --hash=sha256:18743bd5a0621bfe2cf8d519e4c3bfdf57a269c15d1ced3fb4b64e0ff4600656 \
     --hash=sha256:d43c9fea82336af6e0115e308d9d33a185390b8346a017564611f1466dcd2e32
@@ -341,9 +401,39 @@ h5py==3.12.1 ; python_version >= "3.10" and python_version < "3.13" \
     --hash=sha256:d2b8dd64f127d8b324f5d2cd1c0fd6f68af69084e9e47d27efeb9e28e685af3e \
     --hash=sha256:d3e465aee0ec353949f0f46bf6c6f9790a2006af896cee7c178a8c3e5090aa32 \
     --hash=sha256:e4d51919110a030913201422fb07987db4338eba5ec8c5a15d6fab8e03d443fc
+humanize==4.11.0 ; python_version >= "3.10" and python_version < "3.13" \
+    --hash=sha256:b53caaec8532bcb2fff70c8826f904c35943f8cecaca29d272d9df38092736c0 \
+    --hash=sha256:e66f36020a2d5a974c504bd2555cf770621dbdbb6d82f94a6857c0b1ea2608be
 idna==3.10 ; python_version >= "3.10" and python_version < "3.13" \
     --hash=sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9 \
     --hash=sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3
+importlib-metadata==8.5.0 ; python_version >= "3.10" and python_version < "3.13" \
+    --hash=sha256:45e54197d28b7a7f1559e60b95e7c567032b602131fbd588f1497f47880aa68b \
+    --hash=sha256:71522656f0abace1d072b9e5481a48f07c138e00f079c38c8f883823f9c26bd7
+iso8601==2.1.0 ; python_version >= "3.10" and python_version < "3.13" \
+    --hash=sha256:6b1d3829ee8921c4301998c909f7829fa9ed3cbdac0d3b16af2d743aed1ba8df \
+    --hash=sha256:aac4145c4dcb66ad8b648a02830f5e2ff6c24af20f4f482689be402db2429242
+jaraco-classes==3.4.0 ; python_version >= "3.10" and python_version < "3.13" \
+    --hash=sha256:47a024b51d0239c0dd8c8540c6c7f484be3b8fcf0b2d85c13825780d3b3f3acd \
+    --hash=sha256:f662826b6bed8cace05e7ff873ce0f9283b5c924470fe664fff1c2f00f581790
+jaraco-context==6.0.1 ; python_version >= "3.10" and python_version < "3.13" \
+    --hash=sha256:9bae4ea555cf0b14938dc0aee7c9f32ed303aa20a3b73e7dc80111628792d1b3 \
+    --hash=sha256:f797fc481b490edb305122c9181830a3a5b76d84ef6d1aef2fb9b47ab956f9e4
+jaraco-functools==4.1.0 ; python_version >= "3.10" and python_version < "3.13" \
+    --hash=sha256:70f7e0e2ae076498e212562325e805204fc092d7b4c17e0e86c959e249701a9d \
+    --hash=sha256:ad159f13428bc4acbf5541ad6dec511f91573b90fba04df61dafa2a1231cf649
+jeepney==0.8.0 ; python_version >= "3.10" and python_version < "3.13" and sys_platform == "linux" \
+    --hash=sha256:5efe48d255973902f6badc3ce55e2aa6c5c3b3bc642059ef3a91247bcfcc5806 \
+    --hash=sha256:c0a454ad016ca575060802ee4d590dd912e35c122fa04e70306de3d076cce755
+jmespath==1.0.1 ; python_version >= "3.10" and python_version < "3.13" \
+    --hash=sha256:02e2e4cc71b5bcab88332eebf907519190dd9e6e82107fa7f83b1003a6252980 \
+    --hash=sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe
+keyring==25.5.0 ; python_version >= "3.10" and python_version < "3.13" \
+    --hash=sha256:4c753b3ec91717fe713c4edd522d625889d8973a349b0e582622f49766de58e6 \
+    --hash=sha256:e67f8ac32b04be4714b42fe84ce7dad9c40985b9ca827c592cc303e7c26d9741
+keyrings-alt==5.0.2 ; python_version >= "3.10" and python_version < "3.13" \
+    --hash=sha256:6be74693192f3f37bbb752bfac9b86e6177076b17d2ac12a390f1d6abff8ac7c \
+    --hash=sha256:8f097ebe9dc8b185106502b8cdb066c926d2180e13b4689fd4771a3eab7d69fb
 kiwisolver==1.4.7 ; python_version >= "3.10" and python_version < "3.13" \
     --hash=sha256:073a36c8273647592ea332e816e75ef8da5c303236ec0167196793eb1e34657a \
     --hash=sha256:08471d4d86cbaec61f86b217dd938a83d85e03785f51121e791a6e6689a3be95 \
@@ -484,6 +574,9 @@ llvmlite==0.43.0 ; python_version >= "3.10" and python_version < "3.13" \
 loguru==0.7.2 ; python_version >= "3.10" and python_version < "3.13" \
     --hash=sha256:003d71e3d3ed35f0f8984898359d65b79e5b21943f78af86aa5491210429b8eb \
     --hash=sha256:e671a53522515f34fd406340ee968cb9ecafbc4b36c679da03c18fd8d0bd51ac
+looseversion==1.3.0 ; python_version >= "3.10" and python_version < "3.13" \
+    --hash=sha256:781ef477b45946fc03dd4c84ea87734b21137ecda0e1e122bcb3c8d16d2a56e0 \
+    --hash=sha256:ebde65f3f6bb9531a81016c6fef3eb95a61181adc47b7f949e9c0ea47911669e
 matplotlib==3.9.2 ; python_version >= "3.10" and python_version < "3.13" \
     --hash=sha256:039082812cacd6c6bec8e17a9c1e6baca230d4116d522e81e1f63a74d01d2e21 \
     --hash=sha256:03ba9c1299c920964e8d3857ba27173b4dbb51ca4bab47ffc2c2ba0eb5e2cbc5 \
@@ -525,6 +618,9 @@ matplotlib==3.9.2 ; python_version >= "3.10" and python_version < "3.13" \
     --hash=sha256:f32c7410c7f246838a77d6d1eff0c0f87f3cb0e7c4247aebea71a6d5a68cab49 \
     --hash=sha256:f6ee45bc4245533111ced13f1f2cace1e7f89d1c793390392a80c139d6cf0e6c \
     --hash=sha256:f7c0410f181a531ec4e93bbc27692f2c71a15c2da16766f5ba9761e7ae518413
+more-itertools==10.5.0 ; python_version >= "3.10" and python_version < "3.13" \
+    --hash=sha256:037b0d3203ce90cca8ab1defbbdac29d5f993fc20131f3664dc8d6acfa872aef \
+    --hash=sha256:5482bfef7849c25dc3c6dd53a6173ae4795da2a41a80faea6700d9f5846c5da6
 msgpack==1.1.0 ; python_version >= "3.10" and python_version < "3.13" \
     --hash=sha256:06f5fd2f6bb2a7914922d935d3b8bb4a7fff3a9a91cfce6d06c13bc42bec975b \
     --hash=sha256:071603e2f0771c45ad9bc65719291c568d4edf120b44eb36324dcb02a13bfddf \
@@ -710,6 +806,9 @@ pandas==2.2.3 ; python_version >= "3.10" and python_version < "3.13" \
     --hash=sha256:f00d1345d84d8c86a63e476bb4955e46458b304b9575dcf71102b5c705320015 \
     --hash=sha256:f3a255b2c19987fbbe62a9dfd6cff7ff2aa9ccab3fc75218fd4b7530f01efa24 \
     --hash=sha256:fffb8ae78d8af97f849404f21411c95062db1496aeb3e56f146f0355c9989319
+patool==3.1.0 ; python_version >= "3.10" and python_version < "3.13" \
+    --hash=sha256:401a918bdbf65434fd59c038bdb2c15ff7185675aedddb4494330c3e8e4fe80d \
+    --hash=sha256:417ed1ff7b9c979ce8a10114ed9bc280b08e3af3df3072e761303a3e00aaba04
 pillow==11.0.0 ; python_version >= "3.10" and python_version < "3.13" \
     --hash=sha256:00177a63030d612148e659b55ba99527803288cea7c75fb05766ab7981a8c1b7 \
     --hash=sha256:006bcdd307cc47ba43e924099a038cbf9591062e6c50e570819743f5607404f5 \
@@ -801,7 +900,7 @@ primap2==0.11.2 ; python_version >= "3.10" and python_version < "3.13" \
 pycountry==24.6.1 ; python_version >= "3.10" and python_version < "3.13" \
     --hash=sha256:b61b3faccea67f87d10c1f2b0fc0be714409e8fcdcc1315613174f6466c10221 \
     --hash=sha256:f1a4fb391cd7214f8eefd39556d740adcc233c778a27f8942c8dca351d6ce06f
-pycparser==2.22 ; os_name == "nt" and implementation_name != "pypy" and python_version >= "3.10" and python_version < "3.13" \
+pycparser==2.22 ; python_version >= "3.10" and python_version < "3.13" and os_name == "nt" and implementation_name != "pypy" or python_version >= "3.10" and python_version < "3.13" and sys_platform == "linux" and platform_python_implementation != "PyPy" \
     --hash=sha256:491c8be9c040f5390f5bf44a5b07752bd07f56edf992381b05c701439eec10f6 \
     --hash=sha256:c3702b6d3dd8c7abc1afa565d7e63d53a1d0bd86cdc24edd75470f4de499cfcc
 pyparsing==3.2.0 ; python_version >= "3.10" and python_version < "3.13" \
@@ -817,9 +916,18 @@ python-dateutil==2.9.0.post0 ; python_version >= "3.10" and python_version < "3.
 python-dotenv==1.0.1 ; python_version >= "3.10" and python_version < "3.13" \
     --hash=sha256:e324ee90a023d808f1959c46bcbc04446a10ced277783dc6ee09987c37ec10ca \
     --hash=sha256:f7b63ef50f1b690dddf550d03497b66d609393b40b564ed0d674909a68ebf16a
+python-gitlab==5.1.0 ; python_version >= "3.10" and python_version < "3.13" \
+    --hash=sha256:c30cf547392ce66daaaf020839cfb6c15a91b26e2e7054d1b3f1b92e8dd65e7d \
+    --hash=sha256:d5a10dae8328f32fb9214bd3f9dc199b4930cd496f81c9be42a0f8ff338aeb35
 pytz==2024.2 ; python_version >= "3.10" and python_version < "3.13" \
     --hash=sha256:2aa355083c50a0f93fa581709deac0c9ad65cca8a9e9beac660adcbd493c798a \
     --hash=sha256:31c7c1817eb7fae7ca4b8c7ee50c72f93aa2dd863de768e1ef4245d426aa0725
+pywin32-ctypes==0.2.3 ; python_version >= "3.10" and python_version < "3.13" and sys_platform == "win32" \
+    --hash=sha256:8a1513379d709975552d202d942d9837758905c8d01eb82b8bcc30918929e7b8 \
+    --hash=sha256:d162dc04946d704503b2edc4d55f3dba5c1d539ead017afa00142c38b9885755
+requests-toolbelt==1.0.0 ; python_version >= "3.10" and python_version < "3.13" \
+    --hash=sha256:7681a0a3d047012b5bdc0ee37d7f8f07ebe76ab08caeccfc3921ce23c88d5bc6 \
+    --hash=sha256:cccfdd665f0a24fcf4726e690f65639d272bb0637b9b92dfd91a5568ccf6bd06
 requests==2.32.3 ; python_version >= "3.10" and python_version < "3.13" \
     --hash=sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760 \
     --hash=sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6
@@ -868,6 +976,9 @@ ruamel-yaml-clib==0.2.12 ; platform_python_implementation == "CPython" and pytho
 ruamel-yaml==0.18.6 ; python_version >= "3.10" and python_version < "3.13" \
     --hash=sha256:57b53ba33def16c4f3d807c0ccbc00f8a6081827e81ba2491691b76882d0c636 \
     --hash=sha256:8b27e6a217e786c6fbe5634d8f3f11bc63e0f80f6a5890f28863d9c45aac311b
+s3transfer==0.10.4 ; python_version >= "3.10" and python_version < "3.13" \
+    --hash=sha256:244a76a24355363a68164241438de1b72f8781664920260c48465896b712a41e \
+    --hash=sha256:29edc09801743c21eb5ecbc617a152df41d3c287f67b615f73e5f750583666a7
 scipy==1.14.1 ; python_version >= "3.10" and python_version < "3.13" \
     --hash=sha256:0c2f95de3b04e26f5f3ad5bb05e74ba7f68b837133a4492414b3afd79dfe540e \
     --hash=sha256:1729560c906963fc8389f6aac023739ff3983e727b1a4d87696b7bf108316a79 \
@@ -902,6 +1013,9 @@ scipy==1.14.1 ; python_version >= "3.10" and python_version < "3.13" \
     --hash=sha256:eb58ca0abd96911932f688528977858681a59d61a7ce908ffd355957f7025cfc \
     --hash=sha256:edaf02b82cd7639db00dbff629995ef185c8df4c3ffa71a5562a595765a06ce1 \
     --hash=sha256:fef8c87f8abfb884dac04e97824b61299880c43f4ce675dd2cbeadd3c9b466d2
+secretstorage==3.3.3 ; python_version >= "3.10" and python_version < "3.13" and sys_platform == "linux" \
+    --hash=sha256:2403533ef369eca6d2ba81718576c5e0f564d5cca1b58f73a8b23e7d4eeebd77 \
+    --hash=sha256:f356e6628222568e3af06f2eba8df495efa13b3b63081dafd4f7d9a7b7bc9f99
 selenium==4.26.1 ; python_version >= "3.10" and python_version < "3.13" \
     --hash=sha256:1db3f3a0cd5bb07624fa8a3905a6fdde1595a42185a0617077c361dc53d104fb \
     --hash=sha256:7640f3f08ae7f4e450f895678e8a10a55eb4e4ca18311ed675ecc4684b96b683
@@ -968,3 +1082,6 @@ wsproto==1.2.0 ; python_version >= "3.10" and python_version < "3.13" \
 xarray==2024.10.0 ; python_version >= "3.10" and python_version < "3.13" \
     --hash=sha256:ae1d38cb44a0324dfb61e492394158ae22389bf7de9f3c174309c17376df63a0 \
     --hash=sha256:e369e2bac430e418c2448e5b96f07da4635f98c1319aa23cfeb3fbcb9a01d2e0
+zipp==3.21.0 ; python_version >= "3.10" and python_version < "3.13" \
+    --hash=sha256:2c9958f6430a2040341a52eb608ed6dd93ef4392e02ffe219417c1b28b5dd1f4 \
+    --hash=sha256:ac1bbe05fd2991f160ebce24ffbac5f6d11d83dc90891255885223d42b3cd931

+ 31 - 0
scripts/read_data_set.py

@@ -0,0 +1,31 @@
+"""Read selected domains and versions."""
+from pathlib import Path
+
+import click
+
+from faostat_data_primap.helper.definitions import domains_and_releases_to_read
+from faostat_data_primap.helper.paths import (
+    extracted_data_path,
+)
+from faostat_data_primap.read import (
+    read_data,
+)
+
+
+@click.command()
+@click.option("--run_id", default="2024", help="Configuration to run")
+@click.option("--save_path", default=None, help="Where to save data in root directory.")
+def run(run_id, save_path):
+    """Read the releases configured for `run_id` and save them to `save_path`."""
+    # imported here so the module-level import block stays untouched
+    from faostat_data_primap.helper.paths import downloaded_data_path
+
+    read_data(
+        read_path=downloaded_data_path,  # required by read_data, was missing
+        domains_and_releases_to_read=domains_and_releases_to_read[run_id],
+        save_path=Path(save_path) if save_path else extracted_data_path,
+    )
+
+
+if __name__ == "__main__":
+    run()

+ 33 - 128
src/faostat_data_primap/download.py

@@ -1,6 +1,5 @@
 """Downloads data from FAOSTAT website."""
 
-import hashlib
 import os
 import pathlib
 import time
@@ -18,78 +17,6 @@ from faostat_data_primap.helper.definitions import domains
 from faostat_data_primap.helper.paths import downloaded_data_path
 
 
-def find_previous_release_path(
-    current_release_path: pathlib.Path,
-) -> pathlib.Path | None:
-    """
-    Find the most recent previous release directory within same domain
-
-    Release directories are assumed to be subdirectories within the same parent
-    directory as `current_release_path`. The Sorting is done alphabetically,
-    so directory names should follow the naming convention YYYY-MM-DD
-
-    Parameters
-    ----------
-    current_release_path : pathlib.Path
-        The path of the current release directory.
-
-    Returns
-    -------
-    pathlib.Path or None
-        Returns the path of the most recent previous release directory if one exists,
-        otherwise returns None.
-    """
-    domain_path = current_release_path.parent
-    all_releases = [
-        release_name
-        for release_name in os.listdir(current_release_path.parent)
-        if (domain_path / release_name).is_dir()
-    ]
-
-    # make sure all directories follow the naming convention
-    try:
-        all_releases_datetime = [
-            datetime.strptime(release, "%Y-%m-%d") for release in all_releases
-        ]
-    except ValueError as e:
-        msg = (
-            "All release folders must be in YYYY-MM-DD format, "
-            f"got {sorted(all_releases)}"
-        )
-        raise ValueError(msg) from e
-
-    all_releases_datetime = sorted(all_releases_datetime)
-    current_release_datetime = datetime.strptime(current_release_path.name, "%Y-%m-%d")
-    index = all_releases_datetime.index(current_release_datetime)
-
-    # if the current release is the latest or the only one
-    if index == 0:
-        return None
-
-    return domain_path / all_releases_datetime[index - 1].strftime("%Y-%m-%d")
-
-
-def calculate_checksum(file_path: pathlib.Path) -> str:
-    """
-    Calculate the SHA-256 checksum of a file.
-
-    Parameters
-    ----------
-    file_path : pathlib.Path
-        The path to the file for which the checksum is calculated.
-
-    Returns
-    -------
-    str
-        The SHA-256 checksum of the file as a hexadecimal string.
-    """
-    sha256 = hashlib.sha256()
-    with open(file_path, "rb") as f:
-        for chunk in iter(lambda: f.read(4096), b""):
-            sha256.update(chunk)
-    return sha256.hexdigest()
-
-
 def download_methodology(url_download: str, save_path: pathlib.Path) -> None:
     """
     Download methodology file.
@@ -115,46 +42,16 @@ def download_methodology(url_download: str, save_path: pathlib.Path) -> None:
     download_path = save_path / filename
 
     if download_path.exists():
-        print(f"Skipping download of {download_path} because it already exists.")
-        return
-
-    previous_release = find_previous_release_path(save_path)
-    # Attempt to find a file to compare in the previous release
-    if previous_release:
-        file_to_compare = previous_release / filename
-        if file_to_compare.exists():
-            response = requests.get(url_download, stream=True, timeout=30)
-            response.raise_for_status()
-            file_to_download_checksum = hashlib.sha256(response.content).hexdigest()
-            file_to_compare_checksum = calculate_checksum(file_to_compare)
-
-            if file_to_download_checksum == file_to_compare_checksum:
-                print(
-                    f"File '{filename}' is identical in the previous release. "
-                    f"Creating symlink."
-                )
-                os.symlink(file_to_compare, download_path)
-                return
-            else:
-                print(
-                    f"File '{filename}' differs from previous release. "
-                    f"Downloading file."
-                )
+        if download_path.is_symlink():
+            os.remove(download_path)
         else:
-            print(f"File '{filename}' not found in previous release. Downloading file.")
-            response = requests.get(url_download, stream=True, timeout=30)
-            response.raise_for_status()
-
-        # Save downloaded file to current release
-        with open(download_path, "wb") as f:
-            f.write(response.content)
+            print(f"Skipping download of {download_path} because it already exists.")
+            return
 
-    else:
-        print(f"No previous release found. Downloading file '{filename}'.")
-        response = requests.get(url_download, stream=True, timeout=30)
-        response.raise_for_status()
-        with open(download_path, "wb") as f:
-            f.write(response.content)
+    response = requests.get(url_download, stream=True, timeout=30)
+    response.raise_for_status()
+    with open(download_path, "wb") as f:
+        f.write(response.content)
 
 
 def get_html_content(url: str) -> BeautifulSoup:
@@ -241,17 +138,18 @@ def download_file(url_download: str, save_path: pathlib.Path) -> bool:
     -------
         True if the file was downloaded, False if a cached file was found
     """
-    if not save_path.exists():
-        with requests.get(url_download, stream=True, timeout=30) as response:
-            response.raise_for_status()
+    if save_path.exists():
+        if not save_path.is_symlink():
+            print(f"Skipping download of {save_path} because it already exists.")
+            return False
+        os.remove(save_path)
 
-            with open(save_path, mode="wb") as file:
-                file.write(response.content)
+    with requests.get(url_download, stream=True, timeout=30) as response:
+        response.raise_for_status()
+        with open(save_path, mode="wb") as file:
+            file.write(response.content)
 
-        return True
-    else:
-        print(f"Skipping download of {save_path}" " because it already exists.")
-    return False
+    return True
 
 
 def unzip_file(local_filename: pathlib.Path) -> list[str]:
@@ -275,14 +173,21 @@ def unzip_file(local_filename: pathlib.Path) -> list[str]:
                     extracted_file_path = local_filename.parent / file_info.filename
 
                     if extracted_file_path.exists():
-                        print(
-                            f"File '{file_info.filename}' already exists. "
-                            f"Skipping extraction."
-                        )
-                    else:
-                        print(f"Extracting '{file_info.filename}'...")
-                        zip_file.extract(file_info, local_filename.parent)
-                        unzipped_files.append(local_filename.name)
+                        if not extracted_file_path.is_symlink():
+                            print(
+                                f"File '{file_info.filename}' already exists. "
+                                f"Skipping extraction."
+                            )
+                            continue
+                        else:
+                            file_to_unzip_path = (
+                                local_filename.parent / file_info.filename
+                            )
+                            os.remove(file_to_unzip_path)
+
+                    print(f"Extracting '{file_info.filename}'...")
+                    zip_file.extract(file_info, local_filename.parent)
+                    unzipped_files.append(local_filename.name)
 
         # TODO Better error logging/visibilty
         except zipfile.BadZipFile:

+ 1 - 1
src/faostat_data_primap/exceptions.py

@@ -18,5 +18,5 @@ class DateTagNotFoundError(Exception):
         url
             Link to download domain page
         """
-        msg = f"Tag for date lat updated was not found on page with url {url}."
+        msg = f"Tag for date last updated was not found on page with url {url}."
         super().__init__(msg)

+ 14 - 0
src/faostat_data_primap/helper/definitions.py

@@ -422,3 +422,17 @@ config_to_if = {
         "institution": ("Food and Agriculture Organization of the United Nations"),
     },
 }
+
+# TODO maybe this should live somewhere else?
+# Definition of the domains and releases to be read.
+# Maps a label (currently only "2024") to a list of
+# (domain name, release date string) tuples.
+domains_and_releases_to_read = {
+    "2024": [
+        ("farm_gate_agriculture_energy", "2024-11-14"),
+        ("farm_gate_emissions_crops", "2024-11-14"),
+        ("farm_gate_livestock", "2024-11-14"),
+        ("land_use_drained_organic_soils", "2024-11-14"),
+        ("land_use_fires", "2024-11-14"),
+        ("land_use_forests", "2024-11-14"),
+        ("pre_post_agricultural_production", "2024-11-14"),
+    ]
+}

+ 54 - 20
src/faostat_data_primap/read.py

@@ -60,30 +60,30 @@ def get_latest_release(domain_path: pathlib.Path) -> str:
     return sorted(all_releases, reverse=True)[0]
 
 
-def read_latest_data(
-    downloaded_data_path: pathlib.Path = downloaded_data_path,
-    save_path: pathlib.Path = extracted_data_path,
+def read_data(
+    read_path: pathlib.Path,
+    domains_and_releases_to_read: list[tuple[str, str]],
+    save_path: pathlib.Path,
 ) -> None:
     """
-    Read and save the latest data
+    Read specified domains and releases and save output files.
 
-    Converts downloaded data into interchange format and primap2 native format
-    and saves the files in the extracted_data directory.
+    Parameters
+    ----------
+    read_path
+        Where to look for the downloaded data
+    domains_and_releases_to_read
+        The domains and releases to use
+    save_path
+        The path to save the data to
 
     """
-    domains = get_all_domains(downloaded_data_path)
-
-    files_to_read = []
-    for domain in domains:
-        domain_path = downloaded_data_path / domain
-        files_to_read.append((domain, get_latest_release(domain_path)))
-
     df_list = []
-    for domain, release in files_to_read:
+    for domain, release in domains_and_releases_to_read:
         read_config = read_config_all[domain][release]
 
         print(f"Read {read_config['filename']}")
-        dataset_path = downloaded_data_path / domain / release / read_config["filename"]
+        dataset_path = read_path / domain / release / read_config["filename"]
 
         # There are some non-utf8 characters
         df_domain = pd.read_csv(dataset_path, encoding="ISO-8859-1")
@@ -129,8 +129,11 @@ def read_latest_data(
 
     df_all = pd.concat(df_list, axis=0, join="outer", ignore_index=True)
 
-    # sometimes Source is empty
-    df_all["Source"] = df_all["Source"].fillna("unknown")
+    # some domains don't have Source column or values are empty
+    if "Source" not in df_all.columns:
+        df_all["Source"] = "unknown"
+    else:
+        df_all["Source"] = df_all["Source"].fillna("unknown")
 
     # Remove the "Y" prefix for the years columns
     df_all = df_all.rename(columns=lambda x: x.lstrip("Y") if x.startswith("Y") else x)
@@ -139,7 +142,9 @@ def read_latest_data(
     df_all["Unit"] = df_all["entity"] + " * " + df_all["Unit"] + "/ year"
     df_all["Unit"] = df_all["Unit"].replace(read_config_all["replace_units"])
 
-    date_last_updated = sorted([i[1] for i in files_to_read], reverse=True)[0]
+    date_last_updated = sorted(
+        [i[1] for i in domains_and_releases_to_read], reverse=True
+    )[0]
     release_name = f"v{date_last_updated}"
 
     data_if = pm2.pm2io.convert_wide_dataframe_if(
@@ -171,15 +176,44 @@ def read_latest_data(
     if not output_folder.exists():
         output_folder.mkdir()
 
+    filepath = output_folder / (output_filename + ".csv")
+    print(f"Writing primap2 file to {filepath}")
     pm2.pm2io.write_interchange_format(
-        output_folder / (output_filename + ".csv"),
+        filepath,
         data_if,
     )
 
     compression = dict(zlib=True, complevel=9)
     encoding = {var: compression for var in data_pm2.data_vars}
-    data_pm2.pr.to_netcdf(output_folder / (output_filename + ".nc"), encoding=encoding)
+    filepath = output_folder / (output_filename + ".nc")
+    print(f"Writing netcdf file to {filepath}")
+    data_pm2.pr.to_netcdf(filepath, encoding=encoding)
 
     # next steps
     # convert to IPCC2006_PRIMAP categories
     # save final version
+
+
+def read_latest_data(
+    downloaded_data_path_custom: pathlib.Path = downloaded_data_path,
+    save_path: pathlib.Path = extracted_data_path,
+) -> None:
+    """
+    Read and save the latest release of every downloaded domain.
+
+    Pairs each domain returned by ``get_all_domains`` with the release
+    returned by ``get_latest_release`` and passes the pairs to ``read_data``,
+    which converts the downloaded data into interchange format and primap2
+    native format.
+
+    Parameters
+    ----------
+    downloaded_data_path_custom
+        Directory holding the downloaded data; each domain is looked up as a
+        sub-directory of it. Defaults to ``downloaded_data_path``.
+    save_path
+        Forwarded unchanged to ``read_data`` as the path to save the data to.
+        Defaults to ``extracted_data_path``.
+
+    """
+    domains = get_all_domains(downloaded_data_path_custom)
+
+    # Collect one (domain, latest release) tuple per domain.
+    domains_and_releases_to_read = []
+    for domain in domains:
+        domain_path = downloaded_data_path_custom / domain
+        domains_and_releases_to_read.append((domain, get_latest_release(domain_path)))
+
+    read_data(
+        read_path=downloaded_data_path_custom,
+        domains_and_releases_to_read=domains_and_releases_to_read,
+        save_path=save_path,
+    )

+ 1 - 1
tests/integration/test_download_and_read.py

@@ -36,7 +36,7 @@ def test_download_all_domains(tmp_path):
 
     # read and save latest data
     read_latest_data(
-        downloaded_data_path=downloaded_data_path, save_path=extracted_data_path
+        downloaded_data_path_custom=downloaded_data_path, save_path=extracted_data_path
     )
 
     release_folder = os.listdir(extracted_data_path)

+ 0 - 219
tests/unit/test_download.py

@@ -1,11 +1,4 @@
 import pytest
-import requests
-
-from src.faostat_data_primap.download import (
-    calculate_checksum,
-    download_methodology,
-    find_previous_release_path,
-)
 
 
 @pytest.fixture
@@ -47,215 +40,3 @@ def temp_domain_directories(tmp_path):
         "downloaded_data": downloaded_data,
         "domain_paths": domain_paths,
     }
-
-
-@pytest.mark.parametrize(
-    "releases," "current_release_date, " "expected_result_date",
-    [
-        pytest.param(
-            ["2023-12-13", "2022-03-18", "2024-11-29", "2024-11-09"],
-            "2024-11-29",
-            "2024-11-09",
-            id="current release is latest release",
-        ),
-        pytest.param(
-            ["2023-12-13", "2022-03-18", "2024-11-29", "2024-11-09"],
-            "2023-12-13",
-            "2022-03-18",
-            id="current somewhere not the latest release",
-        ),
-    ],
-)
-def test_find_previous_release_path_exists(
-    temp_domain_directories, releases, current_release_date, expected_result_date
-):
-    domain_path = temp_domain_directories["domain_paths"][
-        0
-    ]  # farm_gate_emissions_crops
-    current_release_path = domain_path / current_release_date
-    expected_result = domain_path / expected_result_date
-
-    release_paths = []
-    for release in releases:
-        release_path = domain_path / release
-        release_path.mkdir()
-        release_paths.append(release_path)
-
-    result = find_previous_release_path(current_release_path)
-
-    assert result == expected_result
-
-
-@pytest.mark.parametrize(
-    "releases,current_release_date",
-    [
-        pytest.param(
-            ["2023-12-13", "2022-03-18", "2024-11-29", "2024-11-09"],
-            "2022-03-18",
-            id="current release is oldest release",
-        ),
-        pytest.param(
-            ["2024-11-09"], "2024-11-09", id="current release is only release"
-        ),
-    ],
-)
-def test_find_previous_release_path_that_does_not_exists(
-    temp_domain_directories, releases, current_release_date
-):
-    domain_path = temp_domain_directories["domain_paths"][
-        0
-    ]  # farm_gate_emissions_crops
-    current_release_path = domain_path / current_release_date
-
-    release_paths = []
-    for release in releases:
-        release_path = domain_path / release
-        release_path.mkdir()
-        release_paths.append(release_path)
-
-    result = find_previous_release_path(current_release_path)
-
-    assert not result
-
-
-@pytest.mark.parametrize(
-    "releases,current_release_date, error_msg",
-    [
-        pytest.param(
-            ["2023-12-13", "2022-03-18", "2024-11-29", "20240-11-09"],
-            "2022-03-18",
-            (
-                "All release folders must be in YYYY-MM-DD format, got "
-                "['2022-03-18', '2023-12-13', '2024-11-29', '20240-11-09']"
-            ),
-            id="typo",
-        ),
-        pytest.param(
-            ["20231213", "2022-03-18", "2024-11-29", "2024-11-09"],
-            "2022-03-18",
-            (
-                "All release folders must be in YYYY-MM-DD format, got "
-                "['2022-03-18', '20231213', '2024-11-09', '2024-11-29']"
-            ),
-            id="missing hyphen",
-        ),
-    ],
-)
-def test_find_previous_release_path_wrong_dir_format(
-    temp_domain_directories, releases, current_release_date, error_msg
-):
-    domain_path = temp_domain_directories["domain_paths"][
-        0
-    ]  # farm_gate_emissions_crops
-    current_release_path = domain_path / current_release_date
-
-    release_paths = []
-    for release in releases:
-        release_path = domain_path / release
-        release_path.mkdir()
-        release_paths.append(release_path)
-
-    with pytest.raises(ValueError) as excinfo:
-        result = find_previous_release_path(current_release_path)  # noqa: F841
-
-    assert str(excinfo.value) == error_msg
-
-
-def test_calculate_checksum(tmp_path):
-    filepath_a = tmp_path / "test_file_a.txt"
-    with open(filepath_a, "w") as f:
-        f.write("content of file a")
-
-    filepath_b = tmp_path / "test_file_b.txt"
-    with open(filepath_b, "w") as f:
-        f.write("content of file a")
-
-    filepath_c = tmp_path / "test_file_c.txt"
-    with open(filepath_c, "w") as f:
-        f.write("content of file c")
-
-    checksum_a = calculate_checksum(filepath_a)
-
-    checksum_b = calculate_checksum(filepath_b)
-
-    checksum_c = calculate_checksum(filepath_c)
-
-    assert checksum_a == checksum_b
-
-    assert checksum_b != checksum_c
-
-
-def test_file_exists_in_previous_release_and_is_the_same(temp_domain_directories):
-    # set up temporary directories
-    downloaded_data_path = temp_domain_directories["downloaded_data"]
-    domain_path = temp_domain_directories["domain_paths"][
-        0
-    ]  # farm_gate_emissions_crops
-
-    # make folders for different releases
-    for release in ["2023-12-13", "2022-03-18", "2024-11-29", "2024-11-09"]:
-        release_path = domain_path / release
-        release_path.mkdir()
-
-    file_to_compare_path = domain_path / "2024-11-09" / "GCE_e.pdf"
-    response = requests.get(
-        "https://files-faostat.fao.org/production/GCE/GCE_e.pdf",
-        stream=True,
-        timeout=30,
-    )
-    response.raise_for_status()  # Check for successful request
-    with open(file_to_compare_path, "wb") as f:
-        f.write(response.content)
-    save_path = downloaded_data_path / "farm_gate_emissions_crops" / "2024-11-29"
-    download_methodology(
-        "https://files-faostat.fao.org/production/GCE/GCE_e.pdf", save_path=save_path
-    )
-    downloaded_file_path = domain_path / "2024-11-29" / "GCE_e.pdf"
-    assert downloaded_file_path.is_symlink()
-
-
-def test_methodology_document_exists_in_previous_release_but_is_different(
-    temp_domain_directories,
-):
-    # set up temporary directories
-    domain_path = temp_domain_directories["domain_paths"][
-        0
-    ]  # farm_gate_emissions_crops
-
-    # make folders for different releases
-    for release in ["2023-12-13", "2022-03-18", "2024-11-29", "2024-11-09"]:
-        release_path = domain_path / release
-        release_path.mkdir()
-
-    file_to_compare_path = domain_path / "2024-11-09" / "GCE_e.pdf"
-    with open(file_to_compare_path, "wb") as f:
-        s = "hi"
-        f.write(s.encode("utf-8"))
-
-    save_path = domain_path / "2024-11-29"
-    download_methodology(
-        "https://files-faostat.fao.org/production/GCE/GCE_e.pdf", save_path=save_path
-    )
-    downloaded_file_path = domain_path / "2024-11-29" / "GCE_e.pdf"
-    assert downloaded_file_path.exists()
-
-
-def test_methodology_document_does_not_exist_in_previous_release(
-    temp_domain_directories,
-):
-    # set up temporary directories
-    domain_path = temp_domain_directories["domain_paths"][
-        0
-    ]  # farm_gate_emissions_crops
-
-    # make folders for different releases
-    for release in ["2023-12-13", "2022-03-18", "2024-11-29", "2024-11-09"]:
-        release_path = domain_path / release
-        release_path.mkdir()
-
-    save_path = domain_path / "2024-11-29"
-    download_methodology(
-        "https://files-faostat.fao.org/production/GCE/GCE_e.pdf", save_path=save_path
-    )
-    downloaded_file_path = domain_path / "2024-11-29" / "GCE_e.pdf"
-    assert downloaded_file_path.exists()

+ 0 - 11
tests/unit/test_operations.py

@@ -1,11 +0,0 @@
-"""
-Test operations
-
-This module is just there to help with doc building etc. on
-project creation. You will probably delete it early in the project.
-"""
-from faostat_data_primap.operations import add_two
-
-
-def test_add_two():
-    assert add_two(3, 4) == 7