
[DATALAD] Recorded changes

Daniel Busch 4 months ago
commit 05e714be73
4 changed files with 16 additions and 17 deletions
  1. Makefile (+0, -10)
  2. pyproject.toml (+3, -0)
  3. src/faostat_data_primap/read.py (+12, -6)
  4. tests/unit/test_read_data.py (+1, -1)

+ 0 - 10
Makefile

@@ -77,13 +77,3 @@ virtual-environment:  ## update virtual environment, create a new one if it does
 	poetry config virtualenvs.in-project true
 	poetry install --all-extras
 	poetry run pre-commit install
-
-.PHONY: download_all_domains-environment
-download_all_domains:
-	# downloads and stages (datalad save) all available data
-	datalad run poetry run python3 scripts/download_all_domains.py
-
-.PHONY: read_latest_data
-download_all_domains:
-	# reads and stages (datalad save) the latest data for each domain
-	datalad run poetry run python3 scripts/read_all_domains.py
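
This hunk drops two helper targets that wrapped the download and read scripts in `datalad run` (note that both removed targets are named `download_all_domains`, although the second `.PHONY` line says `read_latest_data`). For anyone who still wants the same behaviour without the Makefile, a minimal sketch of the equivalent calls from Python, assuming the two scripts still exist at the paths shown in the removed recipes:

    import subprocess

    # Commands copied from the two removed Makefile recipes; `datalad run`
    # re-executes them and records the resulting changes (datalad save).
    for script in (
        "scripts/download_all_domains.py",  # download all available data
        "scripts/read_all_domains.py",      # read the latest data for each domain
    ):
        subprocess.run(
            ["datalad", "run", "poetry", "run", "python3", script],
            check=True,  # stop if a step fails
        )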

+ 3 - 0
pyproject.toml

@@ -206,6 +206,9 @@ authorized_licenses = [
     "python software foundation license",
     "zpl 2.1",
     'CMU License (MIT-CMU)',
+    'GNU General Public License v3 (GPLv3)',
+    'GNU Lesser General Public License v3 (LGPLv3)',
+
 ]
 # This starting list is relatively conservative. Depending on the project, it
 # may make sense to move some of these into the authorized list

+ 12 - 6
src/faostat_data_primap/read.py

@@ -60,14 +60,18 @@ def get_latest_release(domain_path: pathlib.Path) -> str:
     return sorted(all_releases, reverse=True)[0]
 
 
-def read_data(domains_and_releases_to_read, save_path) -> None:
+def read_data(
+    domains_and_releases_to_read: tuple[tuple[str, str]], save_path: pathlib.Path
+) -> None:
     """
-    Read specified domains and releases.
+    Read specified domains and releases and save output files.
 
     Parameters
     ----------
     domains_and_releases_to_read
+        The domains and releases to use
     save_path
+        The path to save the data to
 
     """
     df_list = []
@@ -168,16 +172,18 @@ def read_data(domains_and_releases_to_read, save_path) -> None:
     if not output_folder.exists():
         output_folder.mkdir()
 
-    print(f"Writing primap2 file to {output_folder / (output_filename + ".csv")}")
+    filepath = output_folder / (output_filename + ".csv")
+    print(f"Writing primap2 file to {filepath}")
     pm2.pm2io.write_interchange_format(
-        output_folder / (output_filename + ".csv"),
+        filepath,
         data_if,
     )
 
     compression = dict(zlib=True, complevel=9)
     encoding = {var: compression for var in data_pm2.data_vars}
-    print(f"Writing netcdf file to {output_folder / (output_filename + ".nc")}")
-    data_pm2.pr.to_netcdf(output_folder / (output_filename + ".nc"), encoding=encoding)
+    filepath = output_folder / (output_filename + ".nc")
+    print(f"Writing netcdf file to {filepath}")
+    data_pm2.pr.to_netcdf(filepath, encoding=encoding)
 
     # next steps
     # convert to IPCC2006_PRIMAP categories
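
Binding the output path to a local `filepath` also avoids reusing double quotes inside an f-string expression, which only parses on Python 3.12 and later (PEP 701). A minimal illustration with placeholder names (not values from the script):

    import pathlib

    output_folder = pathlib.Path("output")  # placeholder
    output_filename = "example"             # placeholder

    # Before: the inner ".csv" quotes sit inside a double-quoted f-string,
    # which is a SyntaxError on Python < 3.12:
    #   print(f"Writing primap2 file to {output_folder / (output_filename + ".csv")}")

    # After: build the path once, then interpolate the variable.
    filepath = output_folder / (output_filename + ".csv")
    print(f"Writing primap2 file to {filepath}")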

+ 1 - 1
tests/unit/test_read_data.py

@@ -5,7 +5,7 @@ from src.faostat_data_primap.read import read_data
 
 def test_read_data_one_domain(tmp_path):
     # read only one domain for the test
-    domains_and_releases_to_read = (("farm_gate_agriculture_energy", "2024-11-14"),)
+    domains_and_releases_to_read = (("land_use_fires", "2024-11-14"),)
 
     read_data(
        domains_and_releases_to_read=domains_and_releases_to_read, save_path=tmp_path
    )
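
For completeness, the same call outside the test suite; a minimal sketch that reuses the test's domain/release pair, with a hypothetical output directory in place of pytest's `tmp_path`:

    import pathlib

    from src.faostat_data_primap.read import read_data

    # One (domain, release) pair per entry, matching the new
    # tuple[tuple[str, str]] annotation on read_data.
    domains_and_releases_to_read = (("land_use_fires", "2024-11-14"),)

    # Hypothetical output location; the .csv (primap2 interchange format)
    # and .nc outputs end up under this path.
    save_path = pathlib.Path("extracted_data")
    save_path.mkdir(parents=True, exist_ok=True)

    read_data(
        domains_and_releases_to_read=domains_and_releases_to_read,
        save_path=save_path,
    )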