소스 검색

Add code to download data from zenodo; add more versions to versions.py

Johannes Gütschow 1 년 전
부모
커밋
3d6a30b84f
4개의 변경된 파일97개의 추가작업 그리고 0개의 파일을 삭제
  1. 9 0
      dodo.py
  2. 37 0
      src/download_version.py
  3. 32 0
      src/download_version_datalad.py
  4. 19 0
      src/versions.py

+ 9 - 0
dodo.py

@@ -26,4 +26,13 @@ def task_read_version():
                     f"--version={read_config['version']}"],
                     f"--version={read_config['version']}"],
         'verbosity': 2,
         'verbosity': 2,
         'setup': ['setup_venv'],
         'setup': ['setup_venv'],
+    }
+
+def task_download_version():
+    """Run src/download_version_datalad.py for the version in read_config."""
+    return {
+        'actions': [f"./venv/bin/python src/download_version_datalad.py "
+                    f"--version={read_config['version']}"],
+        'verbosity': 2,
+        'setup': ['setup_venv'],
     }
     }

+ 37 - 0
src/download_version.py

@@ -0,0 +1,37 @@
+# script to download files for a given version from zenodo
+import argparse
+import requests
+import shutil
+from pathlib import Path
+from zipfile import ZipFile
+from versions import versions
+from definitions import downloaded_data_folder
+
+# handle command line parameter
+parser = argparse.ArgumentParser()
+parser.add_argument("--version", help="Version to read")
+args = parser.parse_args()
+version = args.version
+
+root_path = Path(".")
+
+version_info = versions[version]
+record_id = version_info["ref"].split(".")[-1]
+url = f"https://zenodo.org/api/records/{record_id}/files-archive"
+
+local_folder = root_path / downloaded_data_folder / version_info["folder"]
+if not local_folder.exists():
+    local_folder.mkdir()
+local_filename = local_folder / f"{record_id}.zip"
+
+#download all data in zip file
+r = requests.get(url, stream=True)
+with open(str(local_filename), 'wb') as f:
+    shutil.copyfileobj(r.raw, f)
+
+# extract data
+with ZipFile(str(local_filename), 'r') as f:
+    f.extractall(local_folder)
+
+# delete the zip file
+local_filename.unlink()

+ 32 - 0
src/download_version_datalad.py

@@ -0,0 +1,32 @@
+# script that calls datalad to run the data reading function
+
+import argparse
+import datalad.api
+from pathlib import Path
+from versions import versions
+from definitions import get_output_filename, downloaded_data_folder, extracted_data_folder
+
+# handle command line parameter
+parser = argparse.ArgumentParser()
+parser.add_argument("--version", help="Version to read")
+args = parser.parse_args()
+version = args.version
+
+root_path = Path(".")
+
+version_info = versions[version]
+
+# there are no input files
+# if files are in the target folder consider them to be output files
+output_folder = root_path / downloaded_data_folder / version_info["folder"]
+output_files = list(output_folder.iterdir())
+
+datalad.api.run(
+    cmd=f"./venv/bin/python3 src/download_version.py --version {version}",
+    dataset=root_path,
+    message=f"Download data for {version}.",
+    inputs=[],
+    outputs=output_files,
+    dry_run=None,
+    explicit=False,
+)

+ 19 - 0
src/versions.py

@@ -1,6 +1,25 @@
 # configurations for the different versions. mainly metadata
 # configurations for the different versions. mainly metadata
 
 
 versions = {
 versions = {
+    "v230913": {
+        'date': '13-Sep-2023',
+        'ver_str_long': 'version 230913',
+        'ver_str_short': '230913',
+        "folder": "v230913",
+        "transpose": True,
+        "filename": "0. GCP-CEM.csv",
+        'ref': '10.5281/zenodo.8339353',
+        'ref2': '10.5194/essd-11-1675-2019',
+        'title': 'Global CO2 emissions from cement production',
+        'institution': "CICERO - Center for International Climate Research",
+        'filter_keep': {},
+        'filter_remove': {},
+        'contact': "johannes.guetschow@climate-resource.com",
+        'comment': ("Published by Robbie Andrew, converted to PRIMAP2 format by "
+                    "Johannes Gütschow"),
+        'unit': 'kt * CO2 / year',
+        'country_code': True,
+    },
     "v230428": {
     "v230428": {
         'date': '28-Apr-2023',
         'date': '28-Apr-2023',
         'ver_str_long': 'version 230428',
         'ver_str_long': 'version 230428',