Procházet zdrojové kódy

add script to download data in CSV and parquet format

Mika Pflüger před 3 roky
rodič
revize
91ad5f7497
4 změnil soubory, kde provedl 63 přidání a 0 odebrání
  1. 1 0
      .gitignore
  2. 31 0
      Makefile
  3. 25 0
      download.py
  4. 6 0
      requirements.txt

+ 1 - 0
.gitignore

@@ -0,0 +1 @@
+venv/

+ 31 - 0
Makefile

@@ -0,0 +1,31 @@
+# Command-style targets that do not correspond to files. `download` belongs here
+# as well: its recipe runs a script and never creates a file named "download".
+.PHONY: help virtual-environment update-venv download
+.DEFAULT_GOAL := help
+
+# Python snippet used by the `help` rule: it reads makefile text on stdin and
+# prints one "target  description" line for each rule header that carries a
+# trailing `## description` annotation.
+define PRINT_HELP_PYSCRIPT
+import re, sys
+
+for line in sys.stdin:
+	m = re.match(r'^([a-zA-Z_-]+):.*?## (.*)$$', line)
+	if m is None:
+		continue
+	name, description = m.groups()
+	print("%-20s %s" % (name, description))
+endef
+export PRINT_HELP_PYSCRIPT
+
+# List the annotated targets. Uses python3, like the venv rules in this file,
+# because many systems do not provide a bare `python` command.
+help:
+	@python3 -c "$$PRINT_HELP_PYSCRIPT" < $(MAKEFILE_LIST)
+
+# Readable alias: building `virtual-environment` just builds the venv directory.
+virtual-environment: venv ## setup a virtual environment for development
+
+# Re-run whenever requirements.txt is newer than the venv directory. The final
+# touch refreshes the directory's mtime so the target counts as up to date.
+venv: requirements.txt ## setup a virtual environment for development
+	[ -d $@ ] || python3 -m venv $@
+	$@/bin/python -m pip install -r $<
+	touch $@
+
+# Runs on every invocation: the target is listed in .PHONY and has no
+# prerequisites. Creates the venv if it is missing, then calls pip with
+# --upgrade for everything in requirements.txt.
+update-venv:  ## update the development virtual environment
+	[ -d venv ] || python3 -m venv venv
+	venv/bin/python -m pip install --upgrade -r requirements.txt
+	touch venv
+
+# Run the download script with the virtual environment's interpreter. `venv` is
+# the first prerequisite, so $< expands to it.
+download: venv download.py  ## download all the data
+	$</bin/python download.py

+ 25 - 0
download.py

@@ -0,0 +1,25 @@
+import tqdm
+import os
+import pathlib
+
+from unfccc_di_api import UNFCCCApiReader
+
+# Project root: the directory containing this script. Anchoring on __file__
+# (rather than the current working directory) keeps the output location the
+# same no matter where the script is launched from.
+ROOT_DIR = pathlib.Path(__file__).resolve().parent
+
+
+def main():
+    """Download the data of every party and store it as CSV and parquet.
+
+    Iterates over the party codes in ``r.parties["code"]``, queries each one
+    and writes the resulting frame below ``ROOT_DIR / "data"``: into
+    ``annexI/`` when the code appears in the annex-one reader's party list,
+    otherwise into ``non-annexI/``. Each party produces ``<party>.csv.gz``
+    (gzip) and ``<party>.parquet`` (brotli).
+    """
+    r = UNFCCCApiReader()
+    for party in tqdm.tqdm(r.parties["code"], desc="parties"):
+        df = r.query(party_code=party, progress=True)
+
+        annexI = party in r.annex_one_reader.parties["code"].values
+        subdir = "annexI" if annexI else "non-annexI"
+        directory = ROOT_DIR / "data" / subdir
+        # parents=True: the intermediate "data" directory may not exist yet, and
+        # a plain mkdir() raises FileNotFoundError when the parent is missing.
+        # exist_ok=True makes the call a no-op on later iterations.
+        directory.mkdir(parents=True, exist_ok=True)
+        df.to_csv(directory / f"{party}.csv.gz", compression="gzip")
+        df.to_parquet(directory / f"{party}.parquet", compression="brotli")
+
+
+# Start the download only when this file is executed as a script, not when it
+# is imported as a module.
+if __name__ == "__main__":
+    main()

+ 6 - 0
requirements.txt

@@ -0,0 +1,6 @@
+unfccc_di_api
+pandas
+treelib
+tqdm
+requests
+pyarrow