Procházet zdrojové kódy

update NC, work on dodo.py

Johannes Gütschow před 9 měsíci
rodič
revize
d8a4dd178a
2 změnil soubory, kde provedl 253 přidání a 232 odebrání
  1. 252 231
      dodo.py
  2. 1 1
      downloaded_data/UNFCCC/submissions-nc.csv

+ 252 - 231
dodo.py

@@ -110,17 +110,12 @@ def task_update_bur():
             dataset=root_path,
             message="Fetch BUR submissions",
             outputs="downloaded_data/UNFCCC/submissions-bur.csv",
-            dry_run=True,
+            dry_run=None,
             explicit=True,
         )
 
     return {
         "targets": ["downloaded_data/UNFCCC/submissions-bur.csv"],
-        # "actions": [
-        #     'datalad run -m "Fetch BUR submissions" '
-        #     "-o downloaded_data/UNFCCC/submissions-bur.csv "
-        #     "python src/unfccc_ghg_data/unfccc_downloader/fetch_submissions_bur.py"
-        # ],
         "actions": [
             (fetch_bur,),
         ],
@@ -158,231 +153,257 @@ def task_download_bur():
     }
 
 
-#
-#
-# def task_update_nc():
-#     """Update list of NC submissions"""
-#     return {
-#         "targets": ["downloaded_data/UNFCCC/submissions-nc.csv"],
-#         "actions": [
-#             set_root_path(),
-#             datalad.api.run(
-#                 cmd="python3 src/unfccc_ghg_data/unfccc_downloader/"
-#                 "fetch_submissions_nc.py",
-#                 dataset=root_path,
-#                 message="Fetch NC submissions",
-#                 outputs="downloaded_data/UNFCCC/submissions-nc.csv",
-#                 dry_run=None,
-#                 explicit=True,
-#             ),
-#         ],
-#         "verbosity": 2,
-#         "setup": ["in_venv"],
-#     }
-#
-#
-# def task_download_nc():
-#     """Download NC submissions"""
-#     return {
-#         #'file_dep': ['downloaded_data/UNFCCC/submissions-nc.csv'],
-#         # deactivate file_dep fow now as it will always run fetch submissions
-#         # before download
-#         "actions": [
-#             set_root_path(),
-#             datalad.api.run(
-#                 cmd="python3 src/unfccc_ghg_data/unfccc_downloader/"
-#                 "download_nonannexI.py --category=NC",
-#                 dataset=root_path,
-#                 message="Download NC submissions",
-#                 inputs="downloaded_data/UNFCCC/submissions-nc.csv",
-#                 dry_run=None,
-#                 explicit=False,
-#             ),
-#             map_folders("downloaded_data/UNFCCC"),
-#         ],
-#         "verbosity": 2,
-#         "setup": ["in_venv"],
-#     }
-#
-#
-# # annexI data: one update call for all data types (as they are on one page)
-# # but for each year separately.
-# # downloading is per year and
-# update_aI_config = {
-#     "year": get_var("year", None),
-#     "category": get_var("category", None),
-# }
-#
-#
-# def task_update_annexi():
-#     """Update list of AnnexI submissions"""
-#     return {
-#         "targets": [
-#             f"downloaded_data/UNFCCC/submissions-annexI_{update_aI_config['year']}.csv"
-#         ],
-#         "actions": [
-#             set_root_path(),
-#             datalad.api.run(
-#                 cmd="python src/unfccc_ghg_data/unfccc_downloader/"
-#                 "fetch_submissions_annexI.py "
-#                 f"--year={update_aI_config['year']}",
-#                 dataset=root_path,
-#                 message=f"Fetch AnnexI submissions for {update_aI_config['year']}",
-#                 outputs=f"downloaded_data/UNFCCC/submissions-annexI_"
-#                 f"{update_aI_config['year']}.csv",
-#                 dry_run=None,
-#                 explicit=True,
-#             ),
-#         ],
-#         "verbosity": 2,
-#         "setup": ["in_venv"],
-#     }
-#
-#
-# def task_download_annexi():
-#     """Download AnnexI submissions"""
-#     return {
-#         #'file_dep': ['downloaded_data/UNFCCC/submissions-nc.csv'],
-#         # deactivate file_dep fow now as it will always run fetch submissions
-#         # before download
-#         "actions": [
-#             set_root_path(),
-#             datalad.api.run(
-#                 cmd="python src/unfccc_ghg_data/unfccc_downloader/download_annexI.py "
-#                 f"--category={update_aI_config['category']} "
-#                 f"--year={update_aI_config['year']}",
-#                 dataset=root_path,
-#                 message=f"Download AnnexI submissions for "
-#                 f"{update_aI_config['category']}"
-#                 f"{update_aI_config['year']}",
-#                 inputs=f"downloaded_data/UNFCCC/submissions-annexI_"
-#                 f"{update_aI_config['year']}.csv",
-#                 dry_run=None,
-#                 explicit=False,
-#             ),
-#             map_folders("downloaded_data/UNFCCC"),
-#         ],
-#         "verbosity": 2,
-#         "setup": ["in_venv"],
-#     }
-#
-#
-# # annexI data: one update call for all data types (as they are on one page)
-# # but for each year separately.
-# # downloading is per year and
-# update_btr_config = {
-#     "round": get_var("round", None),
-# }
-#
-#
-# def task_update_btr():
-#     """Update list of BTR submissions"""
-#     return {
-#         "targets": [
-#             f"downloaded_data/UNFCCC/submissions-BTR{update_btr_config['round']}.csv"
-#         ],
-#         "actions": [
-#             set_root_path(),
-#             datalad.api.run(
-#                 cmd="python src/unfccc_ghg_data/unfccc_downloader/"
-#                 "fetch_submissions_btr.py "
-#                 f"--round={update_btr_config['round']}",
-#                 dataset=root_path,
-#                 message=f"Fetch Biannial Transparency Report submissions for "
-#                 f"BTR{update_btr_config['round']}",
-#                 outputs=f"downloaded_data/UNFCCC/submissions-BTR"
-#                 f"{update_btr_config['round']}.csv",
-#                 dry_run=None,
-#                 explicit=True,
-#             ),
-#         ],
-#         "verbosity": 2,
-#         "setup": ["in_venv"],
-#     }
-#
-#
-# def task_download_btr():
-#     """Download BTR submissions"""
-#     return {
-#         #'file_dep': ['downloaded_data/UNFCCC/submissions-nc.csv'],
-#         # deactivate file_dep fow now as it will always run fetch submissions
-#         # before download
-#         "actions": [
-#             set_root_path(),
-#             datalad.api.run(
-#                 cmd="src/unfccc_ghg_data/unfccc_downloader/download_btr.py "
-#                 f"--round={update_btr_config['round']}",
-#                 dataset=root_path,
-#                 message="Download BTR submissions for "
-#                 f"BTR{update_btr_config['round']}",
-#                 inputs=f"downloaded_data/UNFCCC/submissions-BTR"
-#                 f"{update_btr_config['round']}.csv",
-#                 dry_run=None,
-#                 explicit=False,
-#             ),
-#             map_folders("downloaded_data/UNFCCC"),
-#         ],
-#         "verbosity": 2,
-#         "setup": ["in_venv"],
-#     }
-#
-#
-# def task_download_ndc():
-#     """Download NDC submissions"""
-#     return {
-#         "actions": [
-#             set_root_path(),
-#             datalad.api.run(
-#                 cmd="src/unfccc_ghg_data/unfccc_downloader/download_ndc.py",
-#                 dataset=root_path,
-#                 message="Download NDC submissions",
-#                 inputs=None,
-#                 dry_run=None,
-#                 explicit=False,
-#             ),
-#             map_folders("downloaded_data/UNFCCC"),
-#         ],
-#         "verbosity": 2,
-#         "setup": ["in_venv"],
-#     }
-#
-#
-# # read UNFCCC submissions.
-# # datalad run is called from within the read_UNFCCC_submission.py script
-# read_config = {
-#     "country": get_var("country", None),
-#     "submission": get_var("submission", None),
-# }
-#
-#
-# # TODO: make individual task for non-UNFCCC submissions
-# def task_read_unfccc_submission():
-#     """Read submission for a country (if code exists) (not for CRF)"""
-#     return {
-#         # "actions": [
-#         #     f"python src/unfccc_ghg_data/unfccc_reader/read_UNFCCC_submission.py "
-#         #     f"--country={read_config['country']} --submission={read_config['submission']}",
-#         #     "python src/unfccc_ghg_data/helper/folder_mapping.py "
-#         #     "--folder=extracted_data/UNFCCC",
-#         # ],
-#         "actions": [
-#             set_root_path(),
-#             datalad.api.run(
-#                 cmd="src/unfccc_ghg_data/unfccc_reader/read_UNFCCC_submission.py"
-#                 f"--country={read_config['country']} "
-#                 f"--submission={read_config['submission']}",
-#                 dataset=root_path,
-#                 message=f"Read {read_config['submission']} for country "
-#                 f"{read_config['country']}",
-#                 dry_run=None,
-#                 explicit=False,
-#             ),
-#             map_folders("extracted_data/UNFCCC"),
-#         ],
-#         "verbosity": 2,
-#         "setup": ["in_venv"],
-#     }
-#
+def task_update_nc():
+    """Update list of NC submissions"""
+
+    def fetch_nc():
+        datalad.api.run(
+            cmd="python3 src/unfccc_ghg_data/unfccc_downloader/"
+            "fetch_submissions_nc.py",
+            dataset=root_path,
+            message="Fetch NC submissions",
+            outputs="downloaded_data/UNFCCC/submissions-nc.csv",
+            dry_run=None,
+            explicit=True,
+        )
+
+    return {
+        "targets": ["downloaded_data/UNFCCC/submissions-nc.csv"],
+        "actions": [
+            (fetch_nc,),
+        ],
+        "verbosity": 2,
+        "setup": ["in_venv"],
+    }
+
+
+def task_download_nc():
+    """Download NC submissions"""
+
+    def download_nc():
+        (
+            datalad.api.run(
+                cmd="python3 src/unfccc_ghg_data/unfccc_downloader/"
+                "download_nonannexI.py --category=NC",
+                dataset=root_path,
+                message="Download NC submissions",
+                inputs="downloaded_data/UNFCCC/submissions-nc.csv",
+                dry_run=None,
+                explicit=False,
+            ),
+        )
+
+    return {
+        #'file_dep': ['downloaded_data/UNFCCC/submissions-nc.csv'],
+        # deactivate file_dep for now as it will always run fetch submissions
+        # before download
+        "actions": [
+            (download_nc,),
+            (map_folders, ["downloaded_data/UNFCCC"]),
+        ],
+        "verbosity": 2,
+        "setup": ["in_venv"],
+    }
+
+
+# annexI data: one update call for all data types (as they are on one page)
+# but for each year separately.
+# downloading is per year and category
+update_aI_config = {
+    "year": get_var("year", None),
+    "category": get_var("category", None),
+}
+
+
+def task_update_annexi():
+    """Update list of AnnexI submissions"""
+
+    def fetch_annexi():
+        (
+            datalad.api.run(
+                cmd="python src/unfccc_ghg_data/unfccc_downloader/"
+                "fetch_submissions_annexI.py "
+                f"--year={update_aI_config['year']}",
+                dataset=root_path,
+                message=f"Fetch AnnexI submissions for {update_aI_config['year']}",
+                outputs=f"downloaded_data/UNFCCC/submissions-annexI_"
+                f"{update_aI_config['year']}.csv",
+                dry_run=None,
+                explicit=True,
+            ),
+        )
+
+    return {
+        "targets": [
+            f"downloaded_data/UNFCCC/submissions-annexI_{update_aI_config['year']}.csv"
+        ],
+        "actions": [
+            (fetch_annexi,),
+        ],
+        "verbosity": 2,
+        "setup": ["in_venv"],
+    }
+
+
+def task_download_annexi():
+    """Download AnnexI submissions"""
+
+    def download_annexi():
+        (
+            datalad.api.run(
+                cmd="python src/unfccc_ghg_data/unfccc_downloader/download_annexI.py "
+                f"--category={update_aI_config['category']} "
+                f"--year={update_aI_config['year']}",
+                dataset=root_path,
+                message=f"Download AnnexI submissions for "
+                f"{update_aI_config['category']}"
+                f"{update_aI_config['year']}",
+                inputs=f"downloaded_data/UNFCCC/submissions-annexI_"
+                f"{update_aI_config['year']}.csv",
+                dry_run=None,
+                explicit=False,
+            ),
+        )
+
+    return {
+        # 'file_dep': [f"downloaded_data/UNFCCC/submissions-annexI_"
+        #              f"{update_aI_config['year']}.csv"],
+        # deactivate file_dep for now as it will always run fetch submissions
+        # before download
+        "actions": [
+            (download_annexi,),
+            (map_folders, ["downloaded_data/UNFCCC"]),
+        ],
+        "verbosity": 2,
+        "setup": ["in_venv"],
+    }
+
+
+# BTR data: one update call for all data types (as they are on one page)
+# but for each submission round separately.
+# downloading is per submission round
+update_btr_config = {
+    "round": get_var("round", None),
+}
+
+
+def task_update_btr():
+    """Update list of BTR submissions"""
+
+    def fetch_btr():
+        (
+            datalad.api.run(
+                cmd="python src/unfccc_ghg_data/unfccc_downloader/"
+                "fetch_submissions_btr.py "
+                f"--round={update_btr_config['round']}",
+                dataset=root_path,
+                message=f"Fetch Biannial Transparency Report submissions for "
+                f"BTR{update_btr_config['round']}",
+                outputs=f"downloaded_data/UNFCCC/submissions-BTR"
+                f"{update_btr_config['round']}.csv",
+                dry_run=None,
+                explicit=True,
+            ),
+        )
+
+    return {
+        "targets": [
+            f"downloaded_data/UNFCCC/submissions-BTR{update_btr_config['round']}.csv"
+        ],
+        "actions": [
+            (fetch_btr,),
+        ],
+        "verbosity": 2,
+        "setup": ["in_venv"],
+    }
+
+
+def task_download_btr():
+    """Download BTR submissions"""
+
+    def download_btr():
+        (
+            datalad.api.run(
+                cmd="src/unfccc_ghg_data/unfccc_downloader/download_btr.py "
+                f"--round={update_btr_config['round']}",
+                dataset=root_path,
+                message="Download BTR submissions for "
+                f"BTR{update_btr_config['round']}",
+                inputs=f"downloaded_data/UNFCCC/submissions-BTR"
+                f"{update_btr_config['round']}.csv",
+                dry_run=None,
+                explicit=False,
+            ),
+        )
+
+    return {
+        # 'file_dep': [f"downloaded_data/UNFCCC/submissions-BTR"
+        #              f"{update_btr_config['round']}.csv"],
+        # deactivate file_dep for now as it will always run fetch submissions
+        # before download
+        "actions": [
+            (download_btr,),
+            (map_folders, ["downloaded_data/UNFCCC"]),
+        ],
+        "verbosity": 2,
+        "setup": ["in_venv"],
+    }
+
+
+def task_download_ndc():
+    """Download NDC submissions"""
+
+    def download_ndc():
+        (
+            datalad.api.run(
+                cmd="src/unfccc_ghg_data/unfccc_downloader/download_ndc.py",
+                dataset=root_path,
+                message="Download NDC submissions",
+                inputs=None,
+                dry_run=None,
+                explicit=False,
+            ),
+        )
+
+    return {
+        "actions": [
+            (download_ndc,),
+            (map_folders, ["downloaded_data/UNFCCC"]),
+        ],
+        "verbosity": 2,
+        "setup": ["in_venv"],
+    }
+
+
+# read UNFCCC submissions.
+# datalad run is called from within the read_UNFCCC_submission.py script
+read_config = {
+    "country": get_var("country", None),
+    "submission": get_var("submission", None),
+}
+
+
+# TODO: make individual task for non-UNFCCC submissions
+def task_read_unfccc_submission():
+    """Read submission for a country (if code exists) (not for CRF)
+
+    Datalad is called from `read_UNFCCC_submission`, so we can just call this script
+    here.
+
+    TODO: check if it makes sense to convert script to function
+    """
+    return {
+        "actions": [
+            f"python src/unfccc_ghg_data/unfccc_reader/read_UNFCCC_submission.py "
+            f"--country={read_config['country']} "
+            f"--submission={read_config['submission']}",
+            (map_folders, ["extracted_data/UNFCCC"]),
+        ],
+        "verbosity": 2,
+        "setup": ["in_venv"],
+    }
+
+
 #
 # # read UNFCCC submissions.
 # # datalad run is called from within the read_UNFCCC_submission.py script

+ 1 - 1
downloaded_data/UNFCCC/submissions-nc.csv

@@ -1 +1 @@
-../../.git/annex/objects/qg/KM/MD5E-s84266--5b3532af589257a7f3b86387633e58cf.csv/MD5E-s84266--5b3532af589257a7f3b86387633e58cf.csv
+../../.git/annex/objects/7v/V9/MD5E-s84722--00b0ee8727956dafa359bbe5b4ae844c.csv/MD5E-s84722--00b0ee8727956dafa359bbe5b4ae844c.csv