فهرست منبع

convert DI tasks to new structure, delete obsolete scripts, rewrite CRF, BUR, .. tasks

Johannes Gütschow 9 ماه پیش
والد
کامیت
5130c46c8a

+ 242 - 242
dodo.py

@@ -10,6 +10,10 @@ from doit import get_var
 root_path = "."
 os.environ["UNFCCC_GHG_ROOT_PATH"] = root_path
 
+from unfccc_ghg_data.helper.functions import (  # noqa: E402
+    get_country_datasets,
+    get_country_submissions,
+)
 from unfccc_ghg_data.unfccc_crf_reader.unfccc_crf_reader_devel import (  # noqa: E402
     read_year_to_test_specs,
 )
@@ -17,6 +21,12 @@ from unfccc_ghg_data.unfccc_crf_reader.unfccc_crf_reader_prod import (  # noqa:
     read_crf_for_country_datalad,
     read_new_crf_for_year_datalad,
 )
+from unfccc_ghg_data.unfccc_di_reader import (  # noqa: E402
+    process_DI_for_country_datalad,
+    process_DI_for_country_group_datalad,
+    read_DI_for_country_datalad,
+    read_DI_for_country_group_datalad,
+)
 
 
 def set_root_path():
@@ -62,16 +72,6 @@ def task_in_venv():
     }
 
 
-# set UNFCCC_GHG_ROOT_PATH environment variable
-def task_set_env():
-    """
-    Set the environment variable for the module so data is stored in the correct folders
-    """
-    return {
-        "actions": [set_root_path],
-    }
-
-
 # Task to create the mapping files which map folder names to ISO 3-letter country codes
 read_config_folder = {
     "folder": get_var("folder", None),
@@ -92,22 +92,22 @@ def task_map_folders():
 # Tasks for getting submissions and downloading them
 def task_update_bur():
     """Update list of BUR submissions"""
-
-    def fetch_bur():
-        datalad.api.run(
-            cmd="python3 src/unfccc_ghg_data/unfccc_downloader/"
-            "fetch_submissions_bur.py",
-            dataset=root_path,
-            message="Fetch BUR submissions",
-            outputs="downloaded_data/UNFCCC/submissions-bur.csv",
-            dry_run=None,
-            explicit=True,
-        )
-
     return {
         "targets": ["downloaded_data/UNFCCC/submissions-bur.csv"],
         "actions": [
-            (fetch_bur,),
+            (
+                datalad.api.run,
+                [],
+                {
+                    "cmd": "python3 src/unfccc_ghg_data/unfccc_downloader/"
+                    "fetch_submissions_bur.py",
+                    "dataset": root_path,
+                    "message": "Fetch BUR submissions",
+                    "outputs": "downloaded_data/UNFCCC/submissions-bur.csv",
+                    "dry_run": None,
+                    "explicit": True,
+                },
+            ),
         ],
         "verbosity": 2,
         "setup": ["in_venv"],
@@ -116,26 +116,24 @@ def task_update_bur():
 
 def task_download_bur():
     """Download BUR submissions"""
-
-    def download_bur():
-        (
-            datalad.api.run(
-                cmd="python3 src/unfccc_ghg_data/unfccc_downloader/"
-                "download_nonannexI.py --category=BUR",
-                dataset=root_path,
-                message="Download BUR submissions",
-                inputs="downloaded_data/UNFCCC/submissions-bur.csv",
-                dry_run=None,
-                explicit=False,
-            ),
-        )
-
     return {
         #'file_dep': ['downloaded_data/UNFCCC/submissions-bur.csv'],
         # deactivate file_dep fow now as it will always run fetch submissions
         # before download
         "actions": [
-            (download_bur,),
+            (
+                datalad.api.run,
+                [],
+                {
+                    "cmd": "python3 src/unfccc_ghg_data/unfccc_downloader/"
+                    "download_nonannexI.py --category=BUR",
+                    "dataset": root_path,
+                    "message": "Download BUR submissions",
+                    "inputs": "downloaded_data/UNFCCC/submissions-bur.csv",
+                    "dry_run": None,
+                    "explicit": False,
+                },
+            ),
             (map_folders, ["downloaded_data/UNFCCC"]),
         ],
         "verbosity": 2,
@@ -160,7 +158,19 @@ def task_update_nc():
     return {
         "targets": ["downloaded_data/UNFCCC/submissions-nc.csv"],
         "actions": [
-            (fetch_nc,),
+            (
+                datalad.api.run,
+                [],
+                {
+                    "cmd": "python3 src/unfccc_ghg_data/unfccc_downloader/"
+                    "fetch_submissions_nc.py",
+                    "dataset": root_path,
+                    "message": "Fetch NC submissions",
+                    "outputs": "downloaded_data/UNFCCC/submissions-nc.csv",
+                    "dry_run": None,
+                    "explicit": True,
+                },
+            ),
         ],
         "verbosity": 2,
         "setup": ["in_venv"],
@@ -169,26 +179,24 @@ def task_update_nc():
 
 def task_download_nc():
     """Download BUR submissions"""
-
-    def download_nc():
-        (
-            datalad.api.run(
-                cmd="python3 src/unfccc_ghg_data/unfccc_downloader/"
-                "download_nonannexI.py --category=NC",
-                dataset=root_path,
-                message="Download NC submissions",
-                inputs="downloaded_data/UNFCCC/submissions-nc.csv",
-                dry_run=None,
-                explicit=False,
-            ),
-        )
-
     return {
         #'file_dep': ['downloaded_data/UNFCCC/submissions-bur.csv'],
         # deactivate file_dep fow now as it will always run fetch submissions
         # before download
         "actions": [
-            (download_nc,),
+            (
+                datalad.api.run,
+                [],
+                {
+                    "cmd": "python3 src/unfccc_ghg_data/unfccc_downloader/"
+                    "download_nonannexI.py --category=NC",
+                    "dataset": root_path,
+                    "message": "Download NC submissions",
+                    "inputs": "downloaded_data/UNFCCC/submissions-nc.csv",
+                    "dry_run": None,
+                    "explicit": False,
+                },
+            ),
             (map_folders, ["downloaded_data/UNFCCC"]),
         ],
         "verbosity": 2,
@@ -207,28 +215,26 @@ update_aI_config = {
 
 def task_update_annexi():
     """Update list of AnnexI submissions"""
-
-    def fetch_annexi():
-        (
-            datalad.api.run(
-                cmd="python src/unfccc_ghg_data/unfccc_downloader/"
-                "fetch_submissions_annexI.py "
-                f"--year={update_aI_config['year']}",
-                dataset=root_path,
-                message=f"Fetch AnnexI submissions for {update_aI_config['year']}",
-                outputs=f"downloaded_data/UNFCCC/submissions-annexI_"
-                f"{update_aI_config['year']}.csv",
-                dry_run=None,
-                explicit=True,
-            ),
-        )
-
     return {
         "targets": [
             f"downloaded_data/UNFCCC/submissions-annexI_{update_aI_config['year']}.csv"
         ],
         "actions": [
-            (fetch_annexi,),
+            (
+                datalad.api.run,
+                [],
+                {
+                    "cmd": "python src/unfccc_ghg_data/unfccc_downloader/"
+                    "fetch_submissions_annexI.py "
+                    f"--year={update_aI_config['year']}",
+                    "dataset": root_path,
+                    "message": f"Fetch AnnexI submissions for {update_aI_config['year']}",
+                    "outputs": f"downloaded_data/UNFCCC/submissions-annexI_"
+                    f"{update_aI_config['year']}.csv",
+                    "dry_run": None,
+                    "explicit": True,
+                },
+            ),
         ],
         "verbosity": 2,
         "setup": ["in_venv"],
@@ -237,31 +243,29 @@ def task_update_annexi():
 
 def task_download_annexi():
     """Download AnnexI submissions"""
-
-    def download_annexi():
-        (
-            datalad.api.run(
-                cmd="python src/unfccc_ghg_data/unfccc_downloader/download_annexI.py "
-                f"--category={update_aI_config['category']} "
-                f"--year={update_aI_config['year']}",
-                dataset=root_path,
-                message=f"Download AnnexI submissions for "
-                f"{update_aI_config['category']}"
-                f"{update_aI_config['year']}",
-                inputs=f"downloaded_data/UNFCCC/submissions-annexI_"
-                f"{update_aI_config['year']}.csv",
-                dry_run=None,
-                explicit=False,
-            ),
-        )
-
     return {
         # 'file_dep': [f"downloaded_data/UNFCCC/submissions-annex1_"
         #              f"{update_aI_config['year']}.csv"],
         # deactivate file_dep fow now as it will always run fetch submissions
         # before download
         "actions": [
-            (download_annexi,),
+            (
+                datalad.api.run,
+                [],
+                {
+                    "cmd": "python src/unfccc_ghg_data/unfccc_downloader/download_annexI.py "
+                    f"--category={update_aI_config['category']} "
+                    f"--year={update_aI_config['year']}",
+                    "dataset": root_path,
+                    "message": f"Download AnnexI submissions for "
+                    f"{update_aI_config['category']}"
+                    f"{update_aI_config['year']}",
+                    "inputs": f"downloaded_data/UNFCCC/submissions-annexI_"
+                    f"{update_aI_config['year']}.csv",
+                    "dry_run": None,
+                    "explicit": False,
+                },
+            ),
             (map_folders, ["downloaded_data/UNFCCC"]),
         ],
         "verbosity": 2,
@@ -279,29 +283,27 @@ update_btr_config = {
 
 def task_update_btr():
     """Update list of BTR submissions"""
-
-    def fetch_btr():
-        (
-            datalad.api.run(
-                cmd="python src/unfccc_ghg_data/unfccc_downloader/"
-                "fetch_submissions_btr.py "
-                f"--round={update_btr_config['round']}",
-                dataset=root_path,
-                message=f"Fetch Biannial Transparency Report submissions for "
-                f"BTR{update_btr_config['round']}",
-                outputs=f"downloaded_data/UNFCCC/submissions-BTR"
-                f"{update_btr_config['round']}.csv",
-                dry_run=None,
-                explicit=True,
-            ),
-        )
-
     return {
         "targets": [
             f"downloaded_data/UNFCCC/submissions-BTR{update_btr_config['round']}.csv"
         ],
         "actions": [
-            (fetch_btr,),
+            (
+                datalad.api.run,
+                [],
+                {
+                    "cmd": "python src/unfccc_ghg_data/unfccc_downloader/"
+                    "fetch_submissions_btr.py "
+                    f"--round={update_btr_config['round']}",
+                    "dataset": root_path,
+                    "message": f"Fetch Biannial Transparency Report submissions for "
+                    f"BTR{update_btr_config['round']}",
+                    "outputs": f"downloaded_data/UNFCCC/submissions-BTR"
+                    f"{update_btr_config['round']}.csv",
+                    "dry_run": None,
+                    "explicit": True,
+                },
+            ),
         ],
         "verbosity": 2,
         "setup": ["in_venv"],
@@ -310,29 +312,27 @@ def task_update_btr():
 
 def task_download_btr():
     """Download BTR submissions"""
-
-    def download_btr():
-        (
-            datalad.api.run(
-                cmd="python src/unfccc_ghg_data/unfccc_downloader/download_btr.py "
-                f"--round={update_btr_config['round']}",
-                dataset=root_path,
-                message="Download BTR submissions for "
-                f"BTR{update_btr_config['round']}",
-                inputs=f"downloaded_data/UNFCCC/submissions-BTR"
-                f"{update_btr_config['round']}.csv",
-                dry_run=None,
-                explicit=False,
-            ),
-        )
-
     return {
         # 'file_dep': [f"downloaded_data/UNFCCC/submissions-btr.csv "
         #              f"{update_btr_config['round']}.csv"],
         # deactivate file_dep fow now as it will always run fetch submissions
         # before download
         "actions": [
-            (download_btr,),
+            (
+                datalad.api.run,
+                [],
+                {
+                    "cmd": "python src/unfccc_ghg_data/unfccc_downloader/download_btr.py "
+                    f"--round={update_btr_config['round']}",
+                    "dataset": root_path,
+                    "message": "Download BTR submissions for "
+                    f"BTR{update_btr_config['round']}",
+                    "inputs": f"downloaded_data/UNFCCC/submissions-BTR"
+                    f"{update_btr_config['round']}.csv",
+                    "dry_run": None,
+                    "explicit": False,
+                },
+            ),
             (map_folders, ["downloaded_data/UNFCCC"]),
         ],
         "verbosity": 2,
@@ -342,22 +342,20 @@ def task_download_btr():
 
 def task_download_ndc():
     """Download NDC submissions"""
-
-    def download_ndc():
-        (
-            datalad.api.run(
-                cmd="src/unfccc_ghg_data/unfccc_downloader/download_ndc.py",
-                dataset=root_path,
-                message="Download NDC submissions",
-                inputs=None,
-                dry_run=None,
-                explicit=False,
-            ),
-        )
-
     return {
         "actions": [
-            (download_ndc,),
+            (
+                datalad.api.run,
+                [],
+                {
+                    "cmd": "src/unfccc_ghg_data/unfccc_downloader/download_ndc.py",
+                    "dataset": root_path,
+                    "message": "Download NDC submissions",
+                    "inputs": None,
+                    "dry_run": None,
+                    "explicit": False,
+                },
+            ),
             (map_folders, ["downloaded_data/UNFCCC"]),
         ],
         "verbosity": 2,
@@ -460,7 +458,6 @@ def task_read_new_unfccc_crf_for_year():
             (read_new_CRF,),
             (map_folders, ["extracted_data/UNFCCC"]),
         ],
-        "task_dep": ["set_env"],
         "verbosity": 2,
         "setup": ["in_venv"],
     }
@@ -492,7 +489,10 @@ def task_test_read_unfccc_crf_for_year():
         )
 
     return {
-        "actions": [(read_CRF,)],
+        "actions": [
+            (read_CRF,),
+            (map_folders, ["extracted_data/UNFCCC"]),
+        ],
         "verbosity": 2,
         "setup": ["in_venv"],
     }
@@ -507,6 +507,7 @@ def task_compile_raw_unfccc_crf_for_year():
     which have not yet been read.
 
     Data are saved in the datasets/UNFCCC/[CRFYYYY|CRTX] folder.
+    TODO: could make a function from the script
     """
     actions = [
         f"python "
@@ -522,104 +523,103 @@ def task_compile_raw_unfccc_crf_for_year():
     }
 
 
-#
-# # tasks for DI reader
-# # datalad run is called from within the read_unfccc_di_for_country.py script
-# read_config_di = {
-#     "country": get_var("country", None),
-#     "date": get_var("date", None),
-#     "annexI": get_var("annexI", False),
-#     # "countries": get_var('countries', None),
-# }
-#
-#
-# def task_read_unfccc_di_for_country():
-#     """Read DI data for a country"""
-#     actions = [
-#         f"python "
-#         f"src/unfccc_ghg_data/unfccc_di_reader/read_unfccc_di_for_country_datalad.py "
-#         f"--country={read_config_di['country']}",
-#         "python src/unfccc_ghg_data/helper/folder_mapping.py "
-#         "--folder=extracted_data/UNFCCC",
-#     ]
-#     return {
-#         "actions": actions,
-#         "task_dep": ["set_env"],
-#         "verbosity": 2,
-#         "setup": ["in_venv"],
-#     }
-#
-#
-# def task_process_unfccc_di_for_country():
-#     """Process DI data for a country"""
-#     actions = [
-#         f"python "
-#         f"src/unfccc_ghg_data/unfccc_di_reader/process_unfccc_di_for_country_datalad"
-#         f".py "
-#         f"--country={read_config_di['country']} --date={read_config_di['date']}",
-#         "python src/unfccc_ghg_data/helper/folder_mapping.py "
-#         "--folder=extracted_data/UNFCCC",
-#     ]
-#     return {
-#         "actions": actions,
-#         "task_dep": ["set_env"],
-#         "verbosity": 2,
-#         "setup": ["in_venv"],
-#     }
-#
-#
-# def task_read_unfccc_di_for_country_group():
-#     """Read DI data for a country group"""
-#     actions = [
-#         "python "
-#         "src/unfccc_ghg_data/unfccc_di_reader/read_unfccc_di_for_country_group_datalad"
-#         ".py",
-#         "python src/unfccc_ghg_data/helper/folder_mapping.py "
-#         "--folder=extracted_data/UNFCCC",
-#     ]
-#     if read_config_di["annexI"] == "True":
-#         actions[0] = actions[0] + " --annexI"
-#
-#     return {
-#         "actions": actions,
-#         "task_dep": ["set_env"],
-#         "verbosity": 2,
-#         "setup": ["in_venv"],
-#     }
-#
-#
-# def task_process_unfccc_di_for_country_group():
-#     """Process DI data for a country group"""
-#     actions = [
-#         "python "
-#         "src/unfccc_ghg_data/unfccc_di_reader"
-#         "/process_unfccc_di_for_country_group_datalad"
-#         ".py",
-#     ]
-#     if read_config_di["annexI"] == "True":
-#         actions[0] = actions[0] + " --annexI"
-#     if read_config_di["date"] is not None:
-#         actions[0] = actions[0] + f" --date={read_config_di['date']}"
-#
-#     return {
-#         "actions": actions,
-#         "task_dep": ["set_env"],
-#         "verbosity": 2,
-#         "setup": ["in_venv"],
-#     }
-#
-#
-# # general tasks
-# def task_country_info():
-#     """
-#     Print information on submissions and datasets available for given country
-#     """
-#     return {
-#         "actions": [
-#             f"python src/unfccc_ghg_data/helper/country_info.py "
-#             f"--country={read_config['country']}"
-#         ],
-#         "task_dep": ["set_env"],
-#         "verbosity": 2,
-#         "setup": ["in_venv"],
-#     }
+# tasks for DI reader
+# TODO DI tasks need testing
+# datalad run is called from within the unfccc_di_reader *_datalad functions
+read_config_di = {
+    "country": get_var("country", None),
+    "date": get_var("date", None),
+    "annexI": get_var("annexI", False),
+    # "countries": get_var('countries', None),
+}
+
+
+def task_read_unfccc_di_for_country():
+    """Read DI data for a country"""
+    return {
+        "actions": [
+            (read_DI_for_country_datalad, [read_config_di["country"]]),
+            (map_folders, ["extracted_data/UNFCCC"]),
+        ],
+        "verbosity": 2,
+        "setup": ["in_venv"],
+    }
+
+
+def task_process_unfccc_di_for_country():
+    """Process DI data for a country"""
+    return {
+        "actions": [
+            (
+                process_DI_for_country_datalad,
+                [],
+                {
+                    "country": read_config_di["country"],
+                    "date_str": read_config_di["date"],
+                },
+            ),
+            (map_folders, ["extracted_data/UNFCCC"]),
+        ],
+        "verbosity": 2,
+        "setup": ["in_venv"],
+    }
+
+
+def task_read_unfccc_di_for_country_group():
+    """Read DI data for a country group"""
+
+    def read_DI():
+        if read_config_di["annexI"] == "True":
+            annexI = True
+        else:
+            annexI = False
+        read_DI_for_country_group_datalad(annexI=annexI)
+
+    return {
+        "actions": [(read_DI,), (map_folders, ["extracted_data/UNFCCC"])],
+        "verbosity": 2,
+        "setup": ["in_venv"],
+    }
+
+
+def task_process_unfccc_di_for_country_group():
+    """Process DI data for a country group"""
+
+    def proc_DI():
+        if read_config_di["annexI"] == "True":
+            annexI = True
+        else:
+            annexI = False
+        process_DI_for_country_group_datalad(
+            annexI=annexI,
+            date_str=read_config_di["date"],
+        )
+
+    return {
+        "actions": [(proc_DI,), (map_folders, ["extracted_data/UNFCCC"])],
+        "verbosity": 2,
+        "setup": ["in_venv"],
+    }
+
+
+# general tasks
+def task_country_info():
+    """
+    Print information on submissions and datasets available for given country
+    """
+
+    def country_info(country):
+        # print available submissions
+        print("=" * 15 + " Available submissions " + "=" * 15)
+        get_country_submissions(country, True)
+        print("")
+
+        # print available datasets
+        print("=" * 15 + " Available datasets " + "=" * 15)
+        get_country_datasets(country, True)
+
+    return {
+        "actions": [(country_info, [read_config["country"]])],
+        "verbosity": 2,
+        "setup": ["in_venv"],
+    }

+ 0 - 30
src/unfccc_ghg_data/helper/country_info.py

@@ -1,30 +0,0 @@
-"""
-script to display information on country
-
-This script takes country as input (from doit) and displays available
-submissions and datasets both read and not read
-"""
-
-import argparse
-
-from unfccc_ghg_data.helper.functions import (
-    get_country_datasets,
-    get_country_submissions,
-)
-
-if __name__ == "__main__":
-    # Find the right function and possible input and output files and
-    # read the data using datalad run.
-    parser = argparse.ArgumentParser()
-    parser.add_argument("--country", help="Country name or code")
-    args = parser.parse_args()
-    country = args.country
-
-    # print available submissions
-    print("=" * 15 + " Available submissions " + "=" * 15)
-    get_country_submissions(country, True)
-    print("")
-
-    # print available datasets
-    print("=" * 15 + " Available datasets " + "=" * 15)
-    get_country_datasets(country, True)

+ 1 - 1
src/unfccc_ghg_data/helper/functions.py

@@ -938,7 +938,7 @@ def get_code_file(
 
     # CRF is an exception as it's read using the unfccc_crf_reader module
     # so we return the path to that.
-    if submission[0:3] == "CRF":
+    if submission[0:3] in ("CRF", "CRT"):
         return root_path / "unfccc_crf_reader"
 
     if submission[0:2] == "DI":

+ 0 - 29
src/unfccc_ghg_data/unfccc_di_reader/process_unfccc_di_for_country_datalad.py

@@ -1,29 +0,0 @@
-"""
-call process_DI_for_country_datalad
-
-wrapper around process_DI_for_country_datalad such that it can be called
-from doit in the current setup where doit runs on system python and
-not in the venv.
-"""
-
-import argparse
-
-from unfccc_ghg_data.unfccc_di_reader import process_DI_for_country_datalad
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser()
-    parser.add_argument("--country", help="Country name or code")
-    parser.add_argument(
-        "--date",
-        help="String with date to read and process. If not "
-        "given latest data will be used",
-        default=None,
-    )
-    args = parser.parse_args()
-    country = args.country
-    date_str = args.date
-
-    if date_str == "None":
-        date_str = None
-
-    process_DI_for_country_datalad(country, date_str=date_str)

+ 0 - 31
src/unfccc_ghg_data/unfccc_di_reader/process_unfccc_di_for_country_group_datalad.py

@@ -1,31 +0,0 @@
-"""
-call process_DI_for_country_group_datalad
-
-Wrapper around process_DI_for_country_group_datalad such that it can be called
-from doit in the current setup where doit runs on system python and
-not in the venv.
-"""
-
-import argparse
-
-from unfccc_ghg_data.unfccc_di_reader import process_DI_for_country_group_datalad
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser()
-    parser.add_argument(
-        "--annexI",
-        help="read for AnnexI countries (default is for " "non-AnnexI)",
-        action="store_true",
-    )
-    parser.add_argument(
-        "--date",
-        help="date of input data to use (default is None " "to read latest data)",
-        default=None,
-    )
-    args = parser.parse_args()
-    annexI = args.annexI
-    date_str = args.date
-    if date_str == "None":
-        date_str = None
-
-    process_DI_for_country_group_datalad(annexI=annexI, date_str=date_str)

+ 0 - 20
src/unfccc_ghg_data/unfccc_di_reader/read_unfccc_di_for_country_datalad.py

@@ -1,20 +0,0 @@
-"""
-Call read_DI_for_country_datalad
-
-wrapper around read_DI_for_country_datalad such that it can be called
-from doit in the current setup where doit runs on system python and
-not in the venv.
-"""
-
-import argparse
-
-from unfccc_ghg_data.unfccc_di_reader import read_DI_for_country_datalad
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser()
-    parser.add_argument("--country", help="Country name or code")
-
-    args = parser.parse_args()
-    country = args.country
-
-    read_DI_for_country_datalad(country)

+ 0 - 25
src/unfccc_ghg_data/unfccc_di_reader/read_unfccc_di_for_country_group_datalad.py

@@ -1,25 +0,0 @@
-"""
-Call read_DI_fro_country_group_datalad
-
-wrapper around read_DI_for_country_group_datalad such that it can be called
-from doit in the current setup where doit runs on system python and
-not in the venv.
-"""
-
-import argparse
-
-from unfccc_ghg_data.unfccc_di_reader import read_DI_for_country_group_datalad
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser()
-    parser.add_argument(
-        "--annexI",
-        help="read for AnnexI countries (default is for " "non-AnnexI)",
-        action="store_true",
-    )
-    args = parser.parse_args()
-    annexI = args.annexI
-
-    read_DI_for_country_group_datalad(
-        annexI=annexI,
-    )

+ 1 - 1
src/unfccc_ghg_data/unfccc_di_reader/unfccc_di_reader_datalad.py

@@ -88,7 +88,7 @@ def process_DI_for_country_datalad(
     """
     # get all the info for the country
     country_info = get_input_and_output_files_for_country_DI(
-        country, date_str, raw=True, verbose=True
+        country, date_str, raw=False, verbose=True
     )
 
     print(f"Attempting to process DI data for {country_info['name']}.")

+ 10 - 4
src/unfccc_ghg_data/unfccc_di_reader/unfccc_di_reader_helper.py

@@ -137,7 +137,7 @@ def determine_dataset_filename(
     return filename.relative_to(root_path)
 
 
-def get_input_and_output_files_for_country_DI(
+def get_input_and_output_files_for_country_DI(  # noqa: PLR0912
     country: str,
     date_str: str,
     raw: bool,
@@ -162,7 +162,10 @@ def get_input_and_output_files_for_country_DI(
     country_info["name"] = country_name
 
     # determine latest data
-    print(f"Determining output files for {country_name}")
+    if raw:
+        print(f"Determining output files for {country_name}")
+    else:
+        print(f"Determining input and output files for {country_name}")
 
     # get input files (only for processing)
     if raw:
@@ -172,6 +175,7 @@ def get_input_and_output_files_for_country_DI(
         if date_str is None:
             # get the latest date
             input_file = [find_latest_DI_data(country_code, raw=True)]
+
         else:
             input_file = [
                 determine_filename(country_code, date_str, raw=False, hash=False)
@@ -186,11 +190,13 @@ def get_input_and_output_files_for_country_DI(
                     ValueError,
                     f"Input file {input_file[0].name} is not a symlink "
                     f" or not existent. Check if the data you want to "
-                    f"process exists and if your repository is ",
+                    f"process exists and if your repository is clean.",
                 )
 
         input_files = [
-            f"{input_file.as_posix()}.{suffix}" for suffix in ["yaml", "csv", "nc"]
+            f"{file.parent / file.stem}.{suffix}"
+            for suffix in ["yaml", "csv", "nc"]
+            for file in input_file
         ]
 
         if verbose: