Browse Source

started integration of DI reading to doit infrastructure

Johannes Gütschow 1 year ago
parent
commit
a1e1c7558b

+ 1 - 0
DI_reading.dia

@@ -0,0 +1 @@
+.git/annex/objects/75/Pv/MD5E-s4431--8911139e2988aae3466a7b67ae6278a4.dia/MD5E-s4431--8911139e2988aae3466a7b67ae6278a4.dia

+ 37 - 61
UNFCCC_GHG_data/UNFCCC_DI_reader/UNFCCC_DI_reader_core.py

@@ -7,17 +7,20 @@ import itertools
 import json
 import copy
 import xarray as xr
+import datalad.api
 from datetime import date
-from typing import Optional, Dict, List
+from typing import Optional, Dict, List, Union
 from pathlib import Path
 from copy import deepcopy
 
-from UNFCCC_DI_reader_config import di_to_pm2if_template_nai
-from UNFCCC_DI_reader_config import di_to_pm2if_template_ai
-from UNFCCC_DI_reader_config import di_query_filters
-from UNFCCC_DI_reader_config import cat_conversion
-from util import NoDIDataError, extracted_data_path, get_country_name
-from util import nAI_countries, AI_countries
+from .UNFCCC_DI_reader_config import di_to_pm2if_template_nai
+from .UNFCCC_DI_reader_config import di_to_pm2if_template_ai
+from .UNFCCC_DI_reader_config import di_query_filters
+from .UNFCCC_DI_reader_config import cat_conversion
+from .util import NoDIDataError, extracted_data_path, \
+    get_country_name, get_country_code
+from .util import nAI_countries, AI_countries, custom_country_mapping
+from .util import code_path, root_path
 
 
 def read_UNFCCC_DI_for_country(
@@ -617,6 +620,7 @@ def convert_DI_IF_data_to_pm2(
 
     return data_pm2
 
+
 ## datalad and pydoit interface functions
 def read_DI_for_country_datalad(
         country: str,
@@ -629,27 +633,31 @@ def read_DI_for_country_datalad(
     __________
 
     country: str
-        ISO 3-letter country code
+        country name or ISO 3-letter country code
 
     """
 
+    # get date to determine output filename
+    date_str = str(date.today())
+
     # get all the info for the country
-    country_info = get_input_and_output_files_for_country(
-        country, submission_year=submission_year, verbose=True)
+    country_info = get_output_files_for_country_DI(country, date_str,
+                                                   raw=True, verbose=True)
 
-    print(f"Attempting to read DI data for {country}.")
+    print(f"Attempting to read DI data for {country_info['name']}.")
     print("#"*80)
     print("")
     print(f"Using the UNFCCC_DI_reader")
     print("")
     print(f"Run the script using datalad run via the python api")
-    script = code_path / "UNFCCC_DI_reader" / "read_UNFCCC_DI_country.py"
+    script = code_path / "UNFCCC_DI_reader" / "read_UNFCCC_DI_for_country.py"
 
-    cmd = f"./venv/bin/python3 {script.as_posix()} --country={country} ""
+    cmd = f"./venv/bin/python3 {script.as_posix()} --country={country_info['code']} " \
+          f"--date={date_str}"
     datalad.api.run(
         cmd=cmd,
         dataset=root_path,
-        message=f"Read DI data for {country}.",
+        message=f"Read DI data for {country_info['name']}.",
         inputs=country_info["input"],
         outputs=country_info["output"],
         dry_run=None,
@@ -775,10 +783,11 @@ def convert_categories(
 
     return ds_converted
 
-def get_input_and_output_files_for_country(
+
+def get_output_files_for_country_DI(
         country: str,
-        submission_year: int,
-        submission_date: Optional[str]=None,
+        date_str: str,
+        raw: bool,
         verbose: Optional[bool]=True,
 ) -> Dict[str, Union[List, str]]:
     """
@@ -795,61 +804,28 @@ def get_input_and_output_files_for_country(
     country_name = get_country_name(country_code)
     country_info["code"] = country_code
     country_info["name"] = country_name
+    # now get the country name
+    country_name = get_country_name(country_code)
+    country_info["code"] = country_code
+    country_info["name"] = country_name
 
     # determine latest data
-    print(f"Determining input and output files for {country}")
-    if submission_date is None:
-        if verbose:
-            print(f"No submission date given, find latest date.")
-        submission_date = get_latest_date_for_country(country_code, submission_year)
-    else:
-        if verbose:
-            print(f"Using given submissions date {submission_date}")
-
-    if submission_date is None:
-        # there is no data. Raise an exception
-        raise NoCRFFilesError(f"No submissions found for {country_code}, "
-                              f"submission_year={submission_year}, "
-                              f"date={date}")
-    else:
-        if verbose:
-            print(f"Latest submission date for CRF{submission_year} is {submission_date}")
-    country_info["date"] = submission_date
-
-    # get possible input files
-    input_files = get_crf_files(country_codes=country_code,
-                                submission_year=submission_year,
-                                date=submission_date)
-    if not input_files:
-        raise NoCRFFilesError(f"No possible input files found for {country}, CRF{submission_year}, "
-                              f"v{submission_date}. Are they already submitted and included in the "
-                              f"repository?")
-    elif verbose:
-        print(f"Found the following input_files:")
-        for file in input_files:
-            print(file.name)
-        print("")
+    print(f"Determining output files for {country_name}")
 
+    # get output files
+    output_file = determine_filename(country_code, date_str, raw=raw)
 
-    # convert file's path to str
-    input_files = [file.as_posix() for file in input_files]
-    country_info["input"] = input_files
-
-    # get output file
-    output_folder = extracted_data_path / country_name.replace(" ", "_")
-    output_files = [output_folder / f"{country_code}_CRF{submission_year}"
-                                    f"_{submission_date}.{suffix}" for suffix
+    output_files = [f"{str(output_file)}.{suffix}" for suffix
                     in ['yaml', 'csv', 'nc']]
+
     if verbose:
         print(f"The following files are considered as output_files:")
         for file in output_files:
             print(file)
         print("")
 
-    # check if output data present
-
-    # convert file paths to str
-    output_files = [file.as_posix() for file in output_files]
+    # add to country info
+    country_info["input"] = []
     country_info["output"] = output_files
 
     return country_info

+ 5 - 3
UNFCCC_GHG_data/UNFCCC_DI_reader/__init__.py

@@ -1,12 +1,14 @@
 # submodule to read data from UNFCCC DI API using the unfccc_di_api package
 
-import unfccc_di_api
-from UNFCCC_DI_reader_core import read_UNFCCC_DI_for_country_df, \
-    convert_DI_data_to_pm2_if, convert_DI_IF_data_to_pm2, determine_filename
+#import unfccc_di_api
+from .UNFCCC_DI_reader_core import read_UNFCCC_DI_for_country_df, \
+    convert_DI_data_to_pm2_if, convert_DI_IF_data_to_pm2, determine_filename, \
+    read_DI_for_country_datalad
 
 __all__ = [
     "read_UNFCCC_DI_for_country_df",
     "convert_DI_data_to_pm2_if",
     "convert_DI_IF_data_to_pm2",
     "determine_filename",
+    "read_DI_for_country_datalad",
 ]

+ 3 - 5
UNFCCC_GHG_data/UNFCCC_DI_reader/read_UNFCCC_DI_country_datalad.py

@@ -4,16 +4,14 @@ from doit in the current setup where doit runs on system python and
 not in the venv.
 """
 
-from UNFCCC_DI_reader_core import read_DI_for_country_datalad
+from UNFCCC_GHG_data.UNFCCC_DI_reader import \
+    read_DI_for_country_datalad
 import argparse
 
 parser = argparse.ArgumentParser()
 parser.add_argument('--country', help='Country name or code')
 
 args = parser.parse_args()
-
 country = args.country
 
-read_DI_for_country_datalad(
-    country,
-)
+read_DI_for_country_datalad(country)

+ 7 - 132
UNFCCC_GHG_data/UNFCCC_DI_reader/read_UNFCCC_DI_for_country.py

@@ -1,60 +1,20 @@
 """
-This script is a wrapper around the read_crf_for_country
+This script is a wrapper around the read_UNFCCC_DI_for_country
 function such that it can be called from datalad
 """
 
 import argparse
-import sys
-from datetime import date
-from util import code_path
-#from UNFCCC_CRF_reader import custom_country_mapping
-sys.path.append(code_path.name)
-from UNFCCC_DI_reader_core import read_UNFCCC_DI_for_country
-from UNFCCC_DI_reader_core import determine_filename
-from util import custom_country_mapping
-from util import get_country_name
-from util import get_country_code
+from UNFCCC_GHG_data.UNFCCC_DI_reader.UNFCCC_DI_reader_core import \
+    read_UNFCCC_DI_for_country
 
-#from pathlib import Path
-
-suffixes = ["nc", "yaml", "csv"]
 
 parser = argparse.ArgumentParser()
-parser.add_argument('--country', help='Country name or code')
+parser.add_argument('--country', help='Country code')
+parser.add_argument('--date', help='String with current date')
 args = parser.parse_args()
 
-country = args.country
-
-#country_info = {}
-if country in custom_country_mapping:
-    country_code = country
-else:
-    country_code = get_country_code(country)
-# now get the country name
-country_name = get_country_name(country_code)
-#country_info["code"] = country_code
-#country_info["name"] = country_name
-
-# get current date to pass on to other functions in case reading is done over night
-# and the date changes
-date_str = date.today().strftime('%Y-%m-%d')
-
-
-# TODO: this function: get output files and run datalad.
-# problem: the data. should be the same in file name and scenario but is determined
-# at two different places and might differ when running over night.  so it should be
-# a parameter determined in this function and passed on to the datalad function / script
-
-print(f"Attempting to read DI data for {country}.")
-print("#"*80)
-print("")
-print(f"Using the UNFCCC_CRF_reader")
-print("")
-
-# determine output files
-filename_base = determine_filename(country_code, date_str)
-
-# we have no input files as data is read from DI API
+country_code = args.country
+date_str = args.date
 
 read_UNFCCC_DI_for_country(
     country_code=country_code,
@@ -65,88 +25,3 @@ read_UNFCCC_DI_for_country(
     default_gwp=None, # automatically uses right default GWP for AI and NAI
     debug=False,
 )
-
-
-
-#    print(f"Run the script using datalad run via the python api")
-#    script = code_path / "UNFCCC_CRF_reader" / "read_UNFCCC_CRF_submission.py"
-#
-#     cmd = f"./venv/bin/python3 {script.as_posix()} --country={country} "\
-#           f"--submission_year={submission_year} --submission_date={submission_date}"
-#     if re_read:
-#         cmd = cmd + f" --re_read"
-#     datalad.api.run(
-#         cmd=cmd,
-#         dataset=root_path,
-#         message=f"Read data for {country}, CRF{submission_year}, {submission_date}.",
-#         inputs=country_info["input"],
-#         outputs=country_info["output"],
-#         dry_run=None,
-#         explicit=True,
-#     )
-
-
-
-
-################################3
-
-
-
-
-
-
-# country = args.country
-# submission = args.submission
-#
-# codepath = Path(__file__).parent
-# rootpath = codepath / ".." / ".."
-# rootpath = rootpath.resolve()
-#
-# if script_name is not None:
-#     print(f"Found code file {script_name}")
-#     print("")
-#
-#     # get possible input files
-#     input_files = get_possible_inputs(country, submission)
-#     if not input_files:
-#         print(f"No possible input files found for {country}, {submission}. "
-#               f"Something might be wrong here.")
-#     else:
-#         print(f"Found the following input_files:")
-#         for file in input_files:
-#             print(file)
-#         print("")
-#     # make input files absolute to avoid datalad confusions when
-#     # root directory is via symlink
-#     input_files = [rootpath / file for file in input_files]
-#     # convert file's path to str
-#     input_files = [file.as_posix() for file in input_files]
-#
-#     # get possible output files
-#     output_files = get_possible_outputs(country, submission)
-#     if not output_files:
-#         print(f"No possible output files found for {country}, {submission}. "
-#               f"This is either the first run or something is wrong.")
-#     else:
-#         print(f"Found the following output_files:")
-#         for file in output_files:
-#             print(file)
-#         print("")
-#     # convert file path's to str
-#     output_files = [file.as_posix() for file in output_files]
-#
-#     print(f"Run the script using datalad run via the python api")
-#     datalad.api.run(
-#         cmd=f"./venv/bin/python3 {script_name.as_posix()}",
-#         dataset=rootpath,
-#         message=f"Read data for {country}, {submission}.",
-#         inputs=input_files,
-#         outputs=output_files,
-#         dry_run=None,
-#         explicit=True,
-#     )
-# else:
-#     # no code found.
-#     print(f"No code found to read {submission} from {country}")
-#     print(f"Use 'doit country_info --country={country} to get "
-#           f"a list of available submissions and datasets.")

+ 1 - 1
UNFCCC_GHG_data/UNFCCC_DI_reader/util.py

@@ -20,7 +20,7 @@ class NoDIDataError(Exception):
 
 
 # the following is copied from other sub-packages
-# TODO: move these fucntions to common location to allow easy importing into all modules
+# TODO: move these functions to common location to allow easy importing into all modules
 custom_country_mapping = {
     "EUA": "European Union",
     "EUC": "European Union",

+ 23 - 0
dodo.py

@@ -221,7 +221,30 @@ def task_read_new_unfccc_crf_for_year():
         'setup': ['setup_venv'],
     }
 
+# tasks for DI reader
+# datalad run is called from within the read_UNFCCC_DI_country_datalad.py script
+read_config_di = {
+    "country": get_var('country', None),
+    #"countries": get_var('countries', None),
+}
+
+def task_read_unfccc_di_for_country():
+    """ Read DI data for a country """
+    actions = [
+        f"./venv/bin/python "
+        f"UNFCCC_GHG_data/UNFCCC_DI_reader/read_UNFCCC_DI_country_datalad.py "
+        f"--country={read_config_di['country']}",
+        f"./venv/bin/python UNFCCC_GHG_data/UNFCCC_reader/folder_mapping.py "
+        f"--folder=extracted_data/UNFCCC"
+        ]
+    return {
+        'actions': actions,
+        'verbosity': 2,
+        'setup': ['setup_venv'],
+    }
+
 
+# general tasks
 def task_country_info():
     """ Print information on submissions and datasets
     available for given country"""