2 years ago · 72959753f1
--- a/.gitignore
+++ b/.gitignore
@@ -5,3 +5,4 @@ __pycache__
 
				 /JG_test_code/
			
 
				 .doit.db
			
 
				 log
			
 
				+datasets
			
--- a/code/UNFCCC_CRF_reader/CRF_raw_for_year.py
+++ b/code/UNFCCC_CRF_reader/CRF_raw_for_year.py
@@ -0,0 +1,97 @@
 
				+"""
			
 
				+This script collects all latest CRF submissions for a given year
			
 
				+
			
 
				+Currently it only checks the extracted_data folder and not if new
			
 
				+submission are available in the downloaded data folder.
			
 
				+"""
			
 
				+
			
 
				+import argparse
			
 
				+import sys
			
 
				+import primap2 as pm2
			
 
				+from pathlib import Path
			
 
				+from datetime import date
			
 
				+
			
 
				+root_path = Path(__file__).parents[2].absolute()
			
 
				+root_path = root_path.resolve()
			
 
				+#log_path = root_path / "log"
			
 
				+code_path = root_path / "code"
			
 
				+downloaded_data_path = root_path / "downloaded_data" / "UNFCCC"
			
 
				+extracted_data_path = root_path / "extracted_data" / "UNFCCC"
			
 
				+dataset_path = root_path / "datasets" / "UNFCCC"
			
 
				+
			
 
				+sys.path.append(code_path.name)
			
 
				+
			
 
				+from UNFCCC_CRF_reader.util import all_crf_countries
			
 
				+from UNFCCC_CRF_reader.UNFCCC_CRF_reader_prod import get_input_and_output_files_for_country
			
 
				+from UNFCCC_CRF_reader.UNFCCC_CRF_reader_prod import submission_has_been_read
			
 
				+
			
 
				+parser = argparse.ArgumentParser()
			
 
				+parser.add_argument('--submission_year', help='Submission round to read', type=int)
			
 
				+args = parser.parse_args()
			
 
				+submission_year = args.submission_year
			
 
				+
			
 
				+ds_all_CRF = None
			
 
				+outdated_countries = []
			
 
				+included_countries = []
			
 
				+
			
 
				+for country in all_crf_countries:
			
 
				+    # determine folder
			
 
				+    try:
			
 
				+        country_info = get_input_and_output_files_for_country(
			
 
				+            country, submission_year=submission_year, verbose=False)
			
 
				+
			
 
				+        # check if the latest submission has been read already
			
 
				+
			
 
				+        data_read = submission_has_been_read(
			
 
				+            country_info["code"], country_info["name"],
			
 
				+            submission_year=submission_year,
			
 
				+            submission_date=country_info["date"],
			
 
				+            verbose=False,
			
 
				+        )
			
 
				+        if not data_read:
			
 
				+            print(f"Latest submission for {country} has not been read yet.")
			
 
				+            outdated_countries.append(country)
			
 
				+
			
 
				+        # read the native format file
			
 
				+        input_files = [file for file in country_info["input"] if file.suffix == ".nc"]
			
 
				+
			
 
				+        ds_country = pm2.open_dataset(input_files[0].as_posix())
			
 
				+
			
 
				+        # combine per table DS
			
 
				+        if ds_all_CRF is None:
			
 
				+            ds_all_CRF = ds_country
			
 
				+        else:
			
 
				+            ds_all_CRF = ds_all_CRF.combine_first(ds_country)
			
 
				+
			
 
				+        included_countries.append(country)
			
 
				+
			
 
				+    except Exception as ex:
			
 
				+        print(f"Exception {ex} occurred for {country}")
			
 
				+
			
 
				+
			
 
				+# Update metadata
			
 
				+# not necessary
			
 
				+
			
 
				+# write to disc
			
 
				+today = date.today()
			
 
				+
			
 
				+compression = dict(zlib=True, complevel=9)
			
 
				+output_folder = dataset_path / f"CRF{submission_year}"
			
 
				+output_filename = f"CRF{submission_year}_raw_{today.strftime('%Y-%m-%d')}"
			
 
				+
			
 
				+if not output_folder.exists():
			
 
				+    output_folder.mkdir()
			
 
				+
			
 
				+# write data in interchange format
			
 
				+pm2.pm2io.write_interchange_format(output_folder / output_filename,
			
 
				+                                   ds_all_CRF.pr.to_interchange_format())
			
 
				+
			
 
				+# write data in native PRIMAP2 format
			
 
				+encoding = {var: compression for var in ds_all_CRF.data_vars}
			
 
				+ds_all_CRF.pr.to_netcdf(output_folder / (output_filename + ".nc"),
			
 
				+                      encoding=encoding)
			
 
				+
			
 
				+# show info
			
 
				+print(f"The following countries are included in the dataset: {included_countries}")
			
 
				+print(f"The following countries have updated submission not yet read "
			
 
				+      f"and not included in the dataset: {outdated_countries}")