|
@@ -163,9 +163,15 @@ def read_crf_for_country( # noqa: PLR0912, PLR0915
|
|
|
unknown_categories = []
|
|
|
last_row_info = []
|
|
|
empty_tables = []
|
|
|
+ missing_worksheets = []
|
|
|
for table in tables:
|
|
|
# read table for all years
|
|
|
- ds_table, new_unknown_categories, new_last_row_info = read_crf_table(
|
|
|
+ (
|
|
|
+ ds_table,
|
|
|
+ new_unknown_categories,
|
|
|
+ new_last_row_info,
|
|
|
+ not_present,
|
|
|
+ ) = read_crf_table(
|
|
|
country_code,
|
|
|
table,
|
|
|
submission_year,
|
|
@@ -177,81 +183,103 @@ def read_crf_for_country( # noqa: PLR0912, PLR0915
|
|
|
unknown_categories = unknown_categories + new_unknown_categories
|
|
|
last_row_info = last_row_info + new_last_row_info
|
|
|
|
|
|
- # convert to PRIMAP2 IF
|
|
|
- # first drop the orig_cat_name col as it can have multiple values for
|
|
|
- # one category
|
|
|
- ds_table = ds_table.drop(columns=["orig_cat_name"])
|
|
|
+ if ds_table is not None:
|
|
|
+ # convert to PRIMAP2 IF
|
|
|
+ # first drop the orig_cat_name col as it can have multiple values for
|
|
|
+ # one category
|
|
|
+ ds_table = ds_table.drop(columns=["orig_cat_name"])
|
|
|
|
|
|
- # if we need to map entities pass this info to the conversion function
|
|
|
- if "entity_mapping" in crf_spec[table]:
|
|
|
- entity_mapping = crf_spec[table]["entity_mapping"]
|
|
|
- else:
|
|
|
- entity_mapping = None
|
|
|
- if submission_type == "CRF":
|
|
|
- meta_data_input = {
|
|
|
- "title": f"CRF data submitted in {submission_year} to the UNFCCC "
|
|
|
- f"in the {type_name} ({submission_type}) by {country_name}. "
|
|
|
- f"Submission date: {date_or_version}"
|
|
|
- }
|
|
|
- else:
|
|
|
- meta_data_input = {
|
|
|
- "title": f"Data submitted for round {submission_year} "
|
|
|
- f"to the UNFCCC in the {type_name} ({submission_type}) by "
|
|
|
- f"{country_name}. Submission version: {date_or_version}"
|
|
|
- }
|
|
|
- ds_table_if = convert_crf_table_to_pm2if(
|
|
|
- ds_table,
|
|
|
- submission_year,
|
|
|
- meta_data_input=meta_data_input,
|
|
|
- entity_mapping=entity_mapping,
|
|
|
- submission_type=submission_type,
|
|
|
- )
|
|
|
-
|
|
|
- # skip empty tables
|
|
|
- if (
|
|
|
- not ds_table_if.set_index(ds_table_if.attrs["dimensions"]["*"])
|
|
|
- .isna()
|
|
|
- .all(axis=None)
|
|
|
- ):
|
|
|
- # now convert to native PRIMAP2 format
|
|
|
- ds_table_pm2 = pm2.pm2io.from_interchange_format(ds_table_if)
|
|
|
-
|
|
|
- # if individual data for emissions and removals / recovery exist combine
|
|
|
- # them
|
|
|
- if (
|
|
|
- ("CO2 removals" in ds_table_pm2.data_vars)
|
|
|
- and ("CO2 emissions" in ds_table_pm2.data_vars)
|
|
|
- and "CO2" not in ds_table_pm2.data_vars
|
|
|
- ):
|
|
|
- # we can just sum to CO2 as we made sure that it doesn't exist.
|
|
|
- # If we have CO2 and removals but not emissions, CO2 already has
|
|
|
- # removals subtracted and we do nothing here
|
|
|
- ds_table_pm2["CO2"] = ds_table_pm2[
|
|
|
- ["CO2 emissions", "CO2 removals"]
|
|
|
- ].pr.sum(dim="entity", skipna=True, min_count=1)
|
|
|
- ds_table_pm2["CO2"].attrs["entity"] = "CO2"
|
|
|
+ # if we need to map entities pass this info to the conversion function
|
|
|
+ if "entity_mapping" in crf_spec[table]:
|
|
|
+ entity_mapping = crf_spec[table]["entity_mapping"]
|
|
|
+ else:
|
|
|
+ entity_mapping = None
|
|
|
+ if submission_type == "CRF":
|
|
|
+ meta_data_input = {
|
|
|
+ "title": f"CRF data submitted in {submission_year} to the "
|
|
|
+ f"UNFCCC in the {type_name} ({submission_type}) by "
|
|
|
+ f"{country_name}. "
|
|
|
+ f"Submission date: {date_or_version}"
|
|
|
+ }
|
|
|
+ else:
|
|
|
+ meta_data_input = {
|
|
|
+ "title": f"Data submitted for round {submission_year} "
|
|
|
+ f"to the UNFCCC in the {type_name} ({submission_type}) by "
|
|
|
+ f"{country_name}. Submission version: {date_or_version}"
|
|
|
+ }
|
|
|
+
|
|
|
+ if "decimal_sep" in crf_spec[table]["table"]:
|
|
|
+ decimal_sep = crf_spec[table]["table"]["decimal_sep"]
|
|
|
+ else:
|
|
|
+ decimal_sep = "."
|
|
|
+ if "thousands_sep" in crf_spec[table]["table"]:
|
|
|
+ thousands_sep = crf_spec[table]["table"]["thousands_sep"]
|
|
|
+ else:
|
|
|
+ thousands_sep = ","
|
|
|
+
|
|
|
+ ds_table_if = convert_crf_table_to_pm2if(
|
|
|
+ ds_table,
|
|
|
+ submission_year,
|
|
|
+ meta_data_input=meta_data_input,
|
|
|
+ entity_mapping=entity_mapping,
|
|
|
+ submission_type=submission_type,
|
|
|
+ decimal_sep=decimal_sep,
|
|
|
+ thousands_sep=thousands_sep,
|
|
|
+ )
|
|
|
|
|
|
+ # skip empty tables
|
|
|
if (
|
|
|
- ("CH4 removals" in ds_table_pm2.data_vars)
|
|
|
- and ("CH4 emissions" in ds_table_pm2.data_vars)
|
|
|
- and "CH4" not in ds_table_pm2.data_vars
|
|
|
+ not ds_table_if.set_index(ds_table_if.attrs["dimensions"]["*"])
|
|
|
+ .isna()
|
|
|
+ .all(axis=None)
|
|
|
):
|
|
|
- # we can just sum to CH4 as we made sure that it doesn't exist.
|
|
|
- # If we have CH4 and removals but not emissions, CH4 already has
|
|
|
- # removals subtracted and we do nothing here
|
|
|
- ds_table_pm2["CH4"] = ds_table_pm2[
|
|
|
- ["CH4 emissions", "CH4 removals"]
|
|
|
- ].pr.sum(dim="entity", skipna=True, min_count=1)
|
|
|
- ds_table_pm2["CH4"].attrs["entity"] = "CH4"
|
|
|
-
|
|
|
- # combine per table DS
|
|
|
- if ds_all is None:
|
|
|
- ds_all = ds_table_pm2
|
|
|
+ # now convert to native PRIMAP2 format
|
|
|
+ ds_table_pm2 = pm2.pm2io.from_interchange_format(ds_table_if)
|
|
|
+
|
|
|
+ # if individual data for emissions and removals / recovery exist
|
|
|
+ # combine them
|
|
|
+ if (
|
|
|
+ ("CO2 removals" in ds_table_pm2.data_vars)
|
|
|
+ and ("CO2 emissions" in ds_table_pm2.data_vars)
|
|
|
+ and "CO2" not in ds_table_pm2.data_vars
|
|
|
+ ):
|
|
|
+ # we can just sum to CO2 as we made sure that it doesn't exist.
|
|
|
+ # If we have CO2 and removals but not emissions, CO2 already has
|
|
|
+ # removals subtracted and we do nothing here
|
|
|
+ ds_table_pm2["CO2"] = ds_table_pm2[
|
|
|
+ ["CO2 emissions", "CO2 removals"]
|
|
|
+ ].pr.sum(dim="entity", skipna=True, min_count=1)
|
|
|
+ ds_table_pm2["CO2"].attrs["entity"] = "CO2"
|
|
|
+
|
|
|
+ if (
|
|
|
+ ("CH4 removals" in ds_table_pm2.data_vars)
|
|
|
+ and ("CH4 emissions" in ds_table_pm2.data_vars)
|
|
|
+ and "CH4" not in ds_table_pm2.data_vars
|
|
|
+ ):
|
|
|
+ # we can just sum to CH4 as we made sure that it doesn't exist.
|
|
|
+ # If we have CH4 and removals but not emissions, CH4 already has
|
|
|
+ # removals subtracted and we do nothing here
|
|
|
+ ds_table_pm2["CH4"] = ds_table_pm2[
|
|
|
+ ["CH4 emissions", "CH4 removals"]
|
|
|
+ ].pr.sum(dim="entity", skipna=True, min_count=1)
|
|
|
+ ds_table_pm2["CH4"].attrs["entity"] = "CH4"
|
|
|
+
|
|
|
+ # combine per table DS
|
|
|
+ if ds_all is None:
|
|
|
+ ds_all = ds_table_pm2
|
|
|
+ else:
|
|
|
+ ds_all = ds_all.combine_first(ds_table_pm2)
|
|
|
else:
|
|
|
- ds_all = ds_all.combine_first(ds_table_pm2)
|
|
|
+ # log that table is empty
|
|
|
+ empty_tables.append([table, country_code, ""])
|
|
|
+ elif not_present:
|
|
|
+ # log that table is not present
|
|
|
+ missing_worksheets.append([table, country_code, ""])
|
|
|
else:
|
|
|
- # log that table is empty
|
|
|
- empty_tables.append(table)
|
|
|
+ print(
|
|
|
+ f"Empty DataFrame returned for table {table}, "
|
|
|
+ f"country {country_code}. Check log for errors."
|
|
|
+ )
|
|
|
|
|
|
# check if there were log messages.
|
|
|
save_data = True
|
|
@@ -296,6 +324,19 @@ def read_crf_for_country( # noqa: PLR0912, PLR0915
|
|
|
)
|
|
|
save_empty_tables_info(empty_tables, log_location)
|
|
|
|
|
|
+ if len(missing_worksheets) > 0:
|
|
|
+ today = date.today()
|
|
|
+ log_location = (
|
|
|
+ log_path
|
|
|
+ / f"{submission_type}{submission_year}"
|
|
|
+ / f"{country_code}_missing_tables_{today.strftime('%Y-%m-%d')}.csv"
|
|
|
+ )
|
|
|
+ print(
|
|
|
+ f"Missing worksheets for {country_code}: "
|
|
|
+ f"{empty_tables}. Save log to {log_location}"
|
|
|
+ )
|
|
|
+ save_empty_tables_info(missing_worksheets, log_location)
|
|
|
+
|
|
|
if save_data:
|
|
|
compression = dict(zlib=True, complevel=9)
|
|
|
output_folder = extracted_data_path_UNFCCC / country_name.replace(" ", "_")
|