@@ -74,6 +74,7 @@ def read_year_to_test_specs(  # noqa: PLR0912, PLR0915
     exceptions = []
     unknown_categories = []
     last_row_info = []
+    empty_tables = []
     ds_all = None
     print(
         f"{submission_type} test reading for {submission_type}{submission_year}. "
@@ -213,42 +214,52 @@ def read_year_to_test_specs(  # noqa: PLR0912, PLR0915
                     submission_type=submission_type,
                 )
 
-                # now convert to native PRIMAP2 format
-                ds_table_pm2 = pm2.pm2io.from_interchange_format(ds_table_if)
-
-                # if individual data for emissions and removals / recovery exist
-                # combine them
+                # skip empty tables
                 if (
-                    ("CO2 removals" in ds_table_pm2.data_vars)
-                    and ("CO2 emissions" in ds_table_pm2.data_vars)
-                    and "CO2" not in ds_table_pm2.data_vars
+                    not ds_table_if.set_index(ds_table_if.attrs["dimensions"]["*"])
+                    .isna()
+                    .all(axis=None)
                 ):
-                    # we can just sum to CO2 as we made sure that it doesn't exist.
-                    # If we have CO2 and removals but not emissions, CO2 already has
-                    # removals subtracted and we do nothing here
-                    ds_table_pm2["CO2"] = ds_table_pm2[
-                        ["CO2 emissions", "CO2 removals"]
-                    ].pr.sum(dim="entity", skipna=True, min_count=1)
-                    ds_table_pm2["CO2"].attrs["entity"] = "CO2"
-
-                if (
-                    ("CH4 removals" in ds_table_pm2.data_vars)
-                    and ("CH4 emissions" in ds_table_pm2.data_vars)
-                    and "CH4" not in ds_table_pm2.data_vars
-                ):
-                    # we can just sum to CH4 as we made sure that it doesn't exist.
-                    # If we have CH4 and removals but not emissions, CH4 already has
-                    # removals subtracted and we do nothing here
-                    ds_table_pm2["CH4"] = ds_table_pm2[
-                        ["CH4 emissions", "CH4 removals"]
-                    ].pr.sum(dim="entity", skipna=True, min_count=1)
-                    ds_table_pm2["CH4"].attrs["entity"] = "CH4"
-
-                # combine per table DS
-                if ds_all is None:
-                    ds_all = ds_table_pm2
+                    # now convert to native PRIMAP2 format
+                    ds_table_pm2 = pm2.pm2io.from_interchange_format(ds_table_if)
+
+                    # if individual data for emissions and removals / recovery exist
+                    # combine them
+                    if (
+                        ("CO2 removals" in ds_table_pm2.data_vars)
+                        and ("CO2 emissions" in ds_table_pm2.data_vars)
+                        and "CO2" not in ds_table_pm2.data_vars
+                    ):
+                        # we can just sum to CO2 as we made sure that it doesn't
+                        # exist.
+                        # If we have CO2 and removals but not emissions, CO2 already
+                        # has removals subtracted and we do nothing here
+                        ds_table_pm2["CO2"] = ds_table_pm2[
+                            ["CO2 emissions", "CO2 removals"]
+                        ].pr.sum(dim="entity", skipna=True, min_count=1)
+                        ds_table_pm2["CO2"].attrs["entity"] = "CO2"
+
+                    if (
+                        ("CH4 removals" in ds_table_pm2.data_vars)
+                        and ("CH4 emissions" in ds_table_pm2.data_vars)
+                        and "CH4" not in ds_table_pm2.data_vars
+                    ):
+                        # we can just sum to CH4 as we made sure that it doesn't
+                        # exist.
+                        # If we have CH4 and removals but not emissions, CH4 already
+                        # has removals subtracted and we do nothing here
+                        ds_table_pm2["CH4"] = ds_table_pm2[
+                            ["CH4 emissions", "CH4 removals"]
+                        ].pr.sum(dim="entity", skipna=True, min_count=1)
+                        ds_table_pm2["CH4"].attrs["entity"] = "CH4"
+
+                    # combine per table DS
+                    if ds_all is None:
+                        ds_all = ds_table_pm2
+                    else:
+                        ds_all = ds_all.combine_first(ds_table_pm2)
                 else:
-                    ds_all = ds_all.combine_first(ds_table_pm2)
+                    empty_tables.append([table, current_country_code, data_year])
             except Exception as e:
                 message = (
                     f"Error occurred when converting table {table} for"
@@ -291,6 +302,21 @@ def read_year_to_test_specs(  # noqa: PLR0912, PLR0915
         print(f"Data found in the last row. Saving log to " f"{log_location}")
         save_last_row_info(last_row_info, log_location)
 
+    if len(empty_tables) > 0:
+        today = date.today()
+        if country_code is not None:
+            log_location = (
+                output_folder / f"{data_year}_empty_tables_{country_code}_"
+                f"{today.strftime('%Y-%m-%d')}.csv"
+            )
+        else:
+            log_location = (
+                output_folder / f"{data_year}_empty_tables_"
+                f"{today.strftime('%Y-%m-%d')}.csv"
+            )
+ print(f"Empty tables found:. Save log to {log_location}")
+        save_empty_tables_info(empty_tables, log_location)
+
     # write exceptions
     f_ex = open(
         output_folder / f"{data_year}_exceptions_{today.strftime('%Y-%m-%d')}.txt", "w"
@@ -447,3 +473,50 @@ def save_last_row_info(
         processed_last_row_info, columns=["Table", "Country", "Categories"]
     )
     df_processed_lost_row_info.to_csv(file, index=False)
+
+
+def save_empty_tables_info(
+    empty_tables: list[list],
+    file: Path,
+) -> None:
+    """
+    Save information on empty tables to a csv file.
+
+    Parameters
+    ----------
+    empty_tables: List[List]
+        List of lists with information on the empty tables
+        (which table, country and year).
+
+    file: pathlib.Path
+        File including path where the data should be stored
+
+    """
+    # process empty tables
+    df_empty_tables = pd.DataFrame(empty_tables, columns=["Table", "Country", "Year"])
+
+    processed_tables = []
+    all_tables = df_empty_tables["Table"].unique()
+    all_years = set(df_empty_tables["Year"].unique())
+    all_years = set([year for year in all_years if isinstance(year, int)])
+    all_years = set([year for year in all_years if int(year) > 1989])  # noqa: PLR2004
+    for table in all_tables:
+        df_current_table = df_empty_tables[df_empty_tables["Table"] == table]
+        all_countries = df_current_table["Country"].unique()
+        countries_table = ""
+        for country in all_countries:
+            years_country = df_current_table[df_current_table["Country"] == country][
+                "Year"
+            ].unique()
+            if set(years_country) == all_years:
+                countries_table = f"{countries_table}; {country}"
+            else:
+                countries_table = f"{countries_table}; {country} ({years_country})"
+        processed_tables.append([table, countries_table])
+
+    if not file.parents[1].exists():
+        file.parents[1].mkdir()
+    if not file.parents[0].exists():
+        file.parents[0].mkdir()
+    df_processed_tables = pd.DataFrame(processed_tables, columns=["Table", "Countries"])
+    df_processed_tables.to_csv(file, index=False)
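
Note: the empty-table check introduced above boils down to a plain pandas test on the interchange-format DataFrame. Below is a minimal, self-contained sketch of that check on a toy table; the column names and values are illustrative only and are not taken from real CRF data.

import pandas as pd

# Toy stand-in for a PRIMAP2 interchange-format table: the dimension columns
# listed under attrs["dimensions"]["*"] index the data, the remaining (year)
# columns hold the values. Names here are assumptions for illustration.
ds_table_if = pd.DataFrame(
    {
        "area (ISO3)": ["AUS", "AUS"],
        "category": ["1.A", "1.B"],
        "entity": ["CO2", "CH4"],
        "unit": ["Gg CO2 / yr", "Gg CH4 / yr"],
        "2021": [float("nan"), float("nan")],
    }
)
ds_table_if.attrs["dimensions"] = {"*": ["area (ISO3)", "category", "entity", "unit"]}

# True when every value outside the dimension columns is NaN, i.e. the table
# carries no numbers; the new code above skips (and logs) such tables.
table_is_empty = (
    ds_table_if.set_index(ds_table_if.attrs["dimensions"]["*"]).isna().all(axis=None)
)
print(table_is_empty)  # True for this toy table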