
CRT1 specs for countries which have submitted so far (with a few country-specific things not implemented yet)

Johannes Gütschow, 2 months ago
parent
commit
6a2aba899a

+ 2 - 0
src/unfccc_ghg_data/unfccc_crf_reader/crf_specifications/__init__.py

@@ -7,6 +7,7 @@ from .crf2022_specification import CRF2022
 from .crf2023_aus_specification import CRF2023_AUS
 from .crf2023_specification import CRF2023
 from .crf2024_specification import CRF2024
+from .crt1_pry_specification import CRT1_PRY
 from .crt1_specification import CRT1
 from .crt1_tun_specification import CRT1_TUN
 
@@ -17,5 +18,6 @@ __all__ = [
     "CRF2023_AUS",
     "CRF2024",
     "CRT1",
+    "CRT1_PRY",
     "CRT1_TUN",
 ]
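The country-specific specifications follow a CRT1_<ISO3> naming pattern (PRY is Paraguay, TUN is Tunisia). Below is a hedged sketch of how a caller could resolve a country-specific spec with a fallback to the generic CRT1; get_crt1_spec is a hypothetical helper used for illustration, not part of this commit.

# Hedged sketch: resolve a country-specific CRT1 spec, falling back to the
# generic one. Only the CRT1_<ISO3> naming pattern is taken from this commit;
# the helper itself is hypothetical.
from unfccc_ghg_data.unfccc_crf_reader import crf_specifications as specs


def get_crt1_spec(country_code: str) -> dict:
    """Return CRT1_<ISO3> if it exists, otherwise the generic CRT1 spec."""
    return getattr(specs, f"CRT1_{country_code}", specs.CRT1)


spec = get_crt1_spec("PRY")  # -> CRT1_PRY
spec = get_crt1_spec("DEU")  # -> generic CRT1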

+ 20 - 0
src/unfccc_ghg_data/unfccc_crf_reader/crf_specifications/crt1_chl_specification.py

@@ -0,0 +1,20 @@
+"""
+
+CRT1 specification for Chile.
+
+Header in Table3.B(b) differs. This is a quick fix. In the future, the column matching
+should be improved to allow for different column names, at least for the ignored columns.
+
+"""
+
+from copy import deepcopy
+
+from .crt1_specification import CRT1
+
+gwp_to_use = "AR5GWP100"
+
+CRT1_CHL = deepcopy(CRT1)
+
+CRT1_CHL["Table3.B(b)"]["table"]["cols_to_ignore"][
+    3
+] = "ACTIVITY DATA AND OTHER RELATED INFORMATION Typical animal mass (average) (kg/ animal)"

+ 16 - 1151
src/unfccc_ghg_data/unfccc_crf_reader/crf_specifications/crt1_pry_specification.py
The file diff was not shown because the file is too large.


+ 367 - 128
src/unfccc_ghg_data/unfccc_crf_reader/crf_specifications/crt1_specification.py
The file diff was not shown because the file is too large.


+ 34 - 3
src/unfccc_ghg_data/unfccc_crf_reader/unfccc_crf_reader_core.py

@@ -32,7 +32,7 @@ pd.set_option("future.no_silent_downcasting", True)
 
 
 ### reading functions
-def convert_crf_table_to_pm2if(  # noqa: PLR0912, PLR0913
+def convert_crf_table_to_pm2if(  # noqa: PLR0912, PLR0913, PLR0915
     df_table: pd.DataFrame,
     submission_year: int,
     entity_mapping: dict[str, str] | None = None,
@@ -42,6 +42,7 @@ def convert_crf_table_to_pm2if(  # noqa: PLR0912, PLR0913
     meta_data_input: dict[str, str] | None = None,
     submission_type: str = "CRF",
     decimal_sep: str = ".",
+    thousands_sep: str = ",",
 ) -> pd.DataFrame:
     """
     Convert a given pandas long format crf table to PRIMAP2 interchange format
@@ -69,6 +70,8 @@ def convert_crf_table_to_pm2if(  # noqa: PLR0912, PLR0913
         read CRF or CRF data
     decimal_sep: str default = '.'
         decimal separator to use to interpret the data.
+    thousands_sep: str default = ','
+        thousands separator to use to interpret the data.
 
     Returns
     -------
@@ -178,6 +181,25 @@ def convert_crf_table_to_pm2if(  # noqa: PLR0912, PLR0913
             meta_data[key] = meta_data_input[key]
 
     # fix decimal separator
+    sep_regexp_special = ["."]
+
+    if decimal_sep != ".":
+        if thousands_sep in sep_regexp_special:
+            regex_thousands = f"([0-9]+)\\{thousands_sep}([0-9,]+)"
+        else:
+            regex_thousands = f"([0-9]+){thousands_sep}([0-9,]+)"
+        if decimal_sep in sep_regexp_special:
+            regex_decimal = f"([0-9]+)\\{decimal_sep}([0-9]+)"
+        else:
+            regex_decimal = f"([0-9]+){decimal_sep}([0-9]+)"
+        # first remove thousand sep
+        df_table = df_table.replace(
+            to_replace=regex_thousands, value=r"\1\2", regex=True
+        )
+        # now replace the decimal sep by a dot
+        df_table = df_table.replace(
+            to_replace=regex_decimal, value=r"\1.\2", regex=True
+        )
 
     df_table_if = pm2.pm2io.convert_long_dataframe_if(
         df_table,
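The added block strips a thousands separator and converts the decimal separator to "." before the numeric conversion. Below is a self-contained sketch of the same two-step regex replacement, using re.escape instead of the hand-maintained list of regex special characters; the function name and sample values are invented.

# Self-contained sketch of the two-step separator normalisation added above.
import re

import pandas as pd


def normalise_separators(
    df: pd.DataFrame, decimal_sep: str = ",", thousands_sep: str = "."
) -> pd.DataFrame:
    """Drop the thousands separator, then turn the decimal separator into '.'."""
    if decimal_sep == ".":
        return df  # data already uses the default separators
    thousands = re.escape(thousands_sep)
    decimal = re.escape(decimal_sep)
    # step 1: remove the thousands separator ("1.234,56" -> "1234,56")
    df = df.replace(
        to_replace=f"([0-9]+){thousands}([0-9,]+)", value=r"\1\2", regex=True
    )
    # step 2: replace the decimal separator by a dot ("1234,56" -> "1234.56")
    df = df.replace(
        to_replace=f"([0-9]+){decimal}([0-9]+)", value=r"\1.\2", regex=True
    )
    return df


df = pd.DataFrame({"2021": ["1.234,56", "0,75", "NO"]})
print(normalise_separators(df)["2021"].tolist())  # ['1234.56', '0.75', 'NO']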
@@ -204,7 +226,7 @@ def read_crf_table(  # noqa: PLR0913, PLR0912, PLR0915
     folder: str | None = None,
     submission_type: str = "CRF",
     debug: bool = False,
-) -> tuple[pd.DataFrame, list[list], list[list]]:
+) -> tuple[pd.DataFrame, list[list], list[list], bool]:
     """
     Read CRF table for given year and country/countries
 
@@ -246,6 +268,7 @@ def read_crf_table(  # noqa: PLR0913, PLR0912, PLR0915
         * Third return parameter holds information on data found in the last read row.
           This is used as a hint to check if table specifications might have to
           be adapted as country submitted tables are longer than expected.
+        * The fourth return parameter is true if the worksheet to read is not present in the file
 
     """
     # check type
@@ -342,6 +365,7 @@ def read_crf_table(  # noqa: PLR0913, PLR0912, PLR0915
     df_all = None
     unknown_rows = []
     last_row_info = []
+    not_present = False
     for file in input_files:
         file_info = get_info_from_crf_filename(file.name)
         try:
@@ -359,10 +383,17 @@ def read_crf_table(  # noqa: PLR0913, PLR0912, PLR0915
                 df_all = pd.concat([df_this_file, df_all])
                 unknown_rows = unknown_rows + unknown_rows_this_file
                 last_row_info = last_row_info + last_row_info_this_file
+        except ValueError as e:
+            if e.args[0] == f"Worksheet named '{table}' not found":
+                print(f"Table {table} not present")
+                not_present = True
+                pass
+            else:
+                print(f"Error when reading file {file}. Skipping file. Exception: {e}")
         except Exception as e:
             print(f"Error when reading file {file}. Skipping file. Exception: {e}")
 
-    return df_all, unknown_rows, last_row_info
+    return df_all, unknown_rows, last_row_info, not_present
 
 
 def read_crf_table_from_file(  # noqa: PLR0912, PLR0915
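read_crf_table now returns a fourth value, not_present, set when pandas reports that the requested worksheet does not exist in the xlsx file. A sketch of that detection in isolation; the file path in the commented example is made up.

# Sketch of the missing-worksheet detection in isolation: pandas raises
# ValueError("Worksheet named '...' not found") for an absent sheet, which is
# turned into a boolean instead of a generic read error.
import pandas as pd


def try_read_sheet(path: str, sheet: str) -> tuple[pd.DataFrame | None, bool]:
    """Return (df, not_present) for one worksheet of an xlsx submission."""
    try:
        return pd.read_excel(path, sheet_name=sheet), False
    except ValueError as err:
        if err.args[0] == f"Worksheet named '{sheet}' not found":
            return None, True  # table genuinely missing from this submission
        raise  # any other ValueError is a real read problem


# example (path is illustrative):
# df, not_present = try_read_sheet("PRY_CRT1_2024.xlsx", "Table3.B(b)")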

+ 114 - 68
src/unfccc_ghg_data/unfccc_crf_reader/unfccc_crf_reader_devel.py

@@ -75,6 +75,7 @@ def read_year_to_test_specs(  # noqa: PLR0912, PLR0915
     unknown_categories = []
     last_row_info = []
     empty_tables = []
+    missing_worksheets = []
     ds_all = None
     print(
         f"{submission_type} test reading for {submission_type}{submission_year}. "
@@ -173,6 +174,7 @@ def read_year_to_test_specs(  # noqa: PLR0912, PLR0915
                         ds_table,
                         new_unknown_categories,
                         new_last_row_info,
+                        not_present,
                     ) = read_crf_table(
                         current_country_code,
                         table,
@@ -187,79 +189,108 @@ def read_year_to_test_specs(  # noqa: PLR0912, PLR0915
                     unknown_categories = unknown_categories + new_unknown_categories
                     last_row_info = last_row_info + new_last_row_info
 
-                    # convert to PRIMAP2 IF
-                    # first drop the orig_cat_name col as it can have multiple values
-                    # for one category
-                    ds_table = ds_table.drop(columns=["orig_cat_name"])
-
-                    # TODO: catch entity conversion errors and make list of error
-                    #  entities
-                    # if we need to map entities pass this info to the conversion
-                    # function
-                    if "entity_mapping" in crf_spec[table]:
-                        entity_mapping = crf_spec[table]["entity_mapping"]
-                    else:
-                        entity_mapping = None
-
-                    ds_table_if = convert_crf_table_to_pm2if(
-                        ds_table,
-                        submission_year,
-                        meta_data_input={
-                            "title": f"Data submitted in {submission_year} to the "
-                            f"UNFCCC in the {type_name} ({submission_type}) "
-                            f"by {country_name}. "
-                            f"Submission date / version: {date_or_version}"
-                        },
-                        entity_mapping=entity_mapping,
-                        submission_type=submission_type,
-                    )
-
-                    # skip empty tables
-                    if (
-                        not ds_table_if.set_index(ds_table_if.attrs["dimensions"]["*"])
-                        .isna()
-                        .all(axis=None)
-                    ):
-                        # now convert to native PRIMAP2 format
-                        ds_table_pm2 = pm2.pm2io.from_interchange_format(ds_table_if)
-
-                        # if individual data for emissions and removals / recovery exist
-                        # combine them
-                        if (
-                            ("CO2 removals" in ds_table_pm2.data_vars)
-                            and ("CO2 emissions" in ds_table_pm2.data_vars)
-                            and "CO2" not in ds_table_pm2.data_vars
-                        ):
-                            # we can just sum to CO2 as we made sure that it doesn't
-                            # exist.
-                            # If we have CO2 and removals but not emissions, CO2 already
-                            # has removals subtracted and we do nothing here
-                            ds_table_pm2["CO2"] = ds_table_pm2[
-                                ["CO2 emissions", "CO2 removals"]
-                            ].pr.sum(dim="entity", skipna=True, min_count=1)
-                            ds_table_pm2["CO2"].attrs["entity"] = "CO2"
+                    if ds_table is not None:
+                        # convert to PRIMAP2 IF
+                        # first drop the orig_cat_name col as it can have multiple
+                        # values for one category
+                        ds_table = ds_table.drop(columns=["orig_cat_name"])
+
+                        # TODO: catch entity conversion errors and make list of error
+                        #  entities
+                        # if we need to map entities pass this info to the conversion
+                        # function
+                        if "entity_mapping" in crf_spec[table]:
+                            entity_mapping = crf_spec[table]["entity_mapping"]
+                        else:
+                            entity_mapping = None
 
+                        if "decimal_sep" in crf_spec[table]["table"]:
+                            decimal_sep = crf_spec[table]["table"]["decimal_sep"]
+                        else:
+                            decimal_sep = "."
+                        if "thousands_sep" in crf_spec[table]["table"]:
+                            thousands_sep = crf_spec[table]["table"]["thousands_sep"]
+                        else:
+                            thousands_sep = ","
+
+                        ds_table_if = convert_crf_table_to_pm2if(
+                            ds_table,
+                            submission_year,
+                            meta_data_input={
+                                "title": f"Data submitted in {submission_year} to the "
+                                f"UNFCCC in the {type_name} ({submission_type}) "
+                                f"by {country_name}. "
+                                f"Submission date / version: {date_or_version}"
+                            },
+                            entity_mapping=entity_mapping,
+                            submission_type=submission_type,
+                            decimal_sep=decimal_sep,
+                            thousands_sep=thousands_sep,
+                        )
+
+                        # skip empty tables
                         if (
-                            ("CH4 removals" in ds_table_pm2.data_vars)
-                            and ("CH4 emissions" in ds_table_pm2.data_vars)
-                            and "CH4" not in ds_table_pm2.data_vars
+                            not ds_table_if.set_index(
+                                ds_table_if.attrs["dimensions"]["*"]
+                            )
+                            .isna()
+                            .all(axis=None)
                         ):
-                            # we can just sum to CH4 as we made sure that it doesn't
-                            # exist.
-                            # If we have CH4 and removals but not emissions, CH4 already
-                            # has removals subtracted and we do nothing here
-                            ds_table_pm2["CH4"] = ds_table_pm2[
-                                ["CH4 emissions", "CH4 removals"]
-                            ].pr.sum(dim="entity", skipna=True, min_count=1)
-                            ds_table_pm2["CH4"].attrs["entity"] = "CH4"
-
-                        # combine per table DS
-                        if ds_all is None:
-                            ds_all = ds_table_pm2
+                            # now convert to native PRIMAP2 format
+                            ds_table_pm2 = pm2.pm2io.from_interchange_format(
+                                ds_table_if
+                            )
+
+                            # if individual data for emissions and removals /
+                            # recovery exist combine them
+                            if (
+                                ("CO2 removals" in ds_table_pm2.data_vars)
+                                and ("CO2 emissions" in ds_table_pm2.data_vars)
+                                and "CO2" not in ds_table_pm2.data_vars
+                            ):
+                                # we can just sum to CO2 as we made sure that it doesn't
+                                # exist.
+                                # If we have CO2 and removals but not emissions,
+                                # CO2 already
+                                # has removals subtracted and we do nothing here
+                                ds_table_pm2["CO2"] = ds_table_pm2[
+                                    ["CO2 emissions", "CO2 removals"]
+                                ].pr.sum(dim="entity", skipna=True, min_count=1)
+                                ds_table_pm2["CO2"].attrs["entity"] = "CO2"
+
+                            if (
+                                ("CH4 removals" in ds_table_pm2.data_vars)
+                                and ("CH4 emissions" in ds_table_pm2.data_vars)
+                                and "CH4" not in ds_table_pm2.data_vars
+                            ):
+                                # we can just sum to CH4 as we made sure that it doesn't
+                                # exist.
+                                # If we have CH4 and removals but not emissions, CH4
+                                # already has removals subtracted and we do nothing here
+                                ds_table_pm2["CH4"] = ds_table_pm2[
+                                    ["CH4 emissions", "CH4 removals"]
+                                ].pr.sum(dim="entity", skipna=True, min_count=1)
+                                ds_table_pm2["CH4"].attrs["entity"] = "CH4"
+
+                            # combine per table DS
+                            if ds_all is None:
+                                ds_all = ds_table_pm2
+                            else:
+                                ds_all = ds_all.combine_first(ds_table_pm2)
                         else:
-                            ds_all = ds_all.combine_first(ds_table_pm2)
+                            empty_tables.append(
+                                [table, current_country_code, data_year]
+                            )
+                    elif not_present:
+                        # log that table is not present
+                        missing_worksheets.append(
+                            [table, current_country_code, data_year]
+                        )
                     else:
-                        empty_tables.append([table, current_country_code, data_year])
+                        print(
+                            f"Empty DataFrame returned for table {table}, "
+                            f"country {current_country_code}. Check log for errors."
+                        )
                 except Exception as e:
                     message = (
                         f"Error occurred when converting table {table} for"
@@ -317,6 +348,21 @@ def read_year_to_test_specs(  # noqa: PLR0912, PLR0915
         print(f"Empty tables found:. Save log to {log_location}")
         save_empty_tables_info(empty_tables, log_location)
 
+    if len(missing_worksheets) > 0:
+        today = date.today()
+        if country_code is not None:
+            log_location = (
+                output_folder / f"{data_year}_missing_tables_{country_code}_"
+                f"{today.strftime('%Y-%m-%d')}.csv"
+            )
+        else:
+            log_location = (
+                output_folder / f"{data_year}_missing_tables_"
+                f"{today.strftime('%Y-%m-%d')}.csv"
+            )
+        print(f"Missing worksheets. Save log to {log_location}")
+        save_empty_tables_info(missing_worksheets, log_location)
+
     # write exceptions
     f_ex = open(
         output_folder / f"{data_year}_exceptions_{today.strftime('%Y-%m-%d')}.txt", "w"

+ 111 - 70
src/unfccc_ghg_data/unfccc_crf_reader/unfccc_crf_reader_prod.py

@@ -163,9 +163,15 @@ def read_crf_for_country(  # noqa: PLR0912, PLR0915
         unknown_categories = []
         last_row_info = []
         empty_tables = []
+        missing_worksheets = []
         for table in tables:
             # read table for all years
-            ds_table, new_unknown_categories, new_last_row_info = read_crf_table(
+            (
+                ds_table,
+                new_unknown_categories,
+                new_last_row_info,
+                not_present,
+            ) = read_crf_table(
                 country_code,
                 table,
                 submission_year,
@@ -177,81 +183,103 @@ def read_crf_for_country(  # noqa: PLR0912, PLR0915
             unknown_categories = unknown_categories + new_unknown_categories
             last_row_info = last_row_info + new_last_row_info
 
-            # convert to PRIMAP2 IF
-            # first drop the orig_cat_name col as it can have multiple values for
-            # one category
-            ds_table = ds_table.drop(columns=["orig_cat_name"])
+            if ds_table is not None:
+                # convert to PRIMAP2 IF
+                # first drop the orig_cat_name col as it can have multiple values for
+                # one category
+                ds_table = ds_table.drop(columns=["orig_cat_name"])
 
-            # if we need to map entities pass this info to the conversion function
-            if "entity_mapping" in crf_spec[table]:
-                entity_mapping = crf_spec[table]["entity_mapping"]
-            else:
-                entity_mapping = None
-            if submission_type == "CRF":
-                meta_data_input = {
-                    "title": f"CRF data submitted in {submission_year} to the UNFCCC "
-                    f"in the {type_name} ({submission_type}) by {country_name}. "
-                    f"Submission date: {date_or_version}"
-                }
-            else:
-                meta_data_input = {
-                    "title": f"Data submitted for round {submission_year} "
-                    f"to the UNFCCC in the {type_name} ({submission_type}) by "
-                    f"{country_name}. Submission version: {date_or_version}"
-                }
-            ds_table_if = convert_crf_table_to_pm2if(
-                ds_table,
-                submission_year,
-                meta_data_input=meta_data_input,
-                entity_mapping=entity_mapping,
-                submission_type=submission_type,
-            )
-
-            # skip empty tables
-            if (
-                not ds_table_if.set_index(ds_table_if.attrs["dimensions"]["*"])
-                .isna()
-                .all(axis=None)
-            ):
-                # now convert to native PRIMAP2 format
-                ds_table_pm2 = pm2.pm2io.from_interchange_format(ds_table_if)
-
-                # if individual data for emissions and removals / recovery exist combine
-                # them
-                if (
-                    ("CO2 removals" in ds_table_pm2.data_vars)
-                    and ("CO2 emissions" in ds_table_pm2.data_vars)
-                    and "CO2" not in ds_table_pm2.data_vars
-                ):
-                    # we can just sum to CO2 as we made sure that it doesn't exist.
-                    # If we have CO2 and removals but not emissions, CO2 already has
-                    # removals subtracted and we do nothing here
-                    ds_table_pm2["CO2"] = ds_table_pm2[
-                        ["CO2 emissions", "CO2 removals"]
-                    ].pr.sum(dim="entity", skipna=True, min_count=1)
-                    ds_table_pm2["CO2"].attrs["entity"] = "CO2"
+                # if we need to map entities pass this info to the conversion function
+                if "entity_mapping" in crf_spec[table]:
+                    entity_mapping = crf_spec[table]["entity_mapping"]
+                else:
+                    entity_mapping = None
+                if submission_type == "CRF":
+                    meta_data_input = {
+                        "title": f"CRF data submitted in {submission_year} to the "
+                        f"UNFCCC in the {type_name} ({submission_type}) by "
+                        f"{country_name}. "
+                        f"Submission date: {date_or_version}"
+                    }
+                else:
+                    meta_data_input = {
+                        "title": f"Data submitted for round {submission_year} "
+                        f"to the UNFCCC in the {type_name} ({submission_type}) by "
+                        f"{country_name}. Submission version: {date_or_version}"
+                    }
+
+                if "decimal_sep" in crf_spec[table]["table"]:
+                    decimal_sep = crf_spec[table]["table"]["decimal_sep"]
+                else:
+                    decimal_sep = "."
+                if "thousands_sep" in crf_spec[table]["table"]:
+                    thousands_sep = crf_spec[table]["table"]["thousands_sep"]
+                else:
+                    thousands_sep = ","
+
+                ds_table_if = convert_crf_table_to_pm2if(
+                    ds_table,
+                    submission_year,
+                    meta_data_input=meta_data_input,
+                    entity_mapping=entity_mapping,
+                    submission_type=submission_type,
+                    decimal_sep=decimal_sep,
+                    thousands_sep=thousands_sep,
+                )
 
+                # skip empty tables
                 if (
-                    ("CH4 removals" in ds_table_pm2.data_vars)
-                    and ("CH4 emissions" in ds_table_pm2.data_vars)
-                    and "CH4" not in ds_table_pm2.data_vars
+                    not ds_table_if.set_index(ds_table_if.attrs["dimensions"]["*"])
+                    .isna()
+                    .all(axis=None)
                 ):
-                    # we can just sum to CH4 as we made sure that it doesn't exist.
-                    # If we have CH4 and removals but not emissions, CH4 already has
-                    # removals subtracted and we do nothing here
-                    ds_table_pm2["CH4"] = ds_table_pm2[
-                        ["CH4 emissions", "CH4 removals"]
-                    ].pr.sum(dim="entity", skipna=True, min_count=1)
-                    ds_table_pm2["CH4"].attrs["entity"] = "CH4"
-
-                # combine per table DS
-                if ds_all is None:
-                    ds_all = ds_table_pm2
+                    # now convert to native PRIMAP2 format
+                    ds_table_pm2 = pm2.pm2io.from_interchange_format(ds_table_if)
+
+                    # if individual data for emissions and removals / recovery exist
+                    # combine them
+                    if (
+                        ("CO2 removals" in ds_table_pm2.data_vars)
+                        and ("CO2 emissions" in ds_table_pm2.data_vars)
+                        and "CO2" not in ds_table_pm2.data_vars
+                    ):
+                        # we can just sum to CO2 as we made sure that it doesn't exist.
+                        # If we have CO2 and removals but not emissions, CO2 already has
+                        # removals subtracted and we do nothing here
+                        ds_table_pm2["CO2"] = ds_table_pm2[
+                            ["CO2 emissions", "CO2 removals"]
+                        ].pr.sum(dim="entity", skipna=True, min_count=1)
+                        ds_table_pm2["CO2"].attrs["entity"] = "CO2"
+
+                    if (
+                        ("CH4 removals" in ds_table_pm2.data_vars)
+                        and ("CH4 emissions" in ds_table_pm2.data_vars)
+                        and "CH4" not in ds_table_pm2.data_vars
+                    ):
+                        # we can just sum to CH4 as we made sure that it doesn't exist.
+                        # If we have CH4 and removals but not emissions, CH4 already has
+                        # removals subtracted and we do nothing here
+                        ds_table_pm2["CH4"] = ds_table_pm2[
+                            ["CH4 emissions", "CH4 removals"]
+                        ].pr.sum(dim="entity", skipna=True, min_count=1)
+                        ds_table_pm2["CH4"].attrs["entity"] = "CH4"
+
+                    # combine per table DS
+                    if ds_all is None:
+                        ds_all = ds_table_pm2
+                    else:
+                        ds_all = ds_all.combine_first(ds_table_pm2)
                 else:
-                    ds_all = ds_all.combine_first(ds_table_pm2)
+                    # log that table is empty
+                    empty_tables.append([table, country_code, ""])
+            elif not_present:
+                # log that table is not present
+                missing_worksheets.append([table, country_code, ""])
             else:
-                # log that table is empty
-                empty_tables.append(table)
+                print(
+                    f"Empty DataFrame returned for table {table}, "
+                    f"country {country_code}. Check log for errors."
+                )
 
         # check if there were log messages.
         save_data = True
@@ -296,6 +324,19 @@ def read_crf_for_country(  # noqa: PLR0912, PLR0915
             )
             save_empty_tables_info(empty_tables, log_location)
 
+        if len(missing_worksheets) > 0:
+            today = date.today()
+            log_location = (
+                log_path
+                / f"{submission_type}{submission_year}"
+                / f"{country_code}_missing_tables_{today.strftime('%Y-%m-%d')}.csv"
+            )
+            print(
+                f"Missing worksheets for {country_code}: "
+                f"{empty_tables}. Save log to {log_location}"
+            )
+            save_empty_tables_info(missing_worksheets, log_location)
+
         if save_data:
             compression = dict(zlib=True, complevel=9)
             output_folder = extracted_data_path_UNFCCC / country_name.replace(" ", "_")
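Missing worksheets are now logged to their own CSV next to the empty-table log, reusing save_empty_tables_info. The implementation of that helper is not part of this diff, so the sketch below only assumes a plain CSV writer for the [table, country, year] rows collected above; the function name here is hypothetical.

# Hedged sketch of a CSV writer for the [table, country, year] rows collected
# above; the real save_empty_tables_info is not shown in this diff, so this is
# only an assumption about its shape, not the actual implementation.
import csv
from pathlib import Path


def save_tables_info(rows: list[list], log_location: Path) -> None:
    """Write one [table, country, year] row per missing or empty table."""
    with open(log_location, "w", newline="") as f:
        writer = csv.writer(f)
        writer.writerow(["table", "country", "year"])
        writer.writerows(rows)


# example: rows as collected in read_crf_for_country (year is empty there)
# save_tables_info([["Table3.B(b)", "PRY", ""]], Path("PRY_missing_tables.csv"))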

Some files were not shown because too many files changed in this diff