Browse Source

More work on CRT1 specification, some code modifications and corresponding modifications to CRF specifications

Johannes Gütschow 3 months ago
parent
commit
a02a0767bc

+ 28 - 29
src/unfccc_ghg_data/unfccc_crf_reader/crf_specifications/crf2021_specification.py

@@ -35,7 +35,6 @@ Missing tables are:
 
 """
 
-import numpy as np
 
 from .util import unit_info
 
@@ -49,7 +48,7 @@ CRF2021 = {
             "col_for_categories": "GREENHOUSE GAS SOURCE AND SINK CATEGORIES",
             "categories": ["category"],
             "cols_to_ignore": [],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [
@@ -90,7 +89,7 @@ CRF2021 = {
             "col_for_categories": "GREENHOUSE GAS SOURCE AND SINK CATEGORIES",
             "categories": ["category"],
             "cols_to_ignore": [],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [
@@ -154,7 +153,7 @@ CRF2021 = {
                 "IMPLIED EMISSION FACTORS N2O",
                 "EMISSIONS CO2 Amount captured",
             ],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [
@@ -361,7 +360,7 @@ CRF2021 = {
                 "IMPLIED EMISSION FACTORS N2O",
                 "EMISSIONS CO2 Amount captured",
             ],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [
@@ -724,7 +723,7 @@ CRF2021 = {
             ],
             "stop_cats": [
                 "Note: All footnotes for this table are given at the end of the table on sheet 4.",
-                np.nan,
+                "nan",
             ],
             "unit_info": unit_info["default"],
         },
@@ -1181,7 +1180,7 @@ CRF2021 = {
                 "IMPLIED EMISSION FACTORS N2O",
                 "EMISSIONS CO2 Amount captured",
             ],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [
@@ -1607,7 +1606,7 @@ CRF2021 = {
                 "IMPLIED EMISSION FACTORS CO2",
                 "EMISSIONS CH4 Recovery/Flaring(2)",
             ],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [
@@ -1653,7 +1652,7 @@ CRF2021 = {
                 "IMPLIED EMISSION FACTORS N2O",
                 "EMISSIONS CO2 Amount captured",
             ],
-            "stop_cats": [".", np.nan],
+            "stop_cats": [".", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [
@@ -1717,7 +1716,7 @@ CRF2021 = {
                 "ACTIVITY DATA CO2 transported or injected(1)",
                 "IMPLIED EMISSION FACTORS CO2",
             ],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [
@@ -1755,7 +1754,7 @@ CRF2021 = {
             "col_for_categories": "GREENHOUSE GAS SOURCE AND SINK CATEGORIES",
             "categories": ["category", "class"],
             "cols_to_ignore": [],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [],
@@ -1773,7 +1772,7 @@ CRF2021 = {
             "col_for_categories": "GREENHOUSE GAS SOURCE AND SINK CATEGORIES",
             "categories": ["category"],
             "cols_to_ignore": [],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["industry"],
         },
         "sector_mapping": [
@@ -1821,7 +1820,7 @@ CRF2021 = {
             "col_for_categories": "GREENHOUSE GAS SOURCE AND SINK CATEGORIES",
             "categories": ["category"],
             "cols_to_ignore": [],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["industry"],
         },
         "sector_mapping": [
@@ -1867,7 +1866,7 @@ CRF2021 = {
             "col_for_categories": "GREENHOUSE GAS SOURCE AND SINK CATEGORIES",
             "categories": ["category"],
             "cols_to_ignore": [],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [],
@@ -1885,7 +1884,7 @@ CRF2021 = {
             "col_for_categories": "GREENHOUSE GAS SOURCE AND SINK CATEGORIES",
             "categories": ["category"],
             "cols_to_ignore": [],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [],
@@ -1903,7 +1902,7 @@ CRF2021 = {
             "col_for_categories": "GREENHOUSE GAS SOURCE AND SINK CATEGORIES",
             "categories": ["category"],
             "cols_to_ignore": [],
-            "stop_cats": [".", np.nan],
+            "stop_cats": [".", "nan"],
             "unit_info": unit_info["fgases"],
         },
         "sector_mapping": [
@@ -1980,7 +1979,7 @@ CRF2021 = {
             "col_for_categories": "GREENHOUSE GAS SOURCE AND",
             "categories": ["category"],
             "cols_to_ignore": [],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [
@@ -2163,7 +2162,7 @@ CRF2021 = {
             "col_for_categories": "GREENHOUSE GAS SOURCE AND",
             "categories": ["category"],
             "cols_to_ignore": [],
-            "stop_cats": [".", np.nan],
+            "stop_cats": [".", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [
@@ -2209,7 +2208,7 @@ CRF2021 = {
                 "ACTIVITY DATA AND OTHER RELATED INFORMATION Organic amendments added(3)",
                 "IMPLIED EMISSION FACTOR (1) CH4",
             ],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [
@@ -2251,7 +2250,7 @@ CRF2021 = {
                 "ACTIVITY DATA AND OTHER RELATED INFORMATION Value",
                 "IMPLIED EMISSION FACTORS Value",
             ],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [
@@ -2297,7 +2296,7 @@ CRF2021 = {
                 "IMPLIED EMISSION FACTORS CH4",
                 "IMPLIED EMISSION FACTORS N2O",
             ],
-            "stop_cats": ["", ".", np.nan],
+            "stop_cats": ["", ".", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [
@@ -2341,7 +2340,7 @@ CRF2021 = {
             "col_for_categories": "GREENHOUSE GAS SOURCE AND SINK CATEGORIES",
             "categories": ["category"],
             "cols_to_ignore": [],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [],
@@ -2359,7 +2358,7 @@ CRF2021 = {
             "col_for_categories": "GREENHOUSE GAS SOURCE AND SINK CATEGORIES",
             "categories": ["category"],
             "cols_to_ignore": [],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [],
@@ -2377,7 +2376,7 @@ CRF2021 = {
             "col_for_categories": "GREENHOUSE GAS SOURCE AND SINK CATEGORIES",
             "categories": ["category"],
             "cols_to_ignore": [],
-            "stop_cats": ["", ".", np.nan],
+            "stop_cats": ["", ".", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [
@@ -2434,7 +2433,7 @@ CRF2021 = {
             "col_for_categories": "GREENHOUSE GAS SOURCE AND SINK CATEGORIES",
             "categories": ["category"],
             "cols_to_ignore": [],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [
@@ -2496,7 +2495,7 @@ CRF2021 = {
                 "EMISSIONS SINK CATEGORIES CH4 Amount of CH4 flared",
                 "EMISSIONS SINK CATEGORIES CH4 Amount of CH4 for energy recovery(3)",
             ],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [
@@ -2529,7 +2528,7 @@ CRF2021 = {
                 "EMISSIONS CH4 Amount of CH4 flared",
                 "EMISSIONS CH4 Amount of CH4 for energy recovery(3)",
             ],
-            "stop_cats": [".", "", np.nan],
+            "stop_cats": [".", "", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [
@@ -2609,7 +2608,7 @@ CRF2021 = {
                 "IMPLIED EMISSION FACTOR Amount of wastes (incinerated/open burned) CH4",
                 "IMPLIED EMISSION FACTOR Amount of wastes (incinerated/open burned) N2O",
             ],
-            "stop_cats": [".", "", np.nan],
+            "stop_cats": [".", "", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [
@@ -2700,7 +2699,7 @@ CRF2021 = {
                 "EMISSIONS CH4 Amount of CH4 flared",
                 "EMISSIONS CH4 Amount of CH4 for Energy Recovery(5)",
             ],
-            "stop_cats": [".", "", np.nan],
+            "stop_cats": [".", "", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [

+ 31 - 32
src/unfccc_ghg_data/unfccc_crf_reader/crf_specifications/crf2022_specification.py

@@ -36,7 +36,6 @@ Missing tables are:
 
 """
 
-import numpy as np
 
 from .util import unit_info
 
@@ -50,7 +49,7 @@ CRF2022 = {
             "col_for_categories": "GREENHOUSE GAS SOURCE AND SINK CATEGORIES",
             "categories": ["category"],
             "cols_to_ignore": [],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [
@@ -91,7 +90,7 @@ CRF2022 = {
             "col_for_categories": "GREENHOUSE GAS SOURCE AND SINK CATEGORIES",
             "categories": ["category"],
             "cols_to_ignore": [],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [
@@ -155,7 +154,7 @@ CRF2022 = {
                 "IMPLIED EMISSION FACTORS N2O",
                 "EMISSIONS CO2 Amount captured",
             ],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [
@@ -362,7 +361,7 @@ CRF2022 = {
                 "IMPLIED EMISSION FACTORS N2O",
                 "EMISSIONS CO2 Amount captured",
             ],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [
@@ -725,7 +724,7 @@ CRF2022 = {
             ],
             "stop_cats": [
                 "Note: All footnotes for this table are given at the end of the table on sheet 4.",
-                np.nan,
+                "nan",
             ],
             "unit_info": unit_info["default"],
         },
@@ -1197,7 +1196,7 @@ CRF2022 = {
                 "IMPLIED EMISSION FACTORS N2O",
                 "EMISSIONS CO2 Amount captured",
             ],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [
@@ -1627,7 +1626,7 @@ CRF2022 = {
                 "IMPLIED EMISSION FACTORS CO2",
                 "EMISSIONS CH4 Recovery/Flaring(2)",
             ],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [
@@ -1674,7 +1673,7 @@ CRF2022 = {
                 "IMPLIED EMISSION FACTORS N2O",
                 "EMISSIONS CO2 Amount captured",
             ],
-            "stop_cats": [".", np.nan],
+            "stop_cats": [".", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [
@@ -1739,7 +1738,7 @@ CRF2022 = {
                 "ACTIVITY DATA CO2 transported or injected(1)",
                 "IMPLIED EMISSION FACTORS CO2",
             ],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [
@@ -1777,7 +1776,7 @@ CRF2022 = {
             "col_for_categories": "GREENHOUSE GAS SOURCE AND SINK CATEGORIES",
             "categories": ["category", "class"],
             "cols_to_ignore": [],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [],
@@ -1795,7 +1794,7 @@ CRF2022 = {
             "col_for_categories": "GREENHOUSE GAS SOURCE AND SINK CATEGORIES",
             "categories": ["category"],
             "cols_to_ignore": [],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["industry"],
         },
         "sector_mapping": [
@@ -1843,7 +1842,7 @@ CRF2022 = {
             "col_for_categories": "GREENHOUSE GAS SOURCE AND SINK CATEGORIES",
             "categories": ["category"],
             "cols_to_ignore": [],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["industry"],
         },
         "sector_mapping": [
@@ -1889,7 +1888,7 @@ CRF2022 = {
             "col_for_categories": "GREENHOUSE GAS SOURCE AND SINK CATEGORIES",
             "categories": ["category"],
             "cols_to_ignore": [],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [],
@@ -1907,7 +1906,7 @@ CRF2022 = {
             "col_for_categories": "GREENHOUSE GAS SOURCE AND SINK CATEGORIES",
             "categories": ["category"],
             "cols_to_ignore": [],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [],
@@ -1925,7 +1924,7 @@ CRF2022 = {
             "col_for_categories": "GREENHOUSE GAS SOURCE AND SINK CATEGORIES",
             "categories": ["category"],
             "cols_to_ignore": [],
-            "stop_cats": [".", np.nan],
+            "stop_cats": [".", "nan"],
             "unit_info": unit_info["fgases"],
         },
         "sector_mapping": [
@@ -2002,7 +2001,7 @@ CRF2022 = {
             "col_for_categories": "GREENHOUSE GAS SOURCE AND",
             "categories": ["category"],
             "cols_to_ignore": [],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [
@@ -2185,7 +2184,7 @@ CRF2022 = {
             "col_for_categories": "GREENHOUSE GAS SOURCE AND",
             "categories": ["category"],
             "cols_to_ignore": [],
-            "stop_cats": [".", np.nan],
+            "stop_cats": [".", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [
@@ -2231,7 +2230,7 @@ CRF2022 = {
                 "ACTIVITY DATA AND OTHER RELATED INFORMATION Organic amendments added(3)",
                 "IMPLIED EMISSION FACTOR (1) CH4",
             ],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [
@@ -2273,7 +2272,7 @@ CRF2022 = {
                 "ACTIVITY DATA AND OTHER RELATED INFORMATION Value",
                 "IMPLIED EMISSION FACTORS Value",
             ],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [
@@ -2319,7 +2318,7 @@ CRF2022 = {
                 "IMPLIED EMISSION FACTORS CH4",
                 "IMPLIED EMISSION FACTORS N2O",
             ],
-            "stop_cats": ["", ".", np.nan],
+            "stop_cats": ["", ".", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [
@@ -2363,7 +2362,7 @@ CRF2022 = {
             "col_for_categories": "GREENHOUSE GAS SOURCE AND SINK CATEGORIES",
             "categories": ["category"],
             "cols_to_ignore": [],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [],
@@ -2381,7 +2380,7 @@ CRF2022 = {
             "col_for_categories": "GREENHOUSE GAS SOURCE AND SINK CATEGORIES",
             "categories": ["category"],
             "cols_to_ignore": [],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [],
@@ -2399,7 +2398,7 @@ CRF2022 = {
             "col_for_categories": "GREENHOUSE GAS SOURCE AND SINK CATEGORIES",
             "categories": ["category"],
             "cols_to_ignore": [],
-            "stop_cats": ["", ".", np.nan],
+            "stop_cats": ["", ".", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [
@@ -2456,7 +2455,7 @@ CRF2022 = {
             "col_for_categories": "GREENHOUSE GAS SOURCE AND SINK CATEGORIES",
             "categories": ["category"],
             "cols_to_ignore": [],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [
@@ -2520,7 +2519,7 @@ CRF2022 = {
                 "EMISSIONS SINK CATEGORIES CH4 Amount of CH4 flared",
                 "EMISSIONS SINK CATEGORIES CH4 Amount of CH4 for energy recovery(3)",
             ],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [
@@ -2553,7 +2552,7 @@ CRF2022 = {
                 "EMISSIONS CH4 Amount of CH4 flared",
                 "EMISSIONS CH4 Amount of CH4 for energy recovery(3)",
             ],
-            "stop_cats": [".", "", np.nan],
+            "stop_cats": [".", "", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [
@@ -2635,7 +2634,7 @@ CRF2022 = {
                 "IMPLIED EMISSION FACTOR Amount of wastes (incinerated/open burned) CH4",
                 "IMPLIED EMISSION FACTOR Amount of wastes (incinerated/open burned) N2O",
             ],
-            "stop_cats": [".", "", np.nan],
+            "stop_cats": [".", "", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [
@@ -2726,7 +2725,7 @@ CRF2022 = {
                 "EMISSIONS CH4 Amount of CH4 flared",
                 "EMISSIONS CH4 Amount of CH4 for Energy Recovery(5)",
             ],
-            "stop_cats": [".", "", np.nan],
+            "stop_cats": [".", "", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [
@@ -2756,7 +2755,7 @@ CRF2022 = {
             "col_for_categories": "GREENHOUSE GAS SOURCE AND SINK CATEGORIES",
             "categories": ["category"],
             "cols_to_ignore": [],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["summary"],
         },
         "sector_mapping": [
@@ -2807,7 +2806,7 @@ CRF2022 = {
             "col_for_categories": "GREENHOUSE GAS SOURCE AND SINK CATEGORIES",
             "categories": ["category"],
             "cols_to_ignore": [],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["summary"],
         },
         "sector_mapping": [
@@ -2860,7 +2859,7 @@ CRF2022 = {
             "col_for_categories": "GREENHOUSE GAS SOURCE AND SINK CATEGORIES",
             "categories": ["category"],
             "cols_to_ignore": [],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["summary"],
         },
         "sector_mapping": [

+ 24 - 25
src/unfccc_ghg_data/unfccc_crf_reader/crf_specifications/crf2023_aus_specification.py

@@ -39,7 +39,6 @@ TODO:
 
 """
 
-import numpy as np
 
 from .util import unit_info
 
@@ -56,7 +55,7 @@ CRF2023_AUS = {
             "col_for_categories": "GREENHOUSE GAS SOURCE AND SINK CATEGORIES",
             "categories": ["category"],
             "cols_to_ignore": [],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["industry"],
         },
         "sector_mapping": [
@@ -141,7 +140,7 @@ CRF2023_AUS = {
                 "IMPLIED EMISSION FACTORS N2O",
                 "AMOUNT CAPTURED (4) CO2",
             ],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [
@@ -269,7 +268,7 @@ CRF2023_AUS = {
                 "IMPLIED EMISSION FACTORS N2O",
                 "AMOUNT CAPTURED (4) CO2",
             ],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [
@@ -436,7 +435,7 @@ CRF2023_AUS = {
                 "IMPLIED EMISSION FACTORS CH4",
                 "IMPLIED EMISSION FACTORS N2O",
             ],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [
@@ -589,7 +588,7 @@ CRF2023_AUS = {
                 "IMPLIED EMISSION FACTORS N2O",
                 "AMOUNT CAPTURED (4) CO2",
             ],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [
@@ -753,7 +752,7 @@ CRF2023_AUS = {
                 "IMPLIED EMISSION FACTORS CH4 (3)",
                 "IMPLIED EMISSION FACTORS CO2",
             ],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [
@@ -814,7 +813,7 @@ CRF2023_AUS = {
                 "IMPLIED EMISSION FACTORS CH4",
                 "IMPLIED EMISSION FACTORS N2O",
             ],
-            "stop_cats": [".", np.nan],
+            "stop_cats": [".", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [
@@ -877,7 +876,7 @@ CRF2023_AUS = {
                 "ACTIVITY DATA CO2 transported or injected (1)",
                 "IMPLIED EMISSION FACTORS CO2",
             ],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [
@@ -922,7 +921,7 @@ CRF2023_AUS = {
             "col_for_categories": "GREENHOUSE GAS SOURCE AND SINK CATEGORIES",
             "categories": ["category", "class"],
             "cols_to_ignore": [],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [],
@@ -940,7 +939,7 @@ CRF2023_AUS = {
             "col_for_categories": "GREENHOUSE GAS SOURCE AND SINK CATEGORIES",
             "categories": ["category"],
             "cols_to_ignore": [],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["industry"],
         },
         "sector_mapping": [
@@ -1013,7 +1012,7 @@ CRF2023_AUS = {
             "col_for_categories": "GREENHOUSE GAS SOURCE AND SINK CATEGORIES",
             "categories": ["category"],
             "cols_to_ignore": [],
-            "stop_cats": [".", np.nan],
+            "stop_cats": [".", "nan"],
             "unit_info": unit_info["fgases"],
         },
         "sector_mapping": [
@@ -1093,7 +1092,7 @@ CRF2023_AUS = {
             "col_for_categories": "GREENHOUSE GAS SOURCE AND",
             "categories": ["category"],
             "cols_to_ignore": [],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["industry"],
         },
         "sector_mapping": [
@@ -1164,7 +1163,7 @@ CRF2023_AUS = {
                 "ACTIVITY DATA AND OTHER RELATED INFORMATION Average CH4 conversion rate (Ym) (2)",
                 "IMPLIED EMISSION FACTORS CH4",
             ],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [
@@ -1228,7 +1227,7 @@ CRF2023_AUS = {
                 "ACTIVITY DATA AND OTHER RELATED INFORMATION CH4 producing potential (Bo) (2) (average) Warm",
                 "IMPLIED EMISSION FACTORS CH4 producing potential (Bo) (2) (average) CH4",
             ],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [
@@ -1328,7 +1327,7 @@ CRF2023_AUS = {
                 "EMISSIONS N2O Indirect Atmospheric deposition",
                 "EMISSIONS N2O Indirect Nitrogen leaching and run-off",
             ],
-            "stop_cats": ["", np.nan, "3.B.5. Indirect N2O emissions"],
+            "stop_cats": ["", "nan", "3.B.5. Indirect N2O emissions"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [
@@ -1390,7 +1389,7 @@ CRF2023_AUS = {
                 "ACTIVITY DATA AND OTHER RELATED INFORMATION Organic amendments added (3)",
                 "IMPLIED EMISSION FACTOR (1) CH4",
             ],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [
@@ -1434,7 +1433,7 @@ CRF2023_AUS = {
                 # "volatilises as NH3 and NOX",
                 # "Value 0.11",
             ],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [
@@ -1482,7 +1481,7 @@ CRF2023_AUS = {
                 "IMPLIED EMISSION FACTORS CH4",
                 "IMPLIED EMISSION FACTORS N2O",
             ],
-            "stop_cats": ["", ".", np.nan],
+            "stop_cats": ["", ".", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [
@@ -1508,7 +1507,7 @@ CRF2023_AUS = {
             "col_for_categories": "GREENHOUSE GAS SOURCE AND SINK CATEGORIES",
             "categories": ["category"],
             "cols_to_ignore": [],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [],
@@ -1526,7 +1525,7 @@ CRF2023_AUS = {
             "col_for_categories": "GREENHOUSE GAS SOURCE AND SINK CATEGORIES",
             "categories": ["category"],
             "cols_to_ignore": [],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [],
@@ -1544,7 +1543,7 @@ CRF2023_AUS = {
             "col_for_categories": "GREENHOUSE GAS SOURCE AND SINK CATEGORIES",
             "categories": ["category"],
             "cols_to_ignore": [],
-            "stop_cats": ["", ".", np.nan],
+            "stop_cats": ["", ".", "nan"],
             "unit_info": unit_info["industry"],
         },
         "sector_mapping": [
@@ -1594,7 +1593,7 @@ CRF2023_AUS = {
             "col_for_categories": "GREENHOUSE GAS SOURCE AND SINK CATEGORIES",
             "categories": ["category"],
             "cols_to_ignore": [],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["industry"],
         },
         "sector_mapping": [
@@ -1641,7 +1640,7 @@ CRF2023_AUS = {
             "col_for_categories": "GREENHOUSE GAS SOURCE AND SINK CATEGORIES",
             "categories": ["category"],
             "cols_to_ignore": [],
-            "stop_cats": [],  # "", np.nan],
+            "stop_cats": [],  # "", "nan"],
             "unit_info": unit_info["summary"],
         },
         "sector_mapping": [
@@ -1698,7 +1697,7 @@ CRF2023_AUS = {
             ["6. Other (please specify) (7)", ["6"]],
             ["NA", ["\\IGNORE"]],
             ["", ["\\IGNORE"]],
-            [np.nan, ["\\IGNORE"]],
+            ["nan", ["\\IGNORE"]],
             ["Memo items: (8)", ["\\IGNORE"]],
             ["1.D.1. International bunkers", ["M.Memo.Int"]],
             ["1.D.1.a. Aviation", ["M.Memo.Int.Avi"]],

+ 31 - 32
src/unfccc_ghg_data/unfccc_crf_reader/crf_specifications/crf2023_specification.py

@@ -35,7 +35,6 @@ Missing tables are:
 * Add activity data
 """
 
-import numpy as np
 
 from .util import unit_info
 
@@ -53,7 +52,7 @@ CRF2023 = {
             "col_for_categories": "GREENHOUSE GAS SOURCE AND SINK CATEGORIES",
             "categories": ["category"],
             "cols_to_ignore": [],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [
@@ -94,7 +93,7 @@ CRF2023 = {
             "col_for_categories": "GREENHOUSE GAS SOURCE AND SINK CATEGORIES",
             "categories": ["category"],
             "cols_to_ignore": [],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [
@@ -158,7 +157,7 @@ CRF2023 = {
                 "IMPLIED EMISSION FACTORS N2O",
                 "EMISSIONS CO2 Amount captured",
             ],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [
@@ -365,7 +364,7 @@ CRF2023 = {
                 "IMPLIED EMISSION FACTORS N2O",
                 "EMISSIONS CO2 Amount captured",
             ],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [
@@ -728,7 +727,7 @@ CRF2023 = {
             ],
             "stop_cats": [
                 "Note: All footnotes for this table are given at the end of the table on sheet 4.",
-                np.nan,
+                "nan",
             ],
             "unit_info": unit_info["default"],
         },
@@ -1207,7 +1206,7 @@ CRF2023 = {
                 "IMPLIED EMISSION FACTORS N2O",
                 "EMISSIONS CO2 Amount captured",
             ],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [
@@ -1639,7 +1638,7 @@ CRF2023 = {
                 "IMPLIED EMISSION FACTORS CO2",
                 "EMISSIONS CH4 Recovery/Flaring(2)",
             ],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [
@@ -1688,7 +1687,7 @@ CRF2023 = {
                 "IMPLIED EMISSION FACTORS N2O",
                 "EMISSIONS CO2 Amount captured",
             ],
-            "stop_cats": [".", np.nan],
+            "stop_cats": [".", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [
@@ -1753,7 +1752,7 @@ CRF2023 = {
                 "ACTIVITY DATA CO2 transported or injected(1)",
                 "IMPLIED EMISSION FACTORS CO2",
             ],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [
@@ -1791,7 +1790,7 @@ CRF2023 = {
             "col_for_categories": "GREENHOUSE GAS SOURCE AND SINK CATEGORIES",
             "categories": ["category", "class"],
             "cols_to_ignore": [],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [],
@@ -1809,7 +1808,7 @@ CRF2023 = {
             "col_for_categories": "GREENHOUSE GAS SOURCE AND SINK CATEGORIES",
             "categories": ["category"],
             "cols_to_ignore": [],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["industry"],
         },
         "sector_mapping": [
@@ -1857,7 +1856,7 @@ CRF2023 = {
             "col_for_categories": "GREENHOUSE GAS SOURCE AND SINK CATEGORIES",
             "categories": ["category"],
             "cols_to_ignore": [],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["industry"],
         },
         "sector_mapping": [
@@ -1903,7 +1902,7 @@ CRF2023 = {
             "col_for_categories": "GREENHOUSE GAS SOURCE AND SINK CATEGORIES",
             "categories": ["category"],
             "cols_to_ignore": [],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [],
@@ -1921,7 +1920,7 @@ CRF2023 = {
             "col_for_categories": "GREENHOUSE GAS SOURCE AND SINK CATEGORIES",
             "categories": ["category"],
             "cols_to_ignore": [],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [],
@@ -1939,7 +1938,7 @@ CRF2023 = {
             "col_for_categories": "GREENHOUSE GAS SOURCE AND SINK CATEGORIES",
             "categories": ["category"],
             "cols_to_ignore": [],
-            "stop_cats": [".", np.nan],
+            "stop_cats": [".", "nan"],
             "unit_info": unit_info["fgases"],
         },
         "sector_mapping": [
@@ -2016,7 +2015,7 @@ CRF2023 = {
             "col_for_categories": "GREENHOUSE GAS SOURCE AND",
             "categories": ["category"],
             "cols_to_ignore": [],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [
@@ -2199,7 +2198,7 @@ CRF2023 = {
             "col_for_categories": "GREENHOUSE GAS SOURCE AND",
             "categories": ["category"],
             "cols_to_ignore": [],
-            "stop_cats": [".", np.nan],
+            "stop_cats": [".", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [
@@ -2245,7 +2244,7 @@ CRF2023 = {
                 "ACTIVITY DATA AND OTHER RELATED INFORMATION Organic amendments added(3)",
                 "IMPLIED EMISSION FACTOR (1) CH4",
             ],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [
@@ -2287,7 +2286,7 @@ CRF2023 = {
                 "ACTIVITY DATA AND OTHER RELATED INFORMATION Value",
                 "IMPLIED EMISSION FACTORS Value",
             ],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [
@@ -2333,7 +2332,7 @@ CRF2023 = {
                 "IMPLIED EMISSION FACTORS CH4",
                 "IMPLIED EMISSION FACTORS N2O",
             ],
-            "stop_cats": ["", ".", np.nan],
+            "stop_cats": ["", ".", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [
@@ -2377,7 +2376,7 @@ CRF2023 = {
             "col_for_categories": "GREENHOUSE GAS SOURCE AND SINK CATEGORIES",
             "categories": ["category"],
             "cols_to_ignore": [],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [],
@@ -2395,7 +2394,7 @@ CRF2023 = {
             "col_for_categories": "GREENHOUSE GAS SOURCE AND SINK CATEGORIES",
             "categories": ["category"],
             "cols_to_ignore": [],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [],
@@ -2413,7 +2412,7 @@ CRF2023 = {
             "col_for_categories": "GREENHOUSE GAS SOURCE AND SINK CATEGORIES",
             "categories": ["category"],
             "cols_to_ignore": [],
-            "stop_cats": ["", ".", np.nan],
+            "stop_cats": ["", ".", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [
@@ -2470,7 +2469,7 @@ CRF2023 = {
             "col_for_categories": "GREENHOUSE GAS SOURCE AND SINK CATEGORIES",
             "categories": ["category"],
             "cols_to_ignore": [],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [
@@ -2536,7 +2535,7 @@ CRF2023 = {
                 "EMISSIONS SINK CATEGORIES CH4 Amount of CH4 flared",
                 "EMISSIONS SINK CATEGORIES CH4 Amount of CH4 for energy recovery(3)",
             ],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [
@@ -2569,7 +2568,7 @@ CRF2023 = {
                 "EMISSIONS CH4 Amount of CH4 flared",
                 "EMISSIONS CH4 Amount of CH4 for energy recovery(3)",
             ],
-            "stop_cats": [".", "", np.nan],
+            "stop_cats": [".", "", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [
@@ -2651,7 +2650,7 @@ CRF2023 = {
                 "IMPLIED EMISSION FACTOR Amount of wastes (incinerated/open burned) CH4",
                 "IMPLIED EMISSION FACTOR Amount of wastes (incinerated/open burned) N2O",
             ],
-            "stop_cats": [".", "", np.nan],
+            "stop_cats": [".", "", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [
@@ -2746,7 +2745,7 @@ CRF2023 = {
                 "EMISSIONS CH4 Amount of CH4 flared",
                 "EMISSIONS CH4 Amount of CH4 for Energy Recovery(5)",
             ],
-            "stop_cats": [".", "", np.nan],
+            "stop_cats": [".", "", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [
@@ -2776,7 +2775,7 @@ CRF2023 = {
             "col_for_categories": "GREENHOUSE GAS SOURCE AND SINK CATEGORIES",
             "categories": ["category"],
             "cols_to_ignore": [],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["summary"],
         },
         "sector_mapping": [
@@ -2827,7 +2826,7 @@ CRF2023 = {
             "col_for_categories": "GREENHOUSE GAS SOURCE AND SINK CATEGORIES",
             "categories": ["category"],
             "cols_to_ignore": [],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["summary"],
         },
         "sector_mapping": [
@@ -2880,7 +2879,7 @@ CRF2023 = {
             "col_for_categories": "GREENHOUSE GAS SOURCE AND SINK CATEGORIES",
             "categories": ["category"],
             "cols_to_ignore": [],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["summary"],
         },
         "sector_mapping": [

+ 31 - 32
src/unfccc_ghg_data/unfccc_crf_reader/crf_specifications/crf2024_specification.py

@@ -35,7 +35,6 @@ Missing tables are:
 * Add activity data
 """
 
-import numpy as np
 
 from .util import unit_info
 
@@ -56,7 +55,7 @@ CRF2024 = {
             "col_for_categories": "GREENHOUSE GAS SOURCE AND SINK CATEGORIES",
             "categories": ["category"],
             "cols_to_ignore": [],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [
@@ -97,7 +96,7 @@ CRF2024 = {
             "col_for_categories": "GREENHOUSE GAS SOURCE AND SINK CATEGORIES",
             "categories": ["category"],
             "cols_to_ignore": [],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [
@@ -161,7 +160,7 @@ CRF2024 = {
                 "IMPLIED EMISSION FACTORS N2O",
                 "EMISSIONS CO2 Amount captured",
             ],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [
@@ -368,7 +367,7 @@ CRF2024 = {
                 "IMPLIED EMISSION FACTORS N2O",
                 "EMISSIONS CO2 Amount captured",
             ],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [
@@ -731,7 +730,7 @@ CRF2024 = {
             ],
             "stop_cats": [
                 "Note: All footnotes for this table are given at the end of the table on sheet 4.",
-                np.nan,
+                "nan",
             ],
             "unit_info": unit_info["default"],
         },
@@ -1210,7 +1209,7 @@ CRF2024 = {
                 "IMPLIED EMISSION FACTORS N2O",
                 "EMISSIONS CO2 Amount captured",
             ],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [
@@ -1642,7 +1641,7 @@ CRF2024 = {
                 "IMPLIED EMISSION FACTORS CO2",
                 "EMISSIONS CH4 Recovery/Flaring(2)",
             ],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [
@@ -1691,7 +1690,7 @@ CRF2024 = {
                 "IMPLIED EMISSION FACTORS N2O",
                 "EMISSIONS CO2 Amount captured",
             ],
-            "stop_cats": [".", np.nan],
+            "stop_cats": [".", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [
@@ -1756,7 +1755,7 @@ CRF2024 = {
                 "ACTIVITY DATA CO2 transported or injected(1)",
                 "IMPLIED EMISSION FACTORS CO2",
             ],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [
@@ -1794,7 +1793,7 @@ CRF2024 = {
             "col_for_categories": "GREENHOUSE GAS SOURCE AND SINK CATEGORIES",
             "categories": ["category", "class"],
             "cols_to_ignore": [],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [],
@@ -1812,7 +1811,7 @@ CRF2024 = {
             "col_for_categories": "GREENHOUSE GAS SOURCE AND SINK CATEGORIES",
             "categories": ["category"],
             "cols_to_ignore": [],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["industry"],
         },
         "sector_mapping": [
@@ -1860,7 +1859,7 @@ CRF2024 = {
             "col_for_categories": "GREENHOUSE GAS SOURCE AND SINK CATEGORIES",
             "categories": ["category"],
             "cols_to_ignore": [],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["industry"],
         },
         "sector_mapping": [
@@ -1906,7 +1905,7 @@ CRF2024 = {
             "col_for_categories": "GREENHOUSE GAS SOURCE AND SINK CATEGORIES",
             "categories": ["category"],
             "cols_to_ignore": [],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [],
@@ -1924,7 +1923,7 @@ CRF2024 = {
             "col_for_categories": "GREENHOUSE GAS SOURCE AND SINK CATEGORIES",
             "categories": ["category"],
             "cols_to_ignore": [],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [],
@@ -1942,7 +1941,7 @@ CRF2024 = {
             "col_for_categories": "GREENHOUSE GAS SOURCE AND SINK CATEGORIES",
             "categories": ["category"],
             "cols_to_ignore": [],
-            "stop_cats": [".", np.nan],
+            "stop_cats": [".", "nan"],
             "unit_info": unit_info["fgases"],
         },
         "sector_mapping": [
@@ -2019,7 +2018,7 @@ CRF2024 = {
             "col_for_categories": "GREENHOUSE GAS SOURCE AND",
             "categories": ["category"],
             "cols_to_ignore": [],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [
@@ -2202,7 +2201,7 @@ CRF2024 = {
             "col_for_categories": "GREENHOUSE GAS SOURCE AND",
             "categories": ["category"],
             "cols_to_ignore": [],
-            "stop_cats": [".", np.nan],
+            "stop_cats": [".", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [
@@ -2248,7 +2247,7 @@ CRF2024 = {
                 "ACTIVITY DATA AND OTHER RELATED INFORMATION Organic amendments added(3)",
                 "IMPLIED EMISSION FACTOR (1) CH4",
             ],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [
@@ -2290,7 +2289,7 @@ CRF2024 = {
                 "ACTIVITY DATA AND OTHER RELATED INFORMATION Value",
                 "IMPLIED EMISSION FACTORS Value",
             ],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [
@@ -2336,7 +2335,7 @@ CRF2024 = {
                 "IMPLIED EMISSION FACTORS CH4",
                 "IMPLIED EMISSION FACTORS N2O",
             ],
-            "stop_cats": ["", ".", np.nan],
+            "stop_cats": ["", ".", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [
@@ -2380,7 +2379,7 @@ CRF2024 = {
             "col_for_categories": "GREENHOUSE GAS SOURCE AND SINK CATEGORIES",
             "categories": ["category"],
             "cols_to_ignore": [],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [],
@@ -2398,7 +2397,7 @@ CRF2024 = {
             "col_for_categories": "GREENHOUSE GAS SOURCE AND SINK CATEGORIES",
             "categories": ["category"],
             "cols_to_ignore": [],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [],
@@ -2416,7 +2415,7 @@ CRF2024 = {
             "col_for_categories": "GREENHOUSE GAS SOURCE AND SINK CATEGORIES",
             "categories": ["category"],
             "cols_to_ignore": [],
-            "stop_cats": ["", ".", np.nan],
+            "stop_cats": ["", ".", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [
@@ -2473,7 +2472,7 @@ CRF2024 = {
             "col_for_categories": "GREENHOUSE GAS SOURCE AND SINK CATEGORIES",
             "categories": ["category"],
             "cols_to_ignore": [],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [
@@ -2539,7 +2538,7 @@ CRF2024 = {
                 "EMISSIONS SINK CATEGORIES CH4 Amount of CH4 flared",
                 "EMISSIONS SINK CATEGORIES CH4 Amount of CH4 for energy recovery(3)",
             ],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [
@@ -2572,7 +2571,7 @@ CRF2024 = {
                 "EMISSIONS CH4 Amount of CH4 flared",
                 "EMISSIONS CH4 Amount of CH4 for energy recovery(3)",
             ],
-            "stop_cats": [".", "", np.nan],
+            "stop_cats": [".", "", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [
@@ -2654,7 +2653,7 @@ CRF2024 = {
                 "IMPLIED EMISSION FACTOR Amount of wastes (incinerated/open burned) CH4",
                 "IMPLIED EMISSION FACTOR Amount of wastes (incinerated/open burned) N2O",
             ],
-            "stop_cats": [".", "", np.nan],
+            "stop_cats": [".", "", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [
@@ -2749,7 +2748,7 @@ CRF2024 = {
                 "EMISSIONS CH4 Amount of CH4 flared",
                 "EMISSIONS CH4 Amount of CH4 for Energy Recovery(5)",
             ],
-            "stop_cats": [".", "", np.nan],
+            "stop_cats": [".", "", "nan"],
             "unit_info": unit_info["default"],
         },
         "sector_mapping": [
@@ -2779,7 +2778,7 @@ CRF2024 = {
             "col_for_categories": "GREENHOUSE GAS SOURCE AND SINK CATEGORIES",
             "categories": ["category"],
             "cols_to_ignore": [],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["summary"],
         },
         "sector_mapping": [
@@ -2830,7 +2829,7 @@ CRF2024 = {
             "col_for_categories": "GREENHOUSE GAS SOURCE AND SINK CATEGORIES",
             "categories": ["category"],
             "cols_to_ignore": [],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["summary"],
         },
         "sector_mapping": [
@@ -2883,7 +2882,7 @@ CRF2024 = {
             "col_for_categories": "GREENHOUSE GAS SOURCE AND SINK CATEGORIES",
             "categories": ["category"],
             "cols_to_ignore": [],
-            "stop_cats": ["", np.nan],
+            "stop_cats": ["", "nan"],
             "unit_info": unit_info["summary"],
         },
         "sector_mapping": [

File diff suppressed because it is too large
+ 723 - 94
src/unfccc_ghg_data/unfccc_crf_reader/crf_specifications/crt1_specification.py


+ 30 - 26
src/unfccc_ghg_data/unfccc_crf_reader/unfccc_crf_reader_core.py

@@ -441,12 +441,6 @@ def read_crf_table_from_file(  # noqa: PLR0912, PLR0915
         keep_default_na=False,
     )
 
-    if len(df_raw) < nrows:
-        # print(f"read data truncated because of all-nan rows")
-        last_row_nan = True
-    else:
-        last_row_nan = False
-
     cols_to_drop = []
     # remove empty first column (because CRTables start with an empty column)
     # df_raw = df_raw.dropna(how="all", axis=1)
@@ -515,7 +509,9 @@ def read_crf_table_from_file(  # noqa: PLR0912, PLR0915
     # remove double spaces
     entities = [entity.strip() for entity in entities]
     entities = [re.sub("\\s+", " ", entity) for entity in entities]
-    entities = [re.sub("_x000d_ ", "", entity) for entity in entities]
+    entities = [re.sub("_x000d_", "", entity) for entity in entities]
+    entities = [re.sub("_x000D_", "", entity) for entity in entities]
+    entities = [re.sub("\\s+", " ", entity) for entity in entities]
 
     # replace the old header
     if len(header) > 2:  # noqa: PLR2004
@@ -554,7 +550,7 @@ def read_crf_table_from_file(  # noqa: PLR0912, PLR0915
         )
 
         for idx in range(1, len(df_current)):
-            current_cat = df_current.iloc[idx][cat_col]
+            current_cat = str(df_current.iloc[idx][cat_col])
             if current_cat in table_properties["stop_cats"]:
                 # we've reached the end of the table, so stop processing
                 # and remove all further rows
@@ -639,16 +635,15 @@ def read_crf_table_from_file(  # noqa: PLR0912, PLR0915
                 )
     else:
         for idx in range(1, len(df_current)):
-            current_cat = df_current.iloc[idx][cat_col]
+            current_cat = str(df_current.iloc[idx][cat_col])
             if current_cat in table_properties["stop_cats"]:
                 # we've reached the end of the table, so stop processing
                 # and remove all further rows
                 df_current = df_current.drop(df_current.index[idx:])
                 new_cats = new_cats[0:idx]
                 break
-            if current_cat in all_cats:
-                new_cats[idx] = unique_mapping[current_cat]
-                if (idx == len(df_current) - 1) and not last_row_nan:
+            else:
+                if idx == len(df_current) - 1:
                     print(
                         f"found information in last row: category {current_cat}, "
                         f"row {idx}"
@@ -656,14 +651,17 @@ def read_crf_table_from_file(  # noqa: PLR0912, PLR0915
                     info_last_row.append(
                         [table, file_info["party"], current_cat, file_info["data_year"]]
                     )
-            else:
-                print(
-                    f"Unknown category '{current_cat}' found in {table} for "
-                    f"{file_info['party']}, {file_info['data_year']}."
-                )
-                unknown_categories.append(
-                    [table, file_info["party"], current_cat, file_info["data_year"]]
-                )
+                if current_cat in all_cats:
+                    new_cats[idx] = unique_mapping[current_cat]
+
+                else:
+                    print(
+                        f"Unknown category '{current_cat}' found in {table} for "
+                        f"{file_info['party']}, {file_info['data_year']}."
+                    )
+                    unknown_categories.append(
+                        [table, file_info["party"], current_cat, file_info["data_year"]]
+                    )
 
     for idx, col in enumerate(table_properties["categories"]):
         df_current.insert(loc=idx, column=col, value=[cat[idx] for cat in new_cats])
@@ -1131,10 +1129,11 @@ def filter_category(
 
     """
     string_exclude = "\\C!-"
+    string_include = "\\C-"
     regex_exclude = r"\\C!-([A-Z\-]+)\\"
     regex_exclude_full = r"(\\C!-[A-Z\-]+\\)"
-    string_country = f"\\C-{country}\\"
-    regex_countries = r"^\\C-[A-Z]{3}\\"
+    regex_include = r"\\C-([A-Z\-]+)\\"
+    regex_include_full = r"(\\C-[A-Z\-]+\\)"
     new_mapping = mapping.copy()
     if mapping[0].startswith(string_exclude):
         re_result = re.search(regex_exclude, mapping[0])
@@ -1145,10 +1144,15 @@ def filter_category(
         else:
             re_result = re.search(regex_exclude_full, mapping[0])
             new_mapping[0] = mapping[0][len(re_result.group(1)) + 1 :]
-    elif mapping[0].startswith(string_country):
-        new_mapping[0] = mapping[0][len(string_country) + 1 :]
-    elif re.match(regex_countries, mapping[0]):
-        new_mapping[0] = "\\REMOVE"
+    elif mapping[0].startswith(string_include):
+        re_result = re.search(regex_include, mapping[0])
+        countries_in = re_result.group(1)
+        countries_in = countries_in.split("-")
+        if country in countries_in:
+            re_result = re.search(regex_include_full, mapping[0])
+            new_mapping[0] = mapping[0][len(re_result.group(1)) + 1 :]
+        else:
+            new_mapping[0] = "\\REMOVE"
 
     return new_mapping
 

+ 1 - 1
src/unfccc_ghg_data/unfccc_crf_reader/unfccc_crf_reader_devel.py

@@ -432,4 +432,4 @@ def save_last_row_info(
     df_processed_lost_row_info = pd.DataFrame(
         processed_last_row_info, columns=["Table", "Country", "Categories"]
     )
-    df_processed_lost_row_info.to_csv("test_last_row_info.csv", index=False)
+    df_processed_lost_row_info.to_csv(file, index=False)

+ 56 - 9
tests/unit/test_crf_reader.py

@@ -1,29 +1,24 @@
 from unfccc_ghg_data.unfccc_crf_reader.unfccc_crf_reader_core import (
+    filter_category,
     get_info_from_crf_filename,
     get_latest_date_for_country,
 )
 
-# def test_get_submission_dates()
-#     filter = {}
-#
-#     folder: Path,
-# file_filter: dict[str, Union[str, int, list]],
-
 
 def test_get_latest_date_for_country():
     # RUS CRF
     expected = "22082023"
-    date = get_latest_date_for_country("RUS", 2023, type="CRF")
+    date = get_latest_date_for_country("RUS", 2023, submission_type="CRF")
     assert date == expected
 
     # AUS CRT
     expected = "12042024"
-    date = get_latest_date_for_country("AUS", 1, type="CRT")
+    date = get_latest_date_for_country("AUS", 1, submission_type="CRT")
     assert date == expected
 
     # RUS CRT
     expected = "20241108"
-    date = get_latest_date_for_country("RUS", 1, type="CRT")
+    date = get_latest_date_for_country("RUS", 1, submission_type="CRT")
     assert date == expected
 
 
@@ -50,3 +45,55 @@ def test_get_info_from_crf_filename():
         "version": "V0.3",
     }
     assert expected == get_info_from_crf_filename(filename)
+
+
+def test_filter_category():
+    # general: a mapping without a country prefix is expected back unchanged
+    map_gen = ["Option C (country-specific):", ["\\IGNORE"], 4]
+    assert filter_category(map_gen, "MOZ") == map_gen
+
+    # country specific: `expected` is the mapping with its prefix stripped
+    expected = [
+        "Other (as specified in table 3(I).A)",
+        ["3.A.1.C"],
+        5,
+    ]
+    expected_remove = ["\\REMOVE", ["3.A.1.C"], 5]
+    # exclude multiple: \C!-A-B\ removes listed countries, keeps all others
+    map_excl_multiple = [
+        "\\C!-AUS-MLT-LUX-POL-SVN-USA\\ Other (as specified in table 3(I).A)",
+        ["3.A.1.C"],
+        5,
+    ]
+    assert filter_category(map_excl_multiple, "MOZ") == expected
+    assert filter_category(map_excl_multiple, "MLT") == expected_remove
+
+    # exclude single: same rule with only one country in the \C!-...\ prefix
+    map_excl_single = [
+        "\\C!-AUS\\ Other (as specified in table 3(I).A)",
+        ["3.A.1.C"],
+        5,
+    ]
+    expected = [
+        "Other (as specified in table 3(I).A)",
+        ["3.A.1.C"],
+        5,
+    ]
+    expected_remove = ["\\REMOVE", ["3.A.1.C"], 5]
+    assert filter_category(map_excl_single, "MOZ") == expected
+    assert filter_category(map_excl_single, "AUS") == expected_remove
+
+    # include multiple: \C-A-B\ keeps listed countries, removes all others
+    map_incl_multiple = [
+        "\\C-AUS-NLD\\ Other (as specified in table 3(I).A)",
+        ["3.A.1.C"],
+        5,
+    ]
+    assert filter_category(map_incl_multiple, "MOZ") == expected_remove
+    assert filter_category(map_incl_multiple, "AUS") == expected
+    assert filter_category(map_incl_multiple, "NLD") == expected
+
+    # include single: same rule with only one country in the \C-...\ prefix
+    map_incl_single = ["\\C-AUS\\ Other (as specified in table 3(I).A)", ["3.A.1.C"], 5]
+    assert filter_category(map_incl_single, "MOZ") == expected_remove
+    assert filter_category(map_incl_single, "AUS") == expected

Some files were not shown because too many files changed in this diff