Преглед на файлове

Add code for USA 2024 inventory; some small fixes in japan 2024 inventory and docs

Johannes Gütschow преди 8 месеца
родител
ревизия
d3f4dc179d
променени са 21 файла, в които са добавени 594 реда и са изтрити 19 реда
  1. 1 1
      src/unfccc_ghg_data/unfccc_reader/Chile/__init__.py
  2. 1 1
      src/unfccc_ghg_data/unfccc_reader/Colombia/__init__.py
  3. 1 1
      src/unfccc_ghg_data/unfccc_reader/Indonesia/__init__.py
  4. 1 1
      src/unfccc_ghg_data/unfccc_reader/Israel/__init__.py
  5. 1 1
      src/unfccc_ghg_data/unfccc_reader/Japan/__init__.py
  6. 2 2
      src/unfccc_ghg_data/unfccc_reader/Japan/config_jpn_inv2024.py
  7. 1 1
      src/unfccc_ghg_data/unfccc_reader/Malaysia/__init__.py
  8. 1 1
      src/unfccc_ghg_data/unfccc_reader/Mexico/__init__.py
  9. 1 1
      src/unfccc_ghg_data/unfccc_reader/Montenegro/__init__.py
  10. 1 1
      src/unfccc_ghg_data/unfccc_reader/Morocco/__init__.py
  11. 1 1
      src/unfccc_ghg_data/unfccc_reader/Nigeria/__init__.py
  12. 1 1
      src/unfccc_ghg_data/unfccc_reader/Peru/__init__.py
  13. 1 1
      src/unfccc_ghg_data/unfccc_reader/Republic_of_Korea/__init__.py
  14. 1 1
      src/unfccc_ghg_data/unfccc_reader/Singapore/__init__.py
  15. 1 1
      src/unfccc_ghg_data/unfccc_reader/Taiwan/__init__.py
  16. 1 1
      src/unfccc_ghg_data/unfccc_reader/Thailand/__init__.py
  17. 1 1
      src/unfccc_ghg_data/unfccc_reader/United_Kingdom/__init__.py
  18. 0 1
      src/unfccc_ghg_data/unfccc_reader/United_Kingdom/read_GBR_2024_Inventory_from_xlsx.py
  19. 30 0
      src/unfccc_ghg_data/unfccc_reader/United_States_of_America/__init__.py
  20. 364 0
      src/unfccc_ghg_data/unfccc_reader/United_States_of_America/config_usa_inv2024.py
  21. 182 0
      src/unfccc_ghg_data/unfccc_reader/United_States_of_America/read_USA_2024_Inventory_from_xlsx.py

+ 1 - 1
src/unfccc_ghg_data/unfccc_reader/Chile/__init__.py

@@ -1,6 +1,6 @@
 """Read Chile's BURs, NIRs, NCs
 """Read Chile's BURs, NIRs, NCs
 
 
-Scripts and configurations to read Argentina's submissions to the UNFCCC.
+Scripts and configurations to read Chile's submissions to the UNFCCC.
 Currently, the following submissions and datasets are available (all datasets
 Currently, the following submissions and datasets are available (all datasets
 including DI (red using the DI-reader) and legacy BUR/NIR (no code)):
 including DI (red using the DI-reader) and legacy BUR/NIR (no code)):
 
 

+ 1 - 1
src/unfccc_ghg_data/unfccc_reader/Colombia/__init__.py

@@ -1,6 +1,6 @@
 """Read Colombia's BURs, NIRs, NCs
 """Read Colombia's BURs, NIRs, NCs
 
 
-Scripts and configurations to read Argentina's submissions to the UNFCCC.
+Scripts and configurations to read Colombia's submissions to the UNFCCC.
 Currently, the following submissions and datasets are available (all datasets
 Currently, the following submissions and datasets are available (all datasets
 including DI (red using the DI-reader) and legacy BUR/NIR (no code)):
 including DI (red using the DI-reader) and legacy BUR/NIR (no code)):
 
 

+ 1 - 1
src/unfccc_ghg_data/unfccc_reader/Indonesia/__init__.py

@@ -1,6 +1,6 @@
 """Read Indonesia's BURs, NIRs, NCs
 """Read Indonesia's BURs, NIRs, NCs
 
 
-Scripts and configurations to read Argentina's submissions to the UNFCCC.
+Scripts and configurations to read Indonesia's submissions to the UNFCCC.
 Currently, the following submissions and datasets are available (all datasets
 Currently, the following submissions and datasets are available (all datasets
 including DI (red using the DI-reader) and legacy BUR/NIR (no code)):
 including DI (red using the DI-reader) and legacy BUR/NIR (no code)):
 
 

+ 1 - 1
src/unfccc_ghg_data/unfccc_reader/Israel/__init__.py

@@ -1,6 +1,6 @@
 """Read Israel's BURs, NIRs, NCs
 """Read Israel's BURs, NIRs, NCs
 
 
-Scripts and configurations to read Argentina's submissions to the UNFCCC.
+Scripts and configurations to read Israel's submissions to the UNFCCC.
 Currently, the following submissions and datasets are available (all datasets
 Currently, the following submissions and datasets are available (all datasets
 including DI (red using the DI-reader) and legacy BUR/NIR (no code)):
 including DI (red using the DI-reader) and legacy BUR/NIR (no code)):
 
 

+ 1 - 1
src/unfccc_ghg_data/unfccc_reader/Japan/__init__.py

@@ -1,6 +1,6 @@
 """Read Japan's national inventories
 """Read Japan's national inventories
 
 
-Scripts and configurations to read Argentina's submissions to the UNFCCC.
+Scripts and configurations to read Japan's submissions to the UNFCCC.
 Currently, the following submissions and datasets are available (all datasets
 Currently, the following submissions and datasets are available (all datasets
 including DI (red using the DI-reader) and legacy BUR/NIR (no code)):
 including DI (red using the DI-reader) and legacy BUR/NIR (no code)):
 
 

+ 2 - 2
src/unfccc_ghg_data/unfccc_reader/Japan/config_jpn_inv2024.py

@@ -348,12 +348,12 @@ cat_conversion = {
         },
         },
         "3.A": {"sources": ["3.A.1", "3.A.2"]},
         "3.A": {"sources": ["3.A.1", "3.A.2"]},
         "3.B": {"sources": ["3.B.1", "3.B.2", "3.B.3", "3.B.4", "3.B.5", "3.B.6"]},
         "3.B": {"sources": ["3.B.1", "3.B.2", "3.B.3", "3.B.4", "3.B.5", "3.B.6"]},
-        "3.C.1": {"sources": ["3.C.1.AG"]},
+        "3.C.1": {"sources": ["M.3.C.1.AG"]},
         "M.3.C.AG": {"sources": ["3.C.1", "3.C.2", "3.C.3", "M.3.C.45.AG", "3.C.7"]},
         "M.3.C.AG": {"sources": ["3.C.1", "3.C.2", "3.C.3", "M.3.C.45.AG", "3.C.7"]},
         "3.C": {"sources": ["M.3.C.AG"]},
         "3.C": {"sources": ["M.3.C.AG"]},
         "M.3.D.LU": {"sources": ["3.D.1", "M.3.D.2.LU"]},
         "M.3.D.LU": {"sources": ["3.D.1", "M.3.D.2.LU"]},
         "3.D": {"sources": ["M.3.D.LU"]},
         "3.D": {"sources": ["M.3.D.LU"]},
-        "M.AG.ELV": {"sources": ["3.C"]},
+        "M.AG.ELV": {"sources": ["M.3.C.AG"]},
         "3": {"sources": ["3.A", "3.B", "3.C", "3.D"]},
         "3": {"sources": ["3.A", "3.B", "3.C", "3.D"]},
         "4": {"sources": ["4.A", "4.B", "4.C", "4.D", "4.E"]},
         "4": {"sources": ["4.A", "4.B", "4.C", "4.D", "4.E"]},
         # consistency check
         # consistency check

+ 1 - 1
src/unfccc_ghg_data/unfccc_reader/Malaysia/__init__.py

@@ -1,6 +1,6 @@
 """Read Malaysia's BURs, NIRs, NCs
 """Read Malaysia's BURs, NIRs, NCs
 
 
-Scripts and configurations to read Argentina's submissions to the UNFCCC.
+Scripts and configurations to read Malaysia's submissions to the UNFCCC.
 Currently, the following submissions and datasets are available (all datasets
 Currently, the following submissions and datasets are available (all datasets
 including DI (red using the DI-reader) and legacy BUR/NIR (no code)):
 including DI (red using the DI-reader) and legacy BUR/NIR (no code)):
 
 

+ 1 - 1
src/unfccc_ghg_data/unfccc_reader/Mexico/__init__.py

@@ -1,6 +1,6 @@
 """Read Mexico's BURs, NIRs, NCs
 """Read Mexico's BURs, NIRs, NCs
 
 
-Scripts and configurations to read Argentina's submissions to the UNFCCC.
+Scripts and configurations to read Mexico's submissions to the UNFCCC.
 Currently, the following submissions and datasets are available (all datasets
 Currently, the following submissions and datasets are available (all datasets
 including DI (red using the DI-reader) and legacy BUR/NIR (no code)):
 including DI (red using the DI-reader) and legacy BUR/NIR (no code)):
 
 

+ 1 - 1
src/unfccc_ghg_data/unfccc_reader/Montenegro/__init__.py

@@ -1,6 +1,6 @@
 """Read Montenegro's BURs, NIRs, NCs
 """Read Montenegro's BURs, NIRs, NCs
 
 
-Scripts and configurations to read Argentina's submissions to the UNFCCC.
+Scripts and configurations to read Montenegro's submissions to the UNFCCC.
 Currently, the following submissions and datasets are available (all datasets
 Currently, the following submissions and datasets are available (all datasets
 including DI (red using the DI-reader) and legacy BUR/NIR (no code)):
 including DI (red using the DI-reader) and legacy BUR/NIR (no code)):
 
 

+ 1 - 1
src/unfccc_ghg_data/unfccc_reader/Morocco/__init__.py

@@ -1,6 +1,6 @@
 """Read Morocco's BURs, NIRs, NCs
 """Read Morocco's BURs, NIRs, NCs
 
 
-Scripts and configurations to read Argentina's submissions to the UNFCCC.
+Scripts and configurations to read Morocco's submissions to the UNFCCC.
 Currently, the following submissions and datasets are available (all datasets
 Currently, the following submissions and datasets are available (all datasets
 including DI (red using the DI-reader) and legacy BUR/NIR (no code)):
 including DI (red using the DI-reader) and legacy BUR/NIR (no code)):
 
 

+ 1 - 1
src/unfccc_ghg_data/unfccc_reader/Nigeria/__init__.py

@@ -1,6 +1,6 @@
 """Read Nigeria's BURs, NIRs, NCs
 """Read Nigeria's BURs, NIRs, NCs
 
 
-Scripts and configurations to read Argentina's submissions to the UNFCCC.
+Scripts and configurations to read Nigeria's submissions to the UNFCCC.
 Currently, the following submissions and datasets are available (all datasets
 Currently, the following submissions and datasets are available (all datasets
 including DI (red using the DI-reader) and legacy BUR/NIR (no code)):
 including DI (red using the DI-reader) and legacy BUR/NIR (no code)):
 
 

+ 1 - 1
src/unfccc_ghg_data/unfccc_reader/Peru/__init__.py

@@ -1,6 +1,6 @@
 """Read Peru's BURs, NIRs, NCs
 """Read Peru's BURs, NIRs, NCs
 
 
-Scripts and configurations to read Argentina's submissions to the UNFCCC.
+Scripts and configurations to read Peru's submissions to the UNFCCC.
 Currently, the following submissions and datasets are available (all datasets
 Currently, the following submissions and datasets are available (all datasets
 including DI (red using the DI-reader) and legacy BUR/NIR (no code)):
 including DI (red using the DI-reader) and legacy BUR/NIR (no code)):
 
 

+ 1 - 1
src/unfccc_ghg_data/unfccc_reader/Republic_of_Korea/__init__.py

@@ -1,6 +1,6 @@
 """Read South Korea's BURs, NIRs, NCs
 """Read South Korea's BURs, NIRs, NCs
 
 
-Scripts and configurations to read Argentina's submissions to the UNFCCC.
+Scripts and configurations to read South Korea's submissions to the UNFCCC.
 Currently, the following submissions and datasets are available (all datasets
 Currently, the following submissions and datasets are available (all datasets
 including DI (red using the DI-reader) and legacy BUR/NIR (no code)):
 including DI (red using the DI-reader) and legacy BUR/NIR (no code)):
 
 

+ 1 - 1
src/unfccc_ghg_data/unfccc_reader/Singapore/__init__.py

@@ -1,6 +1,6 @@
 """Read Singapore's BURs, NIRs, NCs
 """Read Singapore's BURs, NIRs, NCs
 
 
-Scripts and configurations to read Argentina's submissions to the UNFCCC.
+Scripts and configurations to read Singapore's submissions to the UNFCCC.
 Currently, the following submissions and datasets are available (all datasets
 Currently, the following submissions and datasets are available (all datasets
 including DI (red using the DI-reader) and legacy BUR/NIR (no code)):
 including DI (red using the DI-reader) and legacy BUR/NIR (no code)):
 
 

+ 1 - 1
src/unfccc_ghg_data/unfccc_reader/Taiwan/__init__.py

@@ -1,6 +1,6 @@
 """Read Taiwan's inventories
 """Read Taiwan's inventories
 
 
-Scripts and configurations to read Argentina's submissions to the UNFCCC.
+Scripts and configurations to read Taiwan's submissions to the UNFCCC.
 Currently, the following submissions and datasets are available (all datasets
 Currently, the following submissions and datasets are available (all datasets
 including DI (red using the DI-reader) and legacy BUR/NIR (no code)):
 including DI (red using the DI-reader) and legacy BUR/NIR (no code)):
 
 

+ 1 - 1
src/unfccc_ghg_data/unfccc_reader/Thailand/__init__.py

@@ -1,6 +1,6 @@
 """Read Thailand's BURs, NIRs, NCs
 """Read Thailand's BURs, NIRs, NCs
 
 
-Scripts and configurations to read Argentina's submissions to the UNFCCC.
+Scripts and configurations to read Thailand's submissions to the UNFCCC.
 Currently, the following submissions and datasets are available (all datasets
 Currently, the following submissions and datasets are available (all datasets
 including DI (red using the DI-reader) and legacy BUR/NIR (no code)):
 including DI (red using the DI-reader) and legacy BUR/NIR (no code)):
 
 

+ 1 - 1
src/unfccc_ghg_data/unfccc_reader/United_Kingdom/__init__.py

@@ -1,6 +1,6 @@
 """Read United Kingdom's national inventories
 """Read United Kingdom's national inventories
 
 
-Scripts and configurations to read Argentina's submissions to the UNFCCC.
+Scripts and configurations to read United Kingdom's submissions to the UNFCCC.
 Currently, the following submissions and datasets are available (all datasets
 Currently, the following submissions and datasets are available (all datasets
 including DI (red using the DI-reader) and legacy BUR/NIR (no code)):
 including DI (red using the DI-reader) and legacy BUR/NIR (no code)):
 
 

+ 0 - 1
src/unfccc_ghg_data/unfccc_reader/United_Kingdom/read_GBR_2024_Inventory_from_xlsx.py

@@ -147,7 +147,6 @@ if __name__ == "__main__":
     )
     )
 
 
     # adapt source and metadata
     # adapt source and metadata
-    # TODO: processing info is present twice
     current_source = data_pm2_2006.coords["source"].to_numpy()[0]
     current_source = data_pm2_2006.coords["source"].to_numpy()[0]
     data_temp = data_pm2_2006.pr.loc[{"source": current_source}]
     data_temp = data_pm2_2006.pr.loc[{"source": current_source}]
     data_pm2_2006 = data_pm2_2006.pr.set("source", "AI_INV", data_temp)
     data_pm2_2006 = data_pm2_2006.pr.set("source", "AI_INV", data_temp)

+ 30 - 0
src/unfccc_ghg_data/unfccc_reader/United_States_of_America/__init__.py

@@ -0,0 +1,30 @@
+"""Read United States of America's national inventories
+
+Scripts and configurations to read United States of America's submissions to the UNFCCC.
+Currently, the following submissions and datasets are available (all datasets
+including DI (read using the DI-reader) and legacy BUR/NIR (no code)):
+
+.. exec_code::
+    :hide_code:
+
+    from unfccc_ghg_data.helper.functions import (get_country_datasets,
+                                                  get_country_submissions)
+    country = 'USA'
+    # print available submissions
+    print("="*15 + " Available submissions " + "="*15)
+    get_country_submissions(country, True)
+    print("")
+
+    #print available datasets
+    print("="*15 + " Available datasets " + "="*15)
+    get_country_datasets(country, True)
+
+You can also obtain this information running
+
+.. code-block:: bash
+
+    poetry run doit country_info country=USA
+
+See below for a listing of scripts for BUR/NIR reading including links.
+
+"""

+ 364 - 0
src/unfccc_ghg_data/unfccc_reader/United_States_of_America/config_usa_inv2024.py

@@ -0,0 +1,364 @@
+"""Config for USA 2024 Inventory
+
+General configuration for reading the inventory files for USA's official 2024
+inventory from the csv tables ("Table 2-1.csv", "Table 2-2.csv")
+
+"""
+
+gwp_to_use = "AR5GWP100"
+category_mapping = {
+    "Abandoned Oil and Gas Wells": "M.1.B.2.ab.6",
+    "Abandoned Underground Coal Mines": "1.B.1.a.i.3",
+    "Adipic Acid Production": "2.B.3",
+    "Agricultural Soil Management": "3.D",
+    "Aluminum Production": "2.C.3",
+    "Ammonia Production": "2.B.1",
+    "Anaerobic Digestion at Biogas Facilities": "5.B.2",
+    "Biomass and Biodiesel Consumptiona": "M.Memo.Bio",
+    #'CH4': '4',
+    "CH4c": "M.0.EL",
+    "CO2": "M.0.EL",
+    "Caprolactam, Glyoxal, and Glyoxylic Acid Production": "2.B.4",
+    "Carbide Production and Consumption": "2.B.5",
+    "Carbon Dioxide Consumption": "M.2.B.10.b",
+    "Cement Production": "2.A.1",
+    "Coal Mining": "M.1.B.1.a",  # abandoned underground mines are missing
+    "Commercial": "1.A.4.a",
+    "Composting": "5.B.1",
+    "Electric Power Sector": "1.A.1",
+    "Electrical Equipment": "2.G.1",
+    "Electronics Industry": "2.E",
+    "Enteric Fermentation": "3.A",
+    "Ferroalloy Production": "2.C.2",
+    "Field Burning of Agricultural Residues": "3.F",
+    "Fluorochemical Production": "2.B.9",
+    "Fossil Fuel Combustion": "1.A",
+    "Glass Production": "2.A.3",
+    "HFCs": "2",
+    "Incineration of Waste": "1.A.5.a.iv",
+    "Industrial": "1.A.2",
+    "International Bunker Fuelsb": "M.Memo.Int",
+    "Iron and Steel Production & Metallurgical Coke Production": "2.C.1",
+    # 'LULUCF Carbon Stock Changee': '',
+    # 'LULUCF Emissionsc': '',
+    # 'LULUCF Sector Net Totalf': '',
+    "Landfills": "5.A.1",
+    "Lead Production": "2.C.5",
+    "Lime Production": "2.A.2",
+    "Liming": "3.G",
+    "Magnesium Production and Processing": "2.C.4",
+    "Manure Management": "3.B",
+    "Mobile Combustion": "M.1.A.MOB",
+    #'N2O': '4',
+    "N2O from Product Uses": "2.G.3",
+    "N2Oc": "M.0.EL",
+    "NF3": "2",
+    "Natural Gas Systems": "M.1.B.2.b",  # abandoned wells missing
+    "Net Emissions (Sources and Sinks)": "0",
+    "Nitric Acid Production": "2.B.2",
+    "Non-Energy Use of Fuels": "1.A.5.a.iii",
+    "Other Process Uses of Carbonates": "2.A.4",
+    "PFCs": "2",
+    "Petrochemical Production": "2.B.8",
+    "Petroleum Systems": "M.1.B.2.a",  # abandoned wells missing
+    "Phosphoric Acid Production": "M.2.B.10.c",
+    "Residential": "1.A.4.b",
+    "Rice Cultivation": "3.C",
+    "SF6": "2",
+    "SF6 and PFCs from Other Product Use": "2.G.2",
+    "Soda Ash Production": "2.B.7",
+    "Stationary Combustion": "M.1.A.STAT",
+    "Substitution of Ozone Depleting Substances": "2.F",
+    "Titanium Dioxide Production": "2.B.6",
+    "Total Gross Emissions (Sources)": "M.0.EL",
+    "Transportation": "1.A.3",
+    "U.S. Territories": "1.A.5.a.v",
+    "Urea Consumption for Non-Agricultural Purposes": "M.2.B.10.a",
+    "Urea Fertilization": "3.H",
+    "Wastewater Treatment": "5.D",
+    "Zinc Production": "2.C.6",
+}
+
+# name of the column that holds both the section headers (gases / totals) and
+# the source categories in the csv tables
+category_col = "Gas/Source"
+
+# Reading configuration per csv file. The keys of each inner dict are the values
+# in the `category_col` column which start a new section of the table. A value
+# of None marks a section that is only used as a section boundary and is not
+# read; a dict gives the `coords_defaults` and `coords_value_mapping` used when
+# converting that section to the primap2 interchange format.
+inventory_files = {
+    # Table 2-1: values given in CO2 equivalents, so only the sections not
+    # covered by Table 2-2 are read from here
+    "Table 2-1.csv": {
+        "CO2": None,
+        "CH4c": None,
+        "N2Oc": None,
+        "HFCs": {
+            "coords_defaults": {
+                "entity": f"HFCS ({gwp_to_use})",
+                "unit": "Mt CO2 / year",
+            },
+            "coords_value_mapping": {
+                "category": category_mapping,
+            },
+        },
+        "PFCs": {
+            "coords_defaults": {
+                "entity": f"PFCS ({gwp_to_use})",
+                "unit": "Mt CO2 / year",
+            },
+            "coords_value_mapping": {
+                "category": category_mapping,
+            },
+        },
+        "SF6": {
+            "coords_defaults": {
+                "entity": f"SF6 ({gwp_to_use})",
+                "unit": "Mt CO2 / year",
+            },
+            "coords_value_mapping": {
+                "category": category_mapping,
+            },
+        },
+        "NF3": {
+            "coords_defaults": {
+                "entity": f"NF3 ({gwp_to_use})",
+                "unit": "Mt CO2 / year",
+            },
+            "coords_value_mapping": {
+                "category": category_mapping,
+            },
+        },
+        "Total Gross Emissions (Sources)": {
+            "coords_defaults": {
+                "entity": f"KYOTOGHG ({gwp_to_use})",
+                "unit": "Mt CO2 / year",
+            },
+            "coords_value_mapping": {
+                "category": category_mapping,
+            },
+        },
+        # in the LULUCF section the rows are gases / totals, so the row label
+        # determines the entity while the category is always "4"
+        "LULUCF Emissionsc": {
+            "coords_defaults": {"unit": "Mt CO2 / year"},
+            "coords_value_mapping": {
+                "entity": {
+                    #'LULUCF Emissionsc': '',
+                    "CH4": f"CH4 ({gwp_to_use})",
+                    "N2O": f"N2O ({gwp_to_use})",
+                    "LULUCF Carbon Stock Changee": "CO2",
+                    # fixed: was "KYOTGHG", which does not match the
+                    # "KYOTOGHG" basket name used everywhere else
+                    "LULUCF Sector Net Totalf": f"KYOTOGHG ({gwp_to_use})",
+                },
+                "category": {
+                    #'LULUCF Emissionsc': '',
+                    "CH4": "4",
+                    "N2O": "4",
+                    "LULUCF Carbon Stock Changee": "4",
+                    "LULUCF Sector Net Totalf": "4",
+                },
+            },
+        },
+        "Net Emissions (Sources and Sinks)": {
+            "coords_defaults": {
+                "entity": f"KYOTOGHG ({gwp_to_use})",
+                "unit": "Mt CO2 / year",
+            },
+            "coords_value_mapping": {
+                "category": category_mapping,
+            },
+        },
+        # footnote row, only used to terminate the last data section
+        "+ Does not exceed 0.05 MMT CO2 Eq.": None,
+    },
+    # Table 2-2: values given in mass units of the individual gases
+    "Table 2-2.csv": {
+        "CO2": {
+            "coords_defaults": {"entity": "CO2", "unit": "kt CO2 / year"},
+            "coords_value_mapping": {
+                "category": category_mapping,
+            },
+        },
+        "CH4c": {
+            "coords_defaults": {"entity": "CH4", "unit": "kt CH4 / year"},
+            "coords_value_mapping": {
+                "category": category_mapping,
+            },
+        },
+        "N2Oc": {
+            "coords_defaults": {"entity": "N2O", "unit": "kt N2O / year"},
+            "coords_value_mapping": {
+                "category": category_mapping,
+            },
+        },
+        "HFCs": None,
+        "PFCs": None,
+        # NOTE(review): "SF?" / "NF?" presumably mirror the labels as they
+        # appear in the csv file - confirm against the input data
+        "SF?": None,
+        "NF?": None,
+        # footnote row, only used to terminate the last data section
+        "+ Does not exceed 0.5 kt.": None,
+    },
+}
+
+# format of the year column headers in the csv tables
+time_format = "%Y"
+
+# columns of the input table that are used as coordinates (the reader adds an
+# "entity" column for sections which map the row label to an entity)
+coords_cols_template = {
+    "category": category_col,
+}
+
+coords_terminologies = {
+    "area": "ISO3",
+    "category": "CRF2013_2023",
+    "scenario": "PRIMAP",
+}
+
+# coordinate values shared by all sections; entity and unit are added per
+# section from `inventory_files`
+coords_defaults_template = {
+    "source": "USA-GHG-Inventory",
+    "provenance": "measured",
+    "area": "USA",
+    "scenario": "2024INV",
+}
+
+meta_data = {
+    "references": "https://www.epa.gov/ghgemissions/"
+    "inventory-us-greenhouse-gas-emissions-and-sinks-1990-2022",
+    "rights": "",
+    "contact": "johannes.guetschow@climate-resource.com",
+    "title": "Inventory of U.S. Greenhouse Gas Emissions and Sinks: 1990-2022",
+    "comment": "Read from csv files by Johannes Gütschow",
+    "institution": "United States Environmental Protection Agency",
+}
+
+# rows to drop before conversion: the "LULUCF Emissionsc" header row carries no
+# entity / category mapping of its own
+filter_remove = {
+    "f1": {
+        category_col: [
+            "LULUCF Emissionsc",
+        ]
+    }
+}
+
+### processing
+
+cat_conversion = {
+    "mapping": {
+        "0": "0",
+        "1.A": "1.A",
+        "1.A.1": "1.A.1",
+        "1.A.2": "1.A.2",
+        "1.A.3": "1.A.3",
+        "1.A.4.a": "1.A.4.a",
+        "1.A.4.b": "1.A.4.b",
+        "1.A.5.a.iii": "2.D",  # non energy fuel use
+        "1.A.5.a.iv": "M.1.A.5.a.iv",  # waste incineration
+        "1.A.5.a.v": "1.A.5.a.v",  # US Territories
+        "1.B.1.a.i.3": "1.B.1.a.i.3",
+        "2": "2",
+        "2.A.1": "2.A.1",
+        "2.A.2": "2.A.2",
+        "2.A.3": "2.A.3",
+        "2.A.4": "2.A.4",
+        "2.B.1": "2.B.1",
+        "2.B.2": "2.B.2",
+        "2.B.3": "2.B.3",
+        "2.B.4": "2.B.4",
+        "2.B.5": "2.B.5",
+        "2.B.6": "2.B.6",
+        "2.B.7": "2.B.7",
+        "2.B.8": "2.B.8",
+        "2.B.9": "2.B.9",
+        "2.C.1": "2.C.1",
+        "2.C.2": "2.C.2",
+        "2.C.3": "2.C.3",
+        "2.C.4": "2.C.4",
+        "2.C.5": "2.C.5",
+        "2.C.6": "2.C.6",
+        "2.E": "2.E",
+        "2.F": "2.F",
+        "2.G.1": "2.G.1",
+        "2.G.2": "2.G.2",
+        "2.G.3": "2.G.3",
+        "3.A": "3.A.1",
+        "3.B": "3.A.2",
+        "3.C": "3.C.7",
+        "3.D": "M.3.C.45.AG",
+        "3.F": "M.3.C.1.AG",
+        "3.G": "3.C.2",
+        "3.H": "3.C.3",
+        "4": "M.LULUCF",
+        "5.A.1": "4.A.1",
+        "5.B.1": "4.B.1",
+        "5.B.2": "4.B.2",
+        "5.D": "4.D",
+        "M.0.EL": "M.0.EL",
+        "M.1.A.MOB": "M.1.A.MOB",
+        "M.1.A.STAT": "M.1.A.STAT",
+        "M.1.B.1.a": "M.1.B.1.a",
+        "M.1.B.2.a": "M.1.B.2.a",
+        "M.1.B.2.ab.6": "M.1.B.2.ab.6",
+        "M.1.B.2.b": "M.1.B.2.b",
+        "M.2.B.10.a": "M.2.B.10.a",
+        "M.2.B.10.b": "M.2.B.10.b",
+        "M.2.B.10.c": "M.2.B.10.c",
+        "M.Memo.Bio": "M.BIO",
+        "M.Memo.Int": "M.BK",
+    },
+    "aggregate": {
+        # 1
+        "1.A.4": {"sources": ["1.A.4.a", "1.A.4.b"]},
+        "1.A.5.a": {"sources": ["M.1.A.5.a.iv", "M.1.A.5.a.v"]},
+        "1.A.5": {"sources": ["1.A.5.a"]},
+        "1.A": {
+            "sources": ["M.1.A.MOB", "M.1.A.STAT", "1.A.5"],
+            "filter": {"entity": ["CH4", "N2O"]},
+        },
+        "1.B.1": {"sources": ["M.1.B.1.a", "1.B.1.a.i.3"]},
+        "1.B.2": {"sources": ["M.1.B.2.a", "M.1.B.2.b", "M.1.B.2.ab.6"]},
+        "1.B": {"sources": ["1.B.1", "1.B.2"]},
+        "1": {"sources": ["1.A", "1.B"]},
+        # 2
+        "2.A": {"sources": ["2.A.1", "2.A.2", "2.A.3", "2.A.4"]},
+        "2.B.10": {"sources": ["M.2.B.10.a", "M.2.B.10.b", "M.2.B.10.c"]},
+        "2.B": {
+            "sources": [
+                "2.B.1",
+                "2.B.2",
+                "2.B.3",
+                "2.B.4",
+                "2.B.5",
+                "2.B.6",
+                "2.B.7",
+                "2.B.8",
+                "2.B.9",
+                "2.B.10",
+            ]
+        },
+        "2.C": {"sources": ["2.C.1", "2.C.2", "2.C.3", "2.C.4", "2.C.5", "2.C.6"]},
+        "2.G": {"sources": ["2.G.1", "2.G.2", "2.G.3"]},
+        "2": {
+            "sources": ["2.A", "2.B", "2.C", "2.D", "2.E", "2.F", "2.G"],
+            "tolerance": 0.251,  # rounding inconsistencies in NF3 and PFCs after 2008
+        },
+        # M.AG
+        "3.A": {"sources": ["3.A.1", "3.A.2"]},
+        "3.C.1": {"sources": ["M.3.C.1.AG"]},
+        "M.3.C.AG": {"sources": ["3.C.1", "3.C.2", "3.C.3", "M.3.C.45.AG", "3.C.7"]},
+        "3.C": {"sources": ["M.3.C.AG"]},
+        "M.AG.ELV": {"sources": ["M.3.C.AG"]},
+        "M.AG": {"sources": ["M.AG.ELV", "3.A"]},
+        # 3
+        "3": {"sources": ["M.AG", "M.LULUCF"]},
+        # 4
+        "4.A": {"sources": ["4.A.1"]},
+        "4.B": {"sources": ["4.B.1", "4.B.2"]},
+        "4": {"sources": ["4.A", "4.B", "4.D"]},
+        # consistency check
+        "0": {"sources": ["1", "2", "3", "4"]},
+        "M.0.EL": {"sources": ["1", "2", "M.AG", "4"]},
+    },
+}
+
+basket_copy = {
+    "GWPs_to_add": ["SARGWP100", "AR4GWP100", "AR6GWP100"],
+    "entities": ["HFCS", "PFCS"],
+    "source_GWP": gwp_to_use,
+}
+
+gas_baskets = {
+    "FGASES (SARGWP100)": ["HFCS (SARGWP100)", "PFCS (SARGWP100)", "SF6", "NF3"],
+    "FGASES (AR4GWP100)": ["HFCS (AR4GWP100)", "PFCS (AR4GWP100)", "SF6", "NF3"],
+    "FGASES (AR5GWP100)": ["HFCS (AR5GWP100)", "PFCS (AR5GWP100)", "SF6", "NF3"],
+    "FGASES (AR6GWP100)": ["HFCS (AR6GWP100)", "PFCS (AR6GWP100)", "SF6", "NF3"],
+    "KYOTOGHG (SARGWP100)": ["CO2", "CH4", "N2O", "FGASES (SARGWP100)"],
+    "KYOTOGHG (AR4GWP100)": ["CO2", "CH4", "N2O", "FGASES (AR4GWP100)"],
+    "KYOTOGHG (AR5GWP100)": ["CO2", "CH4", "N2O", "FGASES (AR5GWP100)"],
+    "KYOTOGHG (AR6GWP100)": ["CO2", "CH4", "N2O", "FGASES (AR6GWP100)"],
+}
+
+terminology_proc = "IPCC2006_PRIMAP"

+ 182 - 0
src/unfccc_ghg_data/unfccc_reader/United_States_of_America/read_USA_2024_Inventory_from_xlsx.py

@@ -0,0 +1,182 @@
+"""
+Read USA's 2024 inventory from the csv tables (main text, trends chapter)
+
+Files available here: https://www.epa.gov/ghgemissions/
+inventory-us-greenhouse-gas-emissions-and-sinks-1990-2022
+
+Only the overview tables are read as details are in several individual tables and
+overview is sufficient for PRIMAP-hist
+
+"""
+import pandas as pd
+import primap2 as pm2
+
+from unfccc_ghg_data.helper import (
+    downloaded_data_path,
+    extracted_data_path,
+    process_data_for_country,
+)
+from unfccc_ghg_data.unfccc_reader.United_States_of_America.config_usa_inv2024 import (
+    basket_copy,
+    cat_conversion,
+    category_col,
+    coords_cols_template,
+    coords_defaults_template,
+    coords_terminologies,
+    filter_remove,
+    gas_baskets,
+    inventory_files,
+    meta_data,
+    terminology_proc,
+    time_format,
+)
+
+if __name__ == "__main__":
+    pd.set_option("future.no_silent_downcasting", True)
+
+    # ###
+    # configuration
+    # ###
+
+    # folders and files
+    input_folder = (
+        downloaded_data_path
+        / "non-UNFCCC"
+        / "United_States_of_America"
+        / "2024-Inventory"
+        / "main-text-tables"
+        / "trends"
+    )
+    output_folder = extracted_data_path / "non-UNFCCC" / "United_States_of_America"
+    # also create missing parent folders (e.g. "non-UNFCCC"); a plain mkdir()
+    # raises if the parent does not exist yet
+    output_folder.mkdir(parents=True, exist_ok=True)
+
+    output_filename = "USA_2024-Inventory_"
+    compression = dict(zlib=True, complevel=9)
+
+    # ###
+    # start data reading
+    # ###
+
+    data_pm2 = None
+
+    for file, sections in inventory_files.items():
+        data_current_pd = pd.read_csv(input_folder / file, header=[1])
+        # remove the thousands separators (can't be done during reading as data is
+        # stored as string)
+        all_cols = data_current_pd.columns
+        data_cols = [col for col in all_cols if col != category_col]
+        for col in data_cols:
+            if data_current_pd.dtypes[col] == "object":
+                data_current_pd[col] = data_current_pd[col].str.replace(",", "")
+
+        # find the row range of each section: a section starts at the row whose
+        # label is one of the configured section keys and ends where the next
+        # section starts (the last section found has no "end")
+        key_info = {}
+        last_key = None
+        for i, row in data_current_pd.iterrows():
+            label = row[category_col]
+            if label in sections:
+                key_info[label] = {"start": i}
+                if last_key is not None:
+                    key_info[last_key]["end"] = i
+                last_key = label
+
+        for section_key, current_config in sections.items():
+            if current_config is None:
+                # section is only needed as a boundary, nothing to read
+                continue
+
+            # get the data; a missing "end" gives None and thus slices to the
+            # end of the table
+            section_rows = key_info[section_key]
+            data_section = data_current_pd.iloc[
+                section_rows["start"] : section_rows.get("end")
+            ].copy()
+
+            # convert to primap2 IF
+            coords_defaults = coords_defaults_template.copy()
+            coords_defaults.update(current_config["coords_defaults"])
+            coords_value_mapping = current_config["coords_value_mapping"]
+            coords_cols = coords_cols_template.copy()
+            if "entity" in coords_value_mapping:
+                # make a copy of the category column as we also need it for entity
+                data_section["entity"] = data_section[category_col]
+                coords_cols["entity"] = "entity"
+
+            data_section_if = pm2.pm2io.convert_wide_dataframe_if(
+                data_section,
+                coords_cols=coords_cols,
+                coords_terminologies=coords_terminologies,
+                coords_defaults=coords_defaults,
+                coords_value_mapping=coords_value_mapping,
+                filter_remove=filter_remove,
+                meta_data=meta_data,
+                time_format=time_format,
+            )
+            # convert to primap2 native format
+            data_section_pm2 = pm2.pm2io.from_interchange_format(data_section_if)
+
+            # merge with other data
+            if data_pm2 is None:
+                data_pm2 = data_section_pm2
+            else:
+                data_pm2 = data_pm2.pr.merge(data_section_pm2)
+
+    # convert back to IF to have units in the fixed format
+    data_if = data_pm2.pr.to_interchange_format()
+
+    # ###
+    # save data to IF and native format
+    # ###
+    pm2.pm2io.write_interchange_format(
+        output_folder / (output_filename + coords_terminologies["category"]), data_if
+    )
+
+    encoding = {var: compression for var in data_pm2.data_vars}
+    data_pm2.pr.to_netcdf(
+        output_folder / (output_filename + coords_terminologies["category"] + ".nc"),
+        encoding=encoding,
+    )
+
+    # ###
+    # conversion to ipcc 2006 categories
+    # ###
+
+    data_pm2_2006 = data_pm2.copy()
+
+    # actual processing
+
+    country_processing = {
+        "basket_copy": basket_copy,
+    }
+
+    data_pm2_2006 = process_data_for_country(
+        data_pm2_2006,
+        entities_to_ignore=[],
+        gas_baskets=gas_baskets,
+        processing_info_country=country_processing,
+        cat_terminology_out=terminology_proc,
+        category_conversion=cat_conversion,
+        # sectors_out=sectors_to_save,
+    )
+
+    # adapt source and metadata
+    current_source = data_pm2_2006.coords["source"].to_numpy()[0]
+    data_temp = data_pm2_2006.pr.loc[{"source": current_source}]
+    data_pm2_2006 = data_pm2_2006.pr.set("source", "AI_INV", data_temp)
+    data_pm2_2006 = data_pm2_2006.pr.loc[{"source": ["AI_INV"]}]
+
+    # convert back to IF to have units in the fixed format
+    data_if_2006 = data_pm2_2006.pr.to_interchange_format()
+
+    pm2.pm2io.write_interchange_format(
+        output_folder / (output_filename + terminology_proc),
+        data_if_2006,
+    )
+
+    encoding = {var: compression for var in data_pm2_2006.data_vars}
+    data_pm2_2006.pr.to_netcdf(
+        output_folder / (output_filename + terminology_proc + ".nc"),
+        encoding=encoding,
+    )