Browse Source

Add code for USA 2024 inventory; some small fixes in japan 2024 inventory and docs

Johannes Gütschow 8 months ago
parent
commit
d3f4dc179d
21 changed files with 594 additions and 19 deletions
  1. 1 1
      src/unfccc_ghg_data/unfccc_reader/Chile/__init__.py
  2. 1 1
      src/unfccc_ghg_data/unfccc_reader/Colombia/__init__.py
  3. 1 1
      src/unfccc_ghg_data/unfccc_reader/Indonesia/__init__.py
  4. 1 1
      src/unfccc_ghg_data/unfccc_reader/Israel/__init__.py
  5. 1 1
      src/unfccc_ghg_data/unfccc_reader/Japan/__init__.py
  6. 2 2
      src/unfccc_ghg_data/unfccc_reader/Japan/config_jpn_inv2024.py
  7. 1 1
      src/unfccc_ghg_data/unfccc_reader/Malaysia/__init__.py
  8. 1 1
      src/unfccc_ghg_data/unfccc_reader/Mexico/__init__.py
  9. 1 1
      src/unfccc_ghg_data/unfccc_reader/Montenegro/__init__.py
  10. 1 1
      src/unfccc_ghg_data/unfccc_reader/Morocco/__init__.py
  11. 1 1
      src/unfccc_ghg_data/unfccc_reader/Nigeria/__init__.py
  12. 1 1
      src/unfccc_ghg_data/unfccc_reader/Peru/__init__.py
  13. 1 1
      src/unfccc_ghg_data/unfccc_reader/Republic_of_Korea/__init__.py
  14. 1 1
      src/unfccc_ghg_data/unfccc_reader/Singapore/__init__.py
  15. 1 1
      src/unfccc_ghg_data/unfccc_reader/Taiwan/__init__.py
  16. 1 1
      src/unfccc_ghg_data/unfccc_reader/Thailand/__init__.py
  17. 1 1
      src/unfccc_ghg_data/unfccc_reader/United_Kingdom/__init__.py
  18. 0 1
      src/unfccc_ghg_data/unfccc_reader/United_Kingdom/read_GBR_2024_Inventory_from_xlsx.py
  19. 30 0
      src/unfccc_ghg_data/unfccc_reader/United_States_of_America/__init__.py
  20. 364 0
      src/unfccc_ghg_data/unfccc_reader/United_States_of_America/config_usa_inv2024.py
  21. 182 0
      src/unfccc_ghg_data/unfccc_reader/United_States_of_America/read_USA_2024_Inventory_from_xlsx.py

+ 1 - 1
src/unfccc_ghg_data/unfccc_reader/Chile/__init__.py

@@ -1,6 +1,6 @@
 """Read Chile's BURs, NIRs, NCs
 
-Scripts and configurations to read Argentina's submissions to the UNFCCC.
+Scripts and configurations to read Chile's submissions to the UNFCCC.
 Currently, the following submissions and datasets are available (all datasets
 including DI (red using the DI-reader) and legacy BUR/NIR (no code)):
 

+ 1 - 1
src/unfccc_ghg_data/unfccc_reader/Colombia/__init__.py

@@ -1,6 +1,6 @@
 """Read Colombia's BURs, NIRs, NCs
 
-Scripts and configurations to read Argentina's submissions to the UNFCCC.
+Scripts and configurations to read Colombia's submissions to the UNFCCC.
 Currently, the following submissions and datasets are available (all datasets
 including DI (red using the DI-reader) and legacy BUR/NIR (no code)):
 

+ 1 - 1
src/unfccc_ghg_data/unfccc_reader/Indonesia/__init__.py

@@ -1,6 +1,6 @@
 """Read Indonesia's BURs, NIRs, NCs
 
-Scripts and configurations to read Argentina's submissions to the UNFCCC.
+Scripts and configurations to read Indonesia's submissions to the UNFCCC.
 Currently, the following submissions and datasets are available (all datasets
 including DI (red using the DI-reader) and legacy BUR/NIR (no code)):
 

+ 1 - 1
src/unfccc_ghg_data/unfccc_reader/Israel/__init__.py

@@ -1,6 +1,6 @@
 """Read Israel's BURs, NIRs, NCs
 
-Scripts and configurations to read Argentina's submissions to the UNFCCC.
+Scripts and configurations to read Israel's submissions to the UNFCCC.
 Currently, the following submissions and datasets are available (all datasets
 including DI (red using the DI-reader) and legacy BUR/NIR (no code)):
 

+ 1 - 1
src/unfccc_ghg_data/unfccc_reader/Japan/__init__.py

@@ -1,6 +1,6 @@
 """Read Japan's national inventories
 
-Scripts and configurations to read Argentina's submissions to the UNFCCC.
+Scripts and configurations to read Japan's submissions to the UNFCCC.
 Currently, the following submissions and datasets are available (all datasets
 including DI (red using the DI-reader) and legacy BUR/NIR (no code)):
 

+ 2 - 2
src/unfccc_ghg_data/unfccc_reader/Japan/config_jpn_inv2024.py

@@ -348,12 +348,12 @@ cat_conversion = {
         },
         "3.A": {"sources": ["3.A.1", "3.A.2"]},
         "3.B": {"sources": ["3.B.1", "3.B.2", "3.B.3", "3.B.4", "3.B.5", "3.B.6"]},
-        "3.C.1": {"sources": ["3.C.1.AG"]},
+        "3.C.1": {"sources": ["M.3.C.1.AG"]},
         "M.3.C.AG": {"sources": ["3.C.1", "3.C.2", "3.C.3", "M.3.C.45.AG", "3.C.7"]},
         "3.C": {"sources": ["M.3.C.AG"]},
         "M.3.D.LU": {"sources": ["3.D.1", "M.3.D.2.LU"]},
         "3.D": {"sources": ["M.3.D.LU"]},
-        "M.AG.ELV": {"sources": ["3.C"]},
+        "M.AG.ELV": {"sources": ["M.3.C.AG"]},
         "3": {"sources": ["3.A", "3.B", "3.C", "3.D"]},
         "4": {"sources": ["4.A", "4.B", "4.C", "4.D", "4.E"]},
         # consistency check

+ 1 - 1
src/unfccc_ghg_data/unfccc_reader/Malaysia/__init__.py

@@ -1,6 +1,6 @@
 """Read Malaysia's BURs, NIRs, NCs
 
-Scripts and configurations to read Argentina's submissions to the UNFCCC.
+Scripts and configurations to read Malaysia's submissions to the UNFCCC.
 Currently, the following submissions and datasets are available (all datasets
 including DI (red using the DI-reader) and legacy BUR/NIR (no code)):
 

+ 1 - 1
src/unfccc_ghg_data/unfccc_reader/Mexico/__init__.py

@@ -1,6 +1,6 @@
 """Read Mexico's BURs, NIRs, NCs
 
-Scripts and configurations to read Argentina's submissions to the UNFCCC.
+Scripts and configurations to read Mexico's submissions to the UNFCCC.
 Currently, the following submissions and datasets are available (all datasets
 including DI (red using the DI-reader) and legacy BUR/NIR (no code)):
 

+ 1 - 1
src/unfccc_ghg_data/unfccc_reader/Montenegro/__init__.py

@@ -1,6 +1,6 @@
 """Read Montenegro's BURs, NIRs, NCs
 
-Scripts and configurations to read Argentina's submissions to the UNFCCC.
+Scripts and configurations to read Montenegro's submissions to the UNFCCC.
 Currently, the following submissions and datasets are available (all datasets
 including DI (red using the DI-reader) and legacy BUR/NIR (no code)):
 

+ 1 - 1
src/unfccc_ghg_data/unfccc_reader/Morocco/__init__.py

@@ -1,6 +1,6 @@
 """Read Morocco's BURs, NIRs, NCs
 
-Scripts and configurations to read Argentina's submissions to the UNFCCC.
+Scripts and configurations to read Morocco's submissions to the UNFCCC.
 Currently, the following submissions and datasets are available (all datasets
 including DI (red using the DI-reader) and legacy BUR/NIR (no code)):
 

+ 1 - 1
src/unfccc_ghg_data/unfccc_reader/Nigeria/__init__.py

@@ -1,6 +1,6 @@
 """Read Nigeria's BURs, NIRs, NCs
 
-Scripts and configurations to read Argentina's submissions to the UNFCCC.
+Scripts and configurations to read Nigeria's submissions to the UNFCCC.
 Currently, the following submissions and datasets are available (all datasets
 including DI (red using the DI-reader) and legacy BUR/NIR (no code)):
 

+ 1 - 1
src/unfccc_ghg_data/unfccc_reader/Peru/__init__.py

@@ -1,6 +1,6 @@
 """Read Peru's BURs, NIRs, NCs
 
-Scripts and configurations to read Argentina's submissions to the UNFCCC.
+Scripts and configurations to read Peru's submissions to the UNFCCC.
 Currently, the following submissions and datasets are available (all datasets
 including DI (red using the DI-reader) and legacy BUR/NIR (no code)):
 

+ 1 - 1
src/unfccc_ghg_data/unfccc_reader/Republic_of_Korea/__init__.py

@@ -1,6 +1,6 @@
 """Read South Korea's BURs, NIRs, NCs
 
-Scripts and configurations to read Argentina's submissions to the UNFCCC.
+Scripts and configurations to read South Korea's submissions to the UNFCCC.
 Currently, the following submissions and datasets are available (all datasets
 including DI (red using the DI-reader) and legacy BUR/NIR (no code)):
 

+ 1 - 1
src/unfccc_ghg_data/unfccc_reader/Singapore/__init__.py

@@ -1,6 +1,6 @@
 """Read Singapore's BURs, NIRs, NCs
 
-Scripts and configurations to read Argentina's submissions to the UNFCCC.
+Scripts and configurations to read Singapore's submissions to the UNFCCC.
 Currently, the following submissions and datasets are available (all datasets
 including DI (red using the DI-reader) and legacy BUR/NIR (no code)):
 

+ 1 - 1
src/unfccc_ghg_data/unfccc_reader/Taiwan/__init__.py

@@ -1,6 +1,6 @@
 """Read Taiwan's inventories
 
-Scripts and configurations to read Argentina's submissions to the UNFCCC.
+Scripts and configurations to read Taiwan's submissions to the UNFCCC.
 Currently, the following submissions and datasets are available (all datasets
 including DI (red using the DI-reader) and legacy BUR/NIR (no code)):
 

+ 1 - 1
src/unfccc_ghg_data/unfccc_reader/Thailand/__init__.py

@@ -1,6 +1,6 @@
 """Read Thailand's BURs, NIRs, NCs
 
-Scripts and configurations to read Argentina's submissions to the UNFCCC.
+Scripts and configurations to read Thailand's submissions to the UNFCCC.
 Currently, the following submissions and datasets are available (all datasets
 including DI (red using the DI-reader) and legacy BUR/NIR (no code)):
 

+ 1 - 1
src/unfccc_ghg_data/unfccc_reader/United_Kingdom/__init__.py

@@ -1,6 +1,6 @@
 """Read United Kingdom's national inventories
 
-Scripts and configurations to read Argentina's submissions to the UNFCCC.
+Scripts and configurations to read United Kingdom's submissions to the UNFCCC.
 Currently, the following submissions and datasets are available (all datasets
 including DI (red using the DI-reader) and legacy BUR/NIR (no code)):
 

+ 0 - 1
src/unfccc_ghg_data/unfccc_reader/United_Kingdom/read_GBR_2024_Inventory_from_xlsx.py

@@ -147,7 +147,6 @@ if __name__ == "__main__":
     )
 
     # adapt source and metadata
-    # TODO: processing info is present twice
     current_source = data_pm2_2006.coords["source"].to_numpy()[0]
     data_temp = data_pm2_2006.pr.loc[{"source": current_source}]
     data_pm2_2006 = data_pm2_2006.pr.set("source", "AI_INV", data_temp)

+ 30 - 0
src/unfccc_ghg_data/unfccc_reader/United_States_of_America/__init__.py

@@ -0,0 +1,30 @@
+"""Read United States of America's national inventories
+
+Scripts and configurations to read United States of America's submissions to the UNFCCC.
+Currently, the following submissions and datasets are available (all datasets
+including DI (read using the DI-reader) and legacy BUR/NIR (no code)):
+
+.. exec_code::
+    :hide_code:
+
+    from unfccc_ghg_data.helper.functions import (get_country_datasets,
+                                                  get_country_submissions)
+    country = 'USA'
+    # print available submissions
+    print("="*15 + " Available submissions " + "="*15)
+    get_country_submissions(country, True)
+    print("")
+
+    #print available datasets
+    print("="*15 + " Available datasets " + "="*15)
+    get_country_datasets(country, True)
+
+You can also obtain this information running
+
+.. code-block:: bash
+
+    poetry run doit country_info country=USA
+
+See below for a listing of scripts for BUR/NIR reading including links.
+
+"""

+ 364 - 0
src/unfccc_ghg_data/unfccc_reader/United_States_of_America/config_usa_inv2024.py

@@ -0,0 +1,364 @@
+"""Config for USA 2024 Inventory
+
+General configuration for reading the inventory files for USA's official 2024
+inventory from xlsx
+
+"""
+
+# GWP tag that is appended to entity names in ``inventory_files`` (e.g.
+# f"HFCS ({gwp_to_use})") and used as ``source_GWP`` in ``basket_copy``.
+gwp_to_use = "AR5GWP100"
+# Maps the row labels of the csv tables to category codes. It is used as the
+# "category" value mapping of the sections configured in ``inventory_files``.
+# NOTE(review): several labels end in a stray letter ("CH4c", "N2Oc",
+# "International Bunker Fuelsb", ...); presumably footnote markers fused to the
+# label in the csv files - confirm against the tables.
+category_mapping = {
+    "Abandoned Oil and Gas Wells": "M.1.B.2.ab.6",
+    "Abandoned Underground Coal Mines": "1.B.1.a.i.3",
+    "Adipic Acid Production": "2.B.3",
+    "Agricultural Soil Management": "3.D",
+    "Aluminum Production": "2.C.3",
+    "Ammonia Production": "2.B.1",
+    "Anaerobic Digestion at Biogas Facilities": "5.B.2",
+    "Biomass and Biodiesel Consumptiona": "M.Memo.Bio",
+    #'CH4': '4',
+    "CH4c": "M.0.EL",
+    "CO2": "M.0.EL",
+    "Caprolactam, Glyoxal, and Glyoxylic Acid Production": "2.B.4",
+    "Carbide Production and Consumption": "2.B.5",
+    "Carbon Dioxide Consumption": "M.2.B.10.b",
+    "Cement Production": "2.A.1",
+    "Coal Mining": "M.1.B.1.a",  # abandoned underground mines are missing
+    "Commercial": "1.A.4.a",
+    "Composting": "5.B.1",
+    "Electric Power Sector": "1.A.1",
+    "Electrical Equipment": "2.G.1",
+    "Electronics Industry": "2.E",
+    "Enteric Fermentation": "3.A",
+    "Ferroalloy Production": "2.C.2",
+    "Field Burning of Agricultural Residues": "3.F",
+    "Fluorochemical Production": "2.B.9",
+    "Fossil Fuel Combustion": "1.A",
+    "Glass Production": "2.A.3",
+    "HFCs": "2",
+    "Incineration of Waste": "1.A.5.a.iv",
+    "Industrial": "1.A.2",
+    "International Bunker Fuelsb": "M.Memo.Int",
+    "Iron and Steel Production & Metallurgical Coke Production": "2.C.1",
+    # 'LULUCF Carbon Stock Changee': '',
+    # 'LULUCF Emissionsc': '',
+    # 'LULUCF Sector Net Totalf': '',
+    "Landfills": "5.A.1",
+    "Lead Production": "2.C.5",
+    "Lime Production": "2.A.2",
+    "Liming": "3.G",
+    "Magnesium Production and Processing": "2.C.4",
+    "Manure Management": "3.B",
+    "Mobile Combustion": "M.1.A.MOB",
+    #'N2O': '4',
+    "N2O from Product Uses": "2.G.3",
+    "N2Oc": "M.0.EL",
+    "NF3": "2",
+    "Natural Gas Systems": "M.1.B.2.b",  # abandoned wells missing
+    "Net Emissions (Sources and Sinks)": "0",
+    "Nitric Acid Production": "2.B.2",
+    "Non-Energy Use of Fuels": "1.A.5.a.iii",
+    "Other Process Uses of Carbonates": "2.A.4",
+    "PFCs": "2",
+    "Petrochemical Production": "2.B.8",
+    "Petroleum Systems": "M.1.B.2.a",  # abandoned wells missing
+    "Phosphoric Acid Production": "M.2.B.10.c",
+    "Residential": "1.A.4.b",
+    "Rice Cultivation": "3.C",
+    "SF6": "2",
+    "SF6 and PFCs from Other Product Use": "2.G.2",
+    "Soda Ash Production": "2.B.7",
+    "Stationary Combustion": "M.1.A.STAT",
+    "Substitution of Ozone Depleting Substances": "2.F",
+    "Titanium Dioxide Production": "2.B.6",
+    "Total Gross Emissions (Sources)": "M.0.EL",
+    "Transportation": "1.A.3",
+    "U.S. Territories": "1.A.5.a.v",
+    "Urea Consumption for Non-Agricultural Purposes": "M.2.B.10.a",
+    "Urea Fertilization": "3.H",
+    "Wastewater Treatment": "5.D",
+    "Zinc Production": "2.C.6",
+}
+
+category_col = "Gas/Source"
+inventory_files = {
+    "Table 2-1.csv": {
+        "CO2": None,
+        "CH4c": None,
+        "N2Oc": None,
+        "HFCs": {
+            "coords_defaults": {
+                "entity": f"HFCS ({gwp_to_use})",
+                "unit": "Mt CO2 / year",
+            },
+            "coords_value_mapping": {
+                "category": category_mapping,
+            },
+        },
+        "PFCs": {
+            "coords_defaults": {
+                "entity": f"PFCS ({gwp_to_use})",
+                "unit": "Mt CO2 / year",
+            },
+            "coords_value_mapping": {
+                "category": category_mapping,
+            },
+        },
+        "SF6": {
+            "coords_defaults": {
+                "entity": f"SF6 ({gwp_to_use})",
+                "unit": "Mt CO2 / year",
+            },
+            "coords_value_mapping": {
+                "category": category_mapping,
+            },
+        },
+        "NF3": {
+            "coords_defaults": {
+                "entity": f"NF3 ({gwp_to_use})",
+                "unit": "Mt CO2 / year",
+            },
+            "coords_value_mapping": {
+                "category": category_mapping,
+            },
+        },
+        "Total Gross Emissions (Sources)": {
+            "coords_defaults": {
+                "entity": f"KYOTOGHG ({gwp_to_use})",
+                "unit": "Mt CO2 / year",
+            },
+            "coords_value_mapping": {
+                "category": category_mapping,
+            },
+        },
+        "LULUCF Emissionsc": {
+            "coords_defaults": {"unit": "Mt CO2 / year"},
+            "coords_value_mapping": {
+                "entity": {
+                    #'LULUCF Emissionsc': '',
+                    "CH4": f"CH4 ({gwp_to_use})",
+                    "N2O": f"N2O ({gwp_to_use})",
+                    "LULUCF Carbon Stock Changee": "CO2",
+                    "LULUCF Sector Net Totalf": f"KYOTGHG ({gwp_to_use})",
+                },
+                "category": {
+                    #'LULUCF Emissionsc': '',
+                    "CH4": "4",
+                    "N2O": "4",
+                    "LULUCF Carbon Stock Changee": "4",
+                    "LULUCF Sector Net Totalf": "4",
+                },
+            },
+        },
+        "Net Emissions (Sources and Sinks)": {
+            "coords_defaults": {
+                "entity": f"KYOTOGHG ({gwp_to_use})",
+                "unit": "Mt CO2 / year",
+            },
+            "coords_value_mapping": {
+                "category": category_mapping,
+            },
+        },
+        "+ Does not exceed 0.05 MMT CO2 Eq.": None,
+    },
+    "Table 2-2.csv": {
+        "CO2": {
+            "coords_defaults": {"entity": "CO2", "unit": "kt CO2 / year"},
+            "coords_value_mapping": {
+                "category": category_mapping,
+            },
+        },
+        "CH4c": {
+            "coords_defaults": {"entity": "CH4", "unit": "kt CH4 / year"},
+            "coords_value_mapping": {
+                "category": category_mapping,
+            },
+        },
+        "N2Oc": {
+            "coords_defaults": {"entity": "N2O", "unit": "kt N2O / year"},
+            "coords_value_mapping": {
+                "category": category_mapping,
+            },
+        },
+        "HFCs": None,
+        "PFCs": None,
+        "SF?": None,
+        "NF?": None,
+        "+ Does not exceed 0.5 kt.": None,
+    },
+}
+
+time_format = "%Y"
+
+coords_cols_template = {
+    "category": category_col,
+}
+
+coords_terminologies = {
+    "area": "ISO3",
+    "category": "CRF2013_2023",
+    "scenario": "PRIMAP",
+}
+
+coords_defaults_template = {
+    "source": "USA-GHG-Inventory",
+    "provenance": "measured",
+    "area": "USA",
+    "scenario": "2024INV",
+}
+
+meta_data = {
+    "references": "https://www.epa.gov/ghgemissions/"
+    "inventory-us-greenhouse-gas-emissions-and-sinks-1990-2022",
+    "rights": "",
+    "contact": "johannes.guetschow@climate-resource.com",
+    "title": "Inventory of U.S. Greenhouse Gas Emissions and Sinks: 1990-2022",
+    "comment": "Read fom csv files by Johannes Gütschow",
+    "institution": "United States Environmental Protection Agency",
+}
+
+filter_remove = {
+    "f1": {
+        category_col: [
+            "LULUCF Emissionsc",
+        ]
+    }
+}
+
+### processing
+
+cat_conversion = {
+    "mapping": {
+        "0": "0",
+        "1.A": "1.A",
+        "1.A.1": "1.A.1",
+        "1.A.2": "1.A.2",
+        "1.A.3": "1.A.3",
+        "1.A.4.a": "1.A.4.a",
+        "1.A.4.b": "1.A.4.b",
+        "1.A.5.a.iii": "2.D",  # non energy fuel use
+        "1.A.5.a.iv": "M.1.A.5.a.iv",  # waste incineration
+        "1.A.5.a.v": "1.A.5.a.v",  # US Territories
+        "1.B.1.a.i.3": "1.B.1.a.i.3",
+        "2": "2",
+        "2.A.1": "2.A.1",
+        "2.A.2": "2.A.2",
+        "2.A.3": "2.A.3",
+        "2.A.4": "2.A.4",
+        "2.B.1": "2.B.1",
+        "2.B.2": "2.B.2",
+        "2.B.3": "2.B.3",
+        "2.B.4": "2.B.4",
+        "2.B.5": "2.B.5",
+        "2.B.6": "2.B.6",
+        "2.B.7": "2.B.7",
+        "2.B.8": "2.B.8",
+        "2.B.9": "2.B.9",
+        "2.C.1": "2.C.1",
+        "2.C.2": "2.C.2",
+        "2.C.3": "2.C.3",
+        "2.C.4": "2.C.4",
+        "2.C.5": "2.C.5",
+        "2.C.6": "2.C.6",
+        "2.E": "2.E",
+        "2.F": "2.F",
+        "2.G.1": "2.G.1",
+        "2.G.2": "2.G.2",
+        "2.G.3": "2.G.3",
+        "3.A": "3.A.1",
+        "3.B": "3.A.2",
+        "3.C": "3.C.7",
+        "3.D": "M.3.C.45.AG",
+        "3.F": "M.3.C.1.AG",
+        "3.G": "3.C.2",
+        "3.H": "3.C.3",
+        "4": "M.LULUCF",
+        "5.A.1": "4.A.1",
+        "5.B.1": "4.B.1",
+        "5.B.2": "4.B.2",
+        "5.D": "4.D",
+        "M.0.EL": "M.0.EL",
+        "M.1.A.MOB": "M.1.A.MOB",
+        "M.1.A.STAT": "M.1.A.STAT",
+        "M.1.B.1.a": "M.1.B.1.a",
+        "M.1.B.2.a": "M.1.B.2.a",
+        "M.1.B.2.ab.6": "M.1.B.2.ab.6",
+        "M.1.B.2.b": "M.1.B.2.b",
+        "M.2.B.10.a": "M.2.B.10.a",
+        "M.2.B.10.b": "M.2.B.10.b",
+        "M.2.B.10.c": "M.2.B.10.c",
+        "M.Memo.Bio": "M.BIO",
+        "M.Memo.Int": "M.BK",
+    },
+    "aggregate": {
+        # 1
+        "1.A.4": {"sources": ["1.A.4.a", "1.A.4.b"]},
+        "1.A.5.a": {"sources": ["M.1.A.5.a.iv", "M.1.A.5.a.v"]},
+        "1.A.5": {"sources": ["1.A.5.a"]},
+        "1.A": {
+            "sources": ["M.1.A.MOB", "M.1.A.STAT", "1.A.5"],
+            "filter": {"entity": ["CH4", "N2O"]},
+        },
+        "1.B.1": {"sources": ["M.1.B.1.a", "1.B.1.a.i.3"]},
+        "1.B.2": {"sources": ["M.1.B.2.a", "M.1.B.2.b", "M.1.B.2.ab.6"]},
+        "1.B": {"sources": ["1.B.1", "1.B.2"]},
+        "1": {"sources": ["1.A", "1.B"]},
+        # 2
+        "2.A": {"sources": ["2.A.1", "2.A.2", "2.A.3", "2.A.4"]},
+        "2.B.10": {"sources": ["M.2.B.10.a", "M.2.B.10.b", "M.2.B.10.c"]},
+        "2.B": {
+            "sources": [
+                "2.B.1",
+                "2.B.2",
+                "2.B.3",
+                "2.B.4",
+                "2.B.5",
+                "2.B.6",
+                "2.B.7",
+                "2.B.8",
+                "2.B.9",
+                "2.B.10",
+            ]
+        },
+        "2.C": {"sources": ["2.C.1", "2.C.2", "2.C.3", "2.C.4", "2.C.5", "2.C.6"]},
+        "2.G": {"sources": ["2.G.1", "2.G.2", "2.G.3"]},
+        "2": {
+            "sources": ["2.A", "2.B", "2.C", "2.D", "2.E", "2.F", "2.G"],
+            "tolerance": 0.251,  # rounding inconsistencies in NF3 and PFCs after 2008
+        },
+        # M.AG
+        "3.A": {"sources": ["3.A.1", "3.A.2"]},
+        "3.C.1": {"sources": ["M.3.C.1.AG"]},
+        "M.3.C.AG": {"sources": ["3.C.1", "3.C.2", "3.C.3", "M.3.C.45.AG", "3.C.7"]},
+        "3.C": {"sources": ["M.3.C.AG"]},
+        "M.AG.ELV": {"sources": ["M.3.C.AG"]},
+        "M.AG": {"sources": ["M.AG.ELV", "3.A"]},
+        # 3
+        "3": {"sources": ["M.AG", "M.LULUCF"]},
+        # 4
+        "4.A": {"sources": ["4.A.1"]},
+        "4.B": {"sources": ["4.B.1", "4.B.2"]},
+        "4": {"sources": ["4.A", "4.B", "4.D"]},
+        # consistency check
+        "0": {"sources": ["1", "2", "3", "4"]},
+        "M.0.EL": {"sources": ["1", "2", "M.AG", "4"]},
+    },
+}
+
+# Passed to process_data_for_country as "basket_copy" processing info.
+# Presumably the listed basket entities, given in "source_GWP", are copied to the
+# GWPs in "GWPs_to_add" - TODO confirm against process_data_for_country.
+basket_copy = {
+    "GWPs_to_add": ["SARGWP100", "AR4GWP100", "AR6GWP100"],
+    "entities": ["HFCS", "PFCS"],
+    "source_GWP": gwp_to_use,
+}
+
+# Contents of the gas baskets, one entry per basket and GWP.
+# NOTE(review): ``inventory_files`` reads SF6 and NF3 as f"SF6 ({gwp_to_use})" and
+# f"NF3 ({gwp_to_use})" while the baskets list the plain "SF6" / "NF3" entities -
+# confirm that the processing provides those, otherwise FGASES miss both gases.
+gas_baskets = {
+    "FGASES (SARGWP100)": ["HFCS (SARGWP100)", "PFCS (SARGWP100)", "SF6", "NF3"],
+    "FGASES (AR4GWP100)": ["HFCS (AR4GWP100)", "PFCS (AR4GWP100)", "SF6", "NF3"],
+    "FGASES (AR5GWP100)": ["HFCS (AR5GWP100)", "PFCS (AR5GWP100)", "SF6", "NF3"],
+    "FGASES (AR6GWP100)": ["HFCS (AR6GWP100)", "PFCS (AR6GWP100)", "SF6", "NF3"],
+    "KYOTOGHG (SARGWP100)": ["CO2", "CH4", "N2O", "FGASES (SARGWP100)"],
+    "KYOTOGHG (AR4GWP100)": ["CO2", "CH4", "N2O", "FGASES (AR4GWP100)"],
+    "KYOTOGHG (AR5GWP100)": ["CO2", "CH4", "N2O", "FGASES (AR5GWP100)"],
+    "KYOTOGHG (AR6GWP100)": ["CO2", "CH4", "N2O", "FGASES (AR6GWP100)"],
+}
+
+# category terminology of the processed dataset, also used in its file name
+terminology_proc = "IPCC2006_PRIMAP"

+ 182 - 0
src/unfccc_ghg_data/unfccc_reader/United_States_of_America/read_USA_2024_Inventory_from_xlsx.py

@@ -0,0 +1,182 @@
+"""
+Read USA's 2024 inventory from the csv main-text tables
+
+Files available here: https://www.epa.gov/ghgemissions/
+inventory-us-greenhouse-gas-emissions-and-sinks-1990-2022
+
+Only the overview tables are read as details are in several individual tables and
+overview is sufficient for PRIMAP-hist
+
+"""
+import pandas as pd
+import primap2 as pm2
+
+from unfccc_ghg_data.helper import (
+    downloaded_data_path,
+    extracted_data_path,
+    process_data_for_country,
+)
+from unfccc_ghg_data.unfccc_reader.United_States_of_America.config_usa_inv2024 import (
+    basket_copy,
+    cat_conversion,
+    category_col,
+    coords_cols_template,
+    coords_defaults_template,
+    coords_terminologies,
+    filter_remove,
+    gas_baskets,
+    inventory_files,
+    meta_data,
+    terminology_proc,
+    time_format,
+)
+
+if __name__ == "__main__":
+    pd.set_option("future.no_silent_downcasting", True)
+
+    # ###
+    # configuration
+    # ###
+
+    # folders and files
+    input_folder = (
+        downloaded_data_path
+        / "non-UNFCCC"
+        / "United_States_of_America"
+        / "2024-Inventory"
+        / "main-text-tables"
+        / "trends"
+    )
+    output_folder = extracted_data_path / "non-UNFCCC" / "United_States_of_America"
+    if not output_folder.exists():
+        output_folder.mkdir()
+
+    output_filename = "USA_2024-Inventory_"
+    compression = dict(zlib=True, complevel=9)
+
+    # ###
+    # start data reading
+    # ###
+
+    data_pm2 = None
+
+    for file in inventory_files.keys():
+        data_current_pd = pd.read_csv(input_folder / file, header=[1])
+        # remove the thousands separators (can't be done during reading as data is
+        # stored as string)
+        all_cols = data_current_pd.columns
+        data_cols = [col for col in all_cols if col != category_col]
+        for col in data_cols:
+            if data_current_pd.dtypes[col] == "object":
+                data_current_pd[col] = data_current_pd[col].str.replace(",", "")
+
+        section_keys = inventory_files[file].keys()
+        key_info = {}
+        last_key = None
+        for i, row in data_current_pd.iterrows():
+            if row[category_col] in section_keys:
+                key_info[row[category_col]] = {}
+                key_info[row[category_col]]["start"] = i
+                if last_key is not None:
+                    key_info[last_key]["end"] = i
+                last_key = row[category_col]
+
+        for section_key in section_keys:
+            current_config = inventory_files[file][section_key]
+            if current_config is not None:
+                # get the data
+                if "end" in key_info[section_key].keys():
+                    data_section = data_current_pd.iloc[
+                        key_info[section_key]["start"] : key_info[section_key]["end"]
+                    ].copy()
+                else:
+                    data_section = data_current_pd.iloc[
+                        key_info[section_key]["start"] :
+                    ].copy()
+
+                # convert to primap2 IF
+                coords_defaults = coords_defaults_template.copy()
+                coords_defaults.update(current_config["coords_defaults"])
+                coords_value_mapping = current_config["coords_value_mapping"]
+                coords_cols = coords_cols_template.copy()
+                if "entity" in coords_value_mapping:
+                    # make a copy of the category column as we also need if for entity
+                    data_section["entity"] = data_section[category_col]
+                    coords_cols["entity"] = "entity"
+
+                data_section_if = pm2.pm2io.convert_wide_dataframe_if(
+                    data_section,
+                    coords_cols=coords_cols,
+                    coords_terminologies=coords_terminologies,
+                    coords_defaults=coords_defaults,
+                    coords_value_mapping=coords_value_mapping,
+                    filter_remove=filter_remove,
+                    meta_data=meta_data,
+                    time_format=time_format,
+                )
+                # convert to primap2 native format
+                data_section_pm2 = pm2.pm2io.from_interchange_format(data_section_if)
+
+                # merge with other data
+                if data_pm2 is None:
+                    data_pm2 = data_section_pm2
+                else:
+                    data_pm2 = data_pm2.pr.merge(data_section_pm2)
+
+    # convert back to IF to have units in the fixed format
+    data_if = data_pm2.pr.to_interchange_format()
+
+    # ###
+    # save data to IF and native format
+    # ###
+    pm2.pm2io.write_interchange_format(
+        output_folder / (output_filename + coords_terminologies["category"]), data_if
+    )
+
+    encoding = {var: compression for var in data_pm2.data_vars}
+    data_pm2.pr.to_netcdf(
+        output_folder / (output_filename + coords_terminologies["category"] + ".nc"),
+        encoding=encoding,
+    )
+
+    # ###
+    # conversion to ipcc 2006 categories
+    # ###
+
+    data_pm2_2006 = data_pm2.copy()
+
+    # actual processing
+
+    country_processing = {
+        "basket_copy": basket_copy,
+    }
+
+    data_pm2_2006 = process_data_for_country(
+        data_pm2_2006,
+        entities_to_ignore=[],
+        gas_baskets=gas_baskets,
+        processing_info_country=country_processing,
+        cat_terminology_out=terminology_proc,
+        category_conversion=cat_conversion,
+        # sectors_out=sectors_to_save,
+    )
+
+    # adapt source and metadata
+    current_source = data_pm2_2006.coords["source"].to_numpy()[0]
+    data_temp = data_pm2_2006.pr.loc[{"source": current_source}]
+    data_pm2_2006 = data_pm2_2006.pr.set("source", "AI_INV", data_temp)
+    data_pm2_2006 = data_pm2_2006.pr.loc[{"source": ["AI_INV"]}]
+
+    # convert back to IF to have units in the fixed format
+    data_if_2006 = data_pm2_2006.pr.to_interchange_format()
+
+    pm2.pm2io.write_interchange_format(
+        output_folder / (output_filename + terminology_proc),
+        data_if_2006,
+    )
+
+    encoding = {var: compression for var in data_pm2_2006.data_vars}
+    data_pm2_2006.pr.to_netcdf(
+        output_folder / (output_filename + terminology_proc + ".nc"),
+        encoding=encoding,
+    )