@@ -14,6 +14,7 @@ from datetime import date
import xarray as xr
from UNFCCC_GHG_data.helper import downloaded_data_path, extracted_data_path
+from UNFCCC_GHG_data.helper.functions import find_and_replace_values
from config_GIN_BUR1 import coords_cols, coords_defaults, coords_terminologies
from config_GIN_BUR1 import (
@@ -21,7 +22,7 @@ from config_GIN_BUR1 import (
-from config_GIN_BUR1 import inv_conf, country_processing_step1, gas_baskets
+from config_GIN_BUR1 import inv_conf, country_processing_step1, gas_baskets, replace_info
# ###
# configuration
@@ -29,7 +30,7 @@ from config_GIN_BUR1 import inv_conf, country_processing_step1, gas_baskets
input_folder = downloaded_data_path / "UNFCCC" / "Guinea" / "BUR1"
output_folder = extracted_data_path / "UNFCCC" / "Guinea"
-if not output_folder.exists():
+if not output_folder.exists() :
pdf_file = "Rapport_IGES-Guinee-BUR1_VF.pdf"
@@ -43,7 +44,7 @@ compression = dict(zlib=True, complevel=9)
pages = ["110", "111", "112", "113"]
df_main = None
-for page in pages:
+for page in pages :
print("-" * 45)
print(f"Reading table from page {page}.")
@@ -61,7 +62,7 @@ for page in pages:
df_inventory = tables_inventory_original[0].df.copy()
# move broken text in correct row (page 113 is fine)
- if page in ["110", "111", "112"]:
+ if page in ["110", "111", "112"] :
df_inventory.at[4, 0] = "1.A.1 - Industries énergétiques"
df_inventory = df_inventory.drop(index=3)
df_inventory.at[8, 0] = "1.A.4 - Autres secteurs"
@@ -103,8 +104,12 @@ for page in pages:
df_inventory_long["category"] = df_inventory_long["category"].str.replace(".", "")
# regex replacements
- repl = lambda m: m.group("code")
+ def repl(m) :
+ return m.group("code")
df_inventory_long["category"] = df_inventory_long["category"].str.replace(
inv_conf["cat_code_regexp"], repl, regex=True
@@ -118,9 +123,9 @@ for page in pages:
df_inventory_long.columns = df_inventory_long.columns.map(str)
df_inventory_long = df_inventory_long.drop(columns=["orig_cat_name"])
- if df_main is None:
+ if df_main is None :
df_main = df_inventory_long
- else:
+ else :
df_main = pd.concat(
[df_main, df_inventory_long],
@@ -140,85 +145,10 @@ df_all_IF = pm2.pm2io.convert_long_dataframe_if(
-# There are inconsistent values in the main and the afolu table
-# It looks like they put the values from 1990 again for 2019 in the main table.
-# The values from the afolu table are assumed to be the correct ones.
- (df_all_IF[category_column] == "3") & (df_all_IF["entity"] == "CO"),
- "2019",
-] = 27.406
- (df_all_IF[category_column] == "3.C") & (df_all_IF["entity"] == "CO"),
- "2019",
-] = 27.406
- (df_all_IF[category_column] == "3.C.1") & (df_all_IF["entity"] == "CO"),
- "2019",
-] = 27.406
-# Values for category 3 and N2O are identical for 1990 and 2019
-# The sum of the sub-categories does not equal the value of the parent category
-# The value in the afolu table should therefore be the correct one
- (df_all_IF[category_column] == "3") & (df_all_IF["entity"] == "N2O"),
- "1990",
-] = 2.190
-# Values for category 3 and NOx are identical for 1990 and 2019
-# Replacing the duplicate value with the value from the afolu table
- (df_all_IF[category_column] == "3") & (df_all_IF["entity"] == "NOx"),
- "2019",
-] = 1.644
- (df_all_IF[category_column] == "3.C") & (df_all_IF["entity"] == "NOx"),
- "2019",
-] = 1.644
- (df_all_IF[category_column] == "3.C.1") & (df_all_IF["entity"] == "NOx"),
- "2019",
-] = 1.644
-# International bunkers
-# NOx
- (df_all_IF[category_column] == "M.BK") & (df_all_IF["entity"] == "NOx"),
- "1990",
-] = 0.001
- (df_all_IF[category_column] == "M.BK") & (df_all_IF["entity"] == "NOx"),
- "2000",
-] = 0.003
- (df_all_IF[category_column] == "M.BK") & (df_all_IF["entity"] == "NOx"),
- "2010",
-] = 0.052
-# CO
- (df_all_IF[category_column] == "M.BK") & (df_all_IF["entity"] == "CO"),
- "1990",
-] = 0.0002
- (df_all_IF[category_column] == "M.BK") & (df_all_IF["entity"] == "CO"),
- "2000",
-] = 0.0006
- (df_all_IF[category_column] == "M.BK") & (df_all_IF["entity"] == "CO"),
- "2010",
-] = 0.01
- (df_all_IF[category_column] == "M.BK") & (df_all_IF["entity"] == "NMVOC"),
- "1990",
-] = 0.0001
- (df_all_IF[category_column] == "M.BK") & (df_all_IF["entity"] == "NMVOC"),
- "2000",
-] = 0.0002
- (df_all_IF[category_column] == "M.BK") & (df_all_IF["entity"] == "NMVOC"),
- "2010",
-] = 0.003
+df_all_IF = find_and_replace_values(df=df_all_IF,
+ replace_info=replace_info["main"],
+ category_column=category_column
+ )
### convert to primap2 format ###
data_pm2_main = pm2.pm2io.from_interchange_format(df_all_IF)
@@ -229,7 +159,7 @@ data_pm2_main = pm2.pm2io.from_interchange_format(df_all_IF)
pages = ["116", "117", "118", "119"]
df_energy = None
-for page in pages:
+for page in pages :
print("-" * 45)
print(f"Reading table from page {page}.")
@@ -241,7 +171,7 @@ for page in pages:
# cut last two lines of second table to ignore additional information regarding biomass for energy production
df_energy_year = pd.concat(
- [tables_inventory_original[0].df[2:], tables_inventory_original[1].df[3:-2]],
+ [tables_inventory_original[0].df[2 :], tables_inventory_original[1].df[3 :-2]],
@@ -249,19 +179,19 @@ for page in pages:
row_to_delete = df_energy_year.index[
== "1.A.3.a.i - Aviation internationale (Soutes internationales)"
- ][0]
+ ][0]
df_energy_year = df_energy_year.drop(index=row_to_delete)
row_to_delete = df_energy_year.index[
== "1.A.3.d.i - Navigation internationale (soutes internationales)"
- ][0]
+ ][0]
df_energy_year = df_energy_year.drop(index=row_to_delete)
row_to_delete = df_energy_year.index[
== "1.A.5.c - Opérations multilatérales (Éléments pour information)"
- ][0]
+ ][0]
df_energy_year = df_energy_year.drop(index=row_to_delete)
# add header and unit
@@ -309,8 +239,12 @@ for page in pages:
".", ""
# then the regex replacements
- repl = lambda m: m.group("code")
+ def repl(m) :
+ return m.group("code")
df_energy_year_long["category"] = df_energy_year_long["category"].str.replace(
inv_conf["cat_code_regexp"], repl, regex=True
@@ -324,9 +258,9 @@ for page in pages:
df_energy_year_long.columns = df_energy_year_long.columns.map(str)
df_energy_year_long = df_energy_year_long.drop(columns=["orig_cat_name"])
- if df_energy is None:
+ if df_energy is None :
df_energy = df_energy_year_long
- else:
+ else :
df_energy = pd.concat(
[df_energy, df_energy_year_long],
@@ -349,14 +283,13 @@ df_energy_IF = pm2.pm2io.convert_long_dataframe_if(
### convert to primap2 format ###
data_pm2_energy = pm2.pm2io.from_interchange_format(df_energy_IF)
# ###
# 3. Read in afolu table
# ###
pages = ["124", "125", "126", "127"]
df_afolu = None
-for page in pages:
+for page in pages :
print("-" * 45)
print(f"Reading table from page {page}.")
@@ -365,10 +298,10 @@ for page in pages:
print("Reading complete.")
- if page == "127":
+ if page == "127" :
# table on page 127 has one extra row at the top
# and one extra category 3.A.1.j
- df_afolu_year = tables_inventory_original[0].df[3:]
+ df_afolu_year = tables_inventory_original[0].df[3 :]
# 3.A.1.a.i to 3.A.1.j exist twice.
# Rename duplicate categories in tables.
replace_categories = [
@@ -384,11 +317,11 @@ for page in pages:
(28, "3.A.2.i - Volailles"),
(29, "3.A.2.j - Autres (préciser)"),
- for index, category_name in replace_categories:
+ for index, category_name in replace_categories :
df_afolu_year.at[index, 0] = category_name
- else:
+ else :
# cut first two lines
- df_afolu_year = tables_inventory_original[0].df[2:]
+ df_afolu_year = tables_inventory_original[0].df[2 :]
# On pages 124-126 the wrong categories are slightly different
replace_categories = [
(17, "3.A.2.a.i - Vaches laitières"),
@@ -402,7 +335,7 @@ for page in pages:
(25, "3.A.2.h - Porcins"),
(26, "3.A.2.i - Volailles"),
- for index, category_name in replace_categories:
+ for index, category_name in replace_categories :
df_afolu_year.at[index, 0] = category_name
# add header and unit
@@ -439,8 +372,12 @@ for page in pages:
# make a copy of the categories row
df_afolu_year_long["category"] = df_afolu_year_long["orig_cat_name"]
# regex replacements
- repl = lambda m: m.group("code")
+ def repl(m) :
+ return m.group("code")
df_afolu_year_long["category"] = df_afolu_year_long["category"].str.replace(
inv_conf["cat_code_regexp"], repl, regex=True
@@ -454,9 +391,9 @@ for page in pages:
df_afolu_year_long.columns = df_afolu_year_long.columns.map(str)
df_afolu_year_long = df_afolu_year_long.drop(columns=["orig_cat_name"])
- if df_afolu is None:
+ if df_afolu is None :
df_afolu = df_afolu_year_long
- else:
+ else :
df_afolu = pd.concat(
[df_afolu, df_afolu_year_long],
@@ -500,18 +437,18 @@ tables_inventory_original_130 = camelot.read_pdf(
# save to dict
df_waste_years = {
- "1990": tables_inventory_original_128[0].df,
- "2000": tables_inventory_original_128[1].df,
- "2010": tables_inventory_original_128[2].df,
- "2019": tables_inventory_original_130[0].df,
+ "1990" : tables_inventory_original_128[0].df,
+ "2000" : tables_inventory_original_128[1].df,
+ "2010" : tables_inventory_original_128[2].df,
+ "2019" : tables_inventory_original_130[0].df,
df_waste = None
-for year in df_waste_years.keys():
+for year in df_waste_years.keys() :
print("-" * 45)
print(f"Processing table for {year}.")
- df_waste_year = df_waste_years[year][2:]
+ df_waste_year = df_waste_years[year][2 :]
# add header and unit
df_header = pd.DataFrame([inv_conf["header_waste"], inv_conf["unit_waste"]])
@@ -545,8 +482,12 @@ for year in df_waste_years.keys():
# make a copy of the categories row
df_waste_year_long["category"] = df_waste_year_long["orig_cat_name"]
# regex replacements
- repl = lambda m: m.group("code")
+ def repl(m) :
+ return m.group("code")
df_waste_year_long["category"] = df_waste_year_long["category"].str.replace(
inv_conf["cat_code_regexp"], repl, regex=True
@@ -561,9 +502,9 @@ for year in df_waste_years.keys():
df_waste_year_long.columns = df_waste_year_long.columns.map(str)
df_waste_year_long = df_waste_year_long.drop(columns=["orig_cat_name"])
- if df_waste is None:
+ if df_waste is None :
df_waste = df_waste_year_long
- else:
+ else :
df_waste = pd.concat(
[df_waste, df_waste_year_long],
@@ -595,7 +536,7 @@ pages = ["131", "132", "133", "134", "135", "136", "137"]
entities = ["CO2", "CH4", "N2O", "NOx", "CO", "NMVOCs", "SO2"]
# for this set of tables every page is a different entity
-for page, entity in zip(pages, entities):
+for page, entity in zip(pages, entities) :
# The table for CO seems completely mixed up and should not be considered.
# The total CO values for 1990 equal the values in the main table.
# The total CO values for 1995 equal the values for 2000 in the main table.
@@ -604,7 +545,7 @@ for page, entity in zip(pages, entities):
# The total CO values for 2010 are identical to the 1990 values in the same table.
# The total CO values for 2019 are identical to the 1995 values in the same table.
# And so on.
- if entity == "CO":
+ if entity == "CO" :
print("-" * 45)
@@ -615,7 +556,7 @@ for page, entity in zip(pages, entities):
# see https://github.com/atlanhq/camelot/issues/306,
# or because characters in first row almost touch
# the table grid.
- if page == "131":
+ if page == "131" :
tables_inventory_original = camelot.read_pdf(
str(input_folder / pdf_file),
@@ -625,7 +566,7 @@ for page, entity in zip(pages, entities):
- df_trend_entity = tables_inventory_original[0].df[1:]
+ df_trend_entity = tables_inventory_original[0].df[1 :]
# The categories 3.D / 3.D.1 / 3.D.2 contain values different to the main table
# They should also not contain negative values according to IPCC methodology:
@@ -636,19 +577,19 @@ for page, entity in zip(pages, entities):
row_to_delete = df_trend_entity.index[
df_trend_entity[0] == "3.D.1 - Produits ligneux récoltés"
- ][0]
+ ][0]
df_trend_entity = df_trend_entity.drop(index=row_to_delete)
row_to_delete = df_trend_entity.index[
df_trend_entity[0] == "3.D.2 - Autres (veuillez spécifier)"
- ][0]
+ ][0]
df_trend_entity = df_trend_entity.drop(index=row_to_delete)
- else:
+ else :
tables_inventory_original = camelot.read_pdf(
str(input_folder / pdf_file), pages=page, flavor="lattice", split_text=True
- df_trend_entity = tables_inventory_original[0].df[3:]
+ df_trend_entity = tables_inventory_original[0].df[3 :]
print("Reading complete.")
@@ -677,7 +618,7 @@ for page, entity in zip(pages, entities):
df_trend_entity.loc[:, "category"] = df_trend_entity["orig_cat_name"]
# Delete empty line for pages 132-137.
- if page != "131":
+ if page != "131" :
row_to_delete = df_trend_entity.index[df_trend_entity["category"] == ""][0]
df_trend_entity = df_trend_entity.drop(index=row_to_delete)
@@ -692,7 +633,11 @@ for page, entity in zip(pages, entities):
"\n", ""
- repl = lambda m: m.group("code")
+ def repl(m) :
+ return m.group("code")
df_trend_entity.loc[:, "category"] = df_trend_entity["category"].str.replace(
inv_conf["cat_code_regexp"], repl, regex=True
@@ -701,7 +646,7 @@ for page, entity in zip(pages, entities):
print("Created category codes.")
- for year in columns_years:
+ for year in columns_years :
df_trend_entity.loc[:, year] = df_trend_entity[year].str.replace(",", ".")
df_trend_entity.loc[:, year] = df_trend_entity[year].str.replace("NE1", "NE")
@@ -719,9 +664,9 @@ for page, entity in zip(pages, entities):
df_trend_entity_long = df_trend_entity_long.reset_index()
- if df_trend is None:
+ if df_trend is None :
df_trend = df_trend_entity_long
- else:
+ else :
df_trend = pd.concat(
[df_trend, df_trend_entity_long],
@@ -742,127 +687,9 @@ df_trend_IF = pm2.pm2io.convert_long_dataframe_if(
-# CH4 - values in main table are assumed to be correct
- (df_trend_IF[category_column] == "M.BK") & (df_trend_IF["entity"] == "CH4"),
- "1990",
-] = np.nan
- (df_trend_IF[category_column] == "M.BK.A") & (df_trend_IF["entity"] == "CH4"),
- "1990",
-] = np.nan
- (df_trend_IF[category_column] == "M.BK") & (df_trend_IF["entity"] == "CH4"),
- "2000",
-] = np.nan
- (df_trend_IF[category_column] == "M.BK.A") & (df_trend_IF["entity"] == "CH4"),
- "2000",
-] = np.nan
- (df_trend_IF[category_column] == "M.BK") & (df_trend_IF["entity"] == "CH4"),
- "2010",
-] = np.nan
- (df_trend_IF[category_column] == "M.BK.A") & (df_trend_IF["entity"] == "CH4"),
- "2010",
-] = np.nan
-# N2O - values in main table are assumed to be correct
- (df_trend_IF[category_column] == "1.A.2") & (df_trend_IF["entity"] == "N2O"),
- "1990",
-] = np.nan
- (df_trend_IF[category_column] == "M.BK") & (df_trend_IF["entity"] == "N2O"),
- "1990",
-] = np.nan
- (df_trend_IF[category_column] == "M.BK.A") & (df_trend_IF["entity"] == "N2O"),
- "1990",
-] = np.nan
- (df_trend_IF[category_column] == "M.BK") & (df_trend_IF["entity"] == "N2O"),
- "2000",
-] = np.nan
- (df_trend_IF[category_column] == "M.BK.A") & (df_trend_IF["entity"] == "N2O"),
- "2000",
-] = np.nan
- (df_trend_IF[category_column] == "M.BK") & (df_trend_IF["entity"] == "N2O"),
- "2010",
-] = np.nan
- (df_trend_IF[category_column] == "M.BK.A") & (df_trend_IF["entity"] == "N2O"),
- "2010",
-] = np.nan
- (df_trend_IF[category_column] == "M.BK") & (df_trend_IF["entity"] == "N2O"),
- "2019",
-] = np.nan
- (df_trend_IF[category_column] == "M.BK.A") & (df_trend_IF["entity"] == "N2O"),
- "2019",
-] = np.nan
-# NOx - values in main table are assumed to be correct
- (df_trend_IF[category_column] == "M.BK") & (df_trend_IF["entity"] == "NOx"),
- "1990",
-] = np.nan
- (df_trend_IF[category_column] == "M.BK") & (df_trend_IF["entity"] == "NOx"),
- "2000",
-] = np.nan
- (df_trend_IF[category_column] == "M.BK") & (df_trend_IF["entity"] == "NOx"),
- "2010",
-] = np.nan
- (df_trend_IF[category_column] == "3.C") & (df_trend_IF["entity"] == "NOx"),
- "2019",
-] = np.nan
- (df_trend_IF[category_column] == "3.C.1") & (df_trend_IF["entity"] == "NOx"),
- "2019",
-] = np.nan
- (df_trend_IF[category_column] == "3") & (df_trend_IF["entity"] == "NOx"),
- "2019",
-] = np.nan
-# NMVOC - values in main table are assumed to be correct
-entity = "NMVOC"
-for category, year in [
- ("1.A.2", "1990"),
- ("M.BK", "1990"),
- ("0", "2000"),
- ("1", "2000"),
- ("1.A", "2000"),
- ("1.A.1", "2000"),
- ("1.A.2", "2000"),
- ("1.A.3", "2000"),
- ("1.A.4", "2000"),
- ("2", "2000"),
- ("2.H", "2000"),
- ("2.H.2", "2000"),
- ("M.BK", "2000"),
- ("0", "2010"),
- ("1", "2010"),
- ("1.A", "2010"),
- ("1.A.1", "2010"),
- ("1.A.2", "2010"),
- ("1.A.3", "2010"),
- ("1.A.4", "2010"),
- ("2", "2010"),
- ("M.BK", "2010"),
- ("1.A.2", "2019"),
- df_trend_IF.loc[
- (df_trend_IF[category_column] == category) & (df_trend_IF["entity"] == entity),
- year,
- ] = np.nan
+df_trend_IF = find_and_replace_values(df=df_trend_IF,
+ replace_info=replace_info["trend"],
+ category_column=category_column)
### convert to primap2 format ###
data_pm2_trend = pm2.pm2io.from_interchange_format(df_trend_IF)
@@ -875,16 +702,20 @@ data_pm2_trend = pm2.pm2io.from_interchange_format(df_trend_IF)
# There are discrepancies larger than 0.86 for area category 1.A.2, entity NMVOC,
# years 1990, 2000, 2010, 2019
# It is assumed the main table has the correct values.
+print("Merging main and energy table.")
data_pm2 = data_pm2_main.pr.merge(data_pm2_energy, tolerance=1)
# merge afolu
+print("Merging afolu table.")
data_pm2 = data_pm2.pr.merge(data_pm2_afolu, tolerance=0.11)
# merge waste
# increasing tolerance to merge values for 4.C, 1990, N2O - 0.003 in sector table, 0.0034 in main table
+print("Merging waste table.")
data_pm2 = data_pm2.pr.merge(data_pm2_waste, tolerance=0.15)
# merge trend
+print("Merging trend table.")
data_pm2 = data_pm2.pr.merge(data_pm2_trend, tolerance=0.11)
# convert back to IF to have units in the fixed format ( per year / per a / per annum)
@@ -899,13 +730,12 @@ pm2.pm2io.write_interchange_format(
-encoding = {var: compression for var in data_pm2.data_vars}
+encoding = {var : compression for var in data_pm2.data_vars}
output_folder / (output_filename + coords_terminologies["category"] + "_raw.nc"),
# ###
# Processing
# ###
@@ -917,12 +747,12 @@ processing_info_country = country_processing_step1
data_country = data_pm2
countries = list(data_country.coords[data_country.attrs["area"]].values)
-if len(countries) > 1:
+if len(countries) > 1 :
raise ValueError(
f"Found {len(countries)} countries. Only single country data "
f"can be processed by this function. countries: {countries}"
+else :
country_code = countries[0]
# get category terminology
@@ -932,7 +762,7 @@ cat_terminology_in = temp[0]
# get scenario
scenarios = list(data_country.coords[data_country.attrs["scen"]].values)
-if len(scenarios) > 1:
+if len(scenarios) > 1 :
raise ValueError(
f"Found {len(scenarios)} scenarios. Only single scenario data "
f"can be processed by this function. Scenarios: {scenarios}"
@@ -941,7 +771,7 @@ scenario = scenarios[0]
# get source
sources = list(data_country.coords["source"].values)
-if len(sources) > 1:
+if len(sources) > 1 :
raise ValueError(
f"Found {len(sources)} sources. Only single source data "
f"can be processed by this function. Sources: {sources}"
@@ -949,9 +779,9 @@ if len(sources) > 1:
source = sources[0]
# check if category name column present
-if "orig_cat_name" in data_country.coords:
+if "orig_cat_name" in data_country.coords :
cat_name_present = True
+else :
cat_name_present = False
# 1: general processing
@@ -977,38 +807,38 @@ print(
f"Aggregating categories for country {country_code}, source {source}, "
f"scenario {scenario}"
-for cat_to_agg in aggregate_cats_current:
+for cat_to_agg in aggregate_cats_current :
print(f"Category: {cat_to_agg}")
source_cats = aggregate_cats_current[cat_to_agg]["sources"]
- data_agg = data_country.pr.loc[{"category": source_cats}].pr.sum(
+ data_agg = data_country.pr.loc[{"category" : source_cats}].pr.sum(
dim="category", skipna=True, min_count=1
nan_vars = [
var for var in data_agg.data_vars if data_agg[var].isnull().all().data is True
data_agg = data_agg.drop(nan_vars)
- if len(data_agg.data_vars) > 0:
+ if len(data_agg.data_vars) > 0 :
data_agg = data_agg.expand_dims([f"category (" f"{cat_terminology_in})"])
data_agg = data_agg.assign_coords(
- f"category ({cat_terminology_in})": (
+ f"category ({cat_terminology_in})" : (
f"category ({cat_terminology_in})",
- if cat_name_present:
+ if cat_name_present :
cat_name = aggregate_cats_current[cat_to_agg]["name"]
data_agg = data_agg.assign_coords(
- "orig_cat_name": (
+ "orig_cat_name" : (
f"category ({cat_terminology_in})",
data_country = data_country.pr.merge(data_agg, tolerance=agg_tolerance)
- else:
+ else :
print(f"no data to aggregate category {cat_to_agg}")
from UNFCCC_GHG_data.helper import GWP_factors
@@ -1017,9 +847,9 @@ from UNFCCC_GHG_data.helper import GWP_factors
GWPs_to_add = country_processing_step1["basket_copy"]["GWPs_to_add"]
entities = country_processing_step1["basket_copy"]["entities"]
source_GWP = country_processing_step1["basket_copy"]["source_GWP"]
-for entity in entities:
+for entity in entities :
data_source = data_country[f"{entity} ({source_GWP})"]
- for GWP in GWPs_to_add:
+ for GWP in GWPs_to_add :
data_GWP = data_source * GWP_factors[f"{source_GWP}_to_{GWP}"][entity]
data_GWP.attrs["entity"] = entity
data_GWP.attrs["gwp_context"] = GWP
@@ -1027,27 +857,27 @@ for entity in entities:
# create gas baskets
entities_present = set(data_country.data_vars)
-for basket in gas_baskets.keys():
+for basket in gas_baskets.keys() :
basket_contents_present = [
gas for gas in gas_baskets[basket] if gas in entities_present
- if len(basket_contents_present) > 0:
- if basket in list(data_country.data_vars):
+ if len(basket_contents_present) > 0 :
+ if basket in list(data_country.data_vars) :
data_country[basket] = data_country.pr.fill_na_gas_basket_from_contents(
- else:
- try:
+ else :
+ try :
# print(data_country.data_vars)
data_country[basket] = xr.full_like(
data_country["CO2"], np.nan
).pr.quantify(units="Gg CO2 / year")
data_country[basket].attrs = {
- "entity": basket.split(" ")[0],
- "gwp_context": basket.split(" ")[1][1:-1],
+ "entity" : basket.split(" ")[0],
+ "gwp_context" : basket.split(" ")[1][1 :-1],
data_country[basket] = data_country.pr.gas_basket_contents_sum(
@@ -1055,22 +885,20 @@ for basket in gas_baskets.keys():
- except Exception as ex:
+ except Exception as ex :
f"No gas basket created for {country_code}, {source}, "
f"{scenario}: {ex}"
# amend title and comment
data_country.attrs["comment"] = (
- data_country.attrs["comment"] + f" Processed on " f"{date.today()}"
+ data_country.attrs["comment"] + f" Processed on " f"{date.today()}"
data_country.attrs["title"] = (
- data_country.attrs["title"] + f" Processed on " f"{date.today()}"
+ data_country.attrs["title"] + f" Processed on " f"{date.today()}"
# ###
# save processed data to IF and native format
# ###
@@ -1080,13 +908,13 @@ terminology_proc = coords_terminologies["category"]
data_proc_if = data_proc_pm2.pr.to_interchange_format()
-if not output_folder.exists():
+if not output_folder.exists() :
output_folder / (output_filename + terminology_proc), data_proc_if
-encoding = {var: compression for var in data_proc_pm2.data_vars}
+encoding = {var : compression for var in data_proc_pm2.data_vars}
output_folder / (output_filename + terminology_proc + ".nc"), encoding=encoding