|
@@ -13,6 +13,7 @@ from unfccc_ghg_data.unfccc_reader.Cabo_Verde.config_cpv_bur1 import (
|
|
|
coords_defaults,
|
|
|
coords_terminologies,
|
|
|
coords_value_mapping,
|
|
|
+ inv_conf,
|
|
|
inv_conf_main,
|
|
|
inv_conf_per_sector,
|
|
|
meta_data,
|
|
@@ -24,6 +25,10 @@ if __name__ == "__main__":
|
|
|
# configuration
|
|
|
# ###
|
|
|
|
|
|
+ # for regex later
|
|
|
+ def repl(m): # noqa: D103
|
|
|
+ return m.group("code")
|
|
|
+
|
|
|
input_folder = downloaded_data_path / "UNFCCC" / "Cabo_Verde" / "BUR1"
|
|
|
output_folder = extracted_data_path / "UNFCCC" / "Cabo_Verde"
|
|
|
|
|
@@ -54,6 +59,40 @@ if __name__ == "__main__":
|
|
|
if not skip_rows_start == 0:
|
|
|
df_page = df_page[skip_rows_start:]
|
|
|
|
|
|
+ df_page.columns = inv_conf_main["pages"][page]["column_names"]
|
|
|
+
|
|
|
+ # first the manual replacements
|
|
|
+ df_page["category"] = df_page["category"].replace(
|
|
|
+ inv_conf_main["pages"][page]["cat_codes_manual"]
|
|
|
+ )
|
|
|
+
|
|
|
+ # Remove dots between letters in category codes
|
|
|
+ df_page["category"] = df_page["category"].str.replace(".", "")
|
|
|
+ # Some categories have a dash between the letters
|
|
|
+ df_page["category"] = df_page["category"].str.replace("-", " ")
|
|
|
+
|
|
|
+ # then the regex replacements
|
|
|
+ df_page["category"] = df_page["category"].str.replace(
|
|
|
+ inv_conf["cat_code_regexp"], repl, regex=True
|
|
|
+ )
|
|
|
+
|
|
|
+ df_page = pd.melt(
|
|
|
+ df_page,
|
|
|
+ id_vars="category",
|
|
|
+ value_vars=inv_conf_main["pages"][page]["entities"],
|
|
|
+ )
|
|
|
+
|
|
|
+ df_page = df_page.rename({"value": "data", "variable": "entity"}, axis=1)
|
|
|
+
|
|
|
+ df_page["data"] = df_page["data"].str.replace(",", ".")
|
|
|
+
|
|
|
+ # df_page["unit"] = df_page["entity"]
|
|
|
+
|
|
|
+ # set unit based on entity
|
|
|
+ df_page["unit"] = df_page["entity"].replace(
|
|
|
+ inv_conf_main["pages"][page]["unit_for_entity"]
|
|
|
+ )
|
|
|
+
|
|
|
# stack the tables vertically
|
|
|
if df_main is None:
|
|
|
df_main = df_page
|
|
@@ -66,6 +105,20 @@ if __name__ == "__main__":
|
|
|
axis=0,
|
|
|
join="outer",
|
|
|
).reset_index(drop=True)
|
|
|
+ break
|
|
|
+
|
|
|
+ df_main_if = pm2.pm2io.convert_wide_dataframe_if(
|
|
|
+ df_main,
|
|
|
+ coords_cols=coords_cols,
|
|
|
+ # add_coords_cols=add_coords_cols,
|
|
|
+ coords_defaults=coords_defaults,
|
|
|
+ coords_terminologies=coords_terminologies,
|
|
|
+ coords_value_mapping=coords_value_mapping,
|
|
|
+ # coords_value_filling=coords_value_filling,
|
|
|
+ # filter_remove=filter_remove,
|
|
|
+ # filter_keep=filter_keep,
|
|
|
+ meta_data=meta_data,
|
|
|
+ )
|
|
|
|
|
|
# ###
|
|
|
# 1. Read trend tables 1995, 2000, 2005, 2010, 2015 and 2019
|
|
@@ -105,7 +158,7 @@ if __name__ == "__main__":
|
|
|
|
|
|
# remove all thousand separator commas
|
|
|
for year in trend_years:
|
|
|
- df_page[year] = df_page[year].str.replace(",", "")
|
|
|
+ df_page[year] = df_page[year].str.replace(",", ".")
|
|
|
|
|
|
# add unit
|
|
|
df_page["unit"] = inv_conf_per_sector[sector]["unit"]
|