functions_temp.py 1.6 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455
  1. """Temporary file for new functions to avoid merging issues due to different automatic formatting. Delete after merge."""
  2. import numpy as np
  3. import pandas as pd
  4. def find_and_replace_values(
  5. df: pd.DataFrame,
  6. replace_info: list[tuple[str | float]],
  7. category_column: str,
  8. entity_column: str = "entity",
  9. ) -> pd.DataFrame:
  10. """
  11. Find values and replace single values in a dataframe.
  12. Input
  13. -----
  14. df
  15. Input data frame
  16. replace_info
  17. Category, entity, year, and new value. Don't put a new value if you would like to replace with nan.
  18. For example [("3.C", "CO", "2019", 3.423)] or [("3.C", "CO", "2019")]
  19. category_column
  20. The name of the column that contains the categories.
  21. entity_column
  22. The name of the column that contains the categories.
  23. Output
  24. ------
  25. Data frame with updated values.
  26. """
  27. for replace_info_value in replace_info:
  28. category = replace_info_value[0]
  29. entity = replace_info_value[1]
  30. year = replace_info_value[2]
  31. if len(replace_info_value) == 4:
  32. new_value = replace_info_value[3]
  33. elif len(replace_info_value) == 3:
  34. new_value = np.nan
  35. else:
  36. raise AssertionError(
  37. f"Expected tuple of length 3 or 4. Got {replace_info_value}"
  38. )
  39. index = df.loc[
  40. (df[category_column] == category) & (df[entity_column] == entity),
  41. ].index[0]
  42. # pandas recommends using .at[] for changing single values
  43. df.at[index, year] = new_value
  44. print(f"Set value for {category}, {entity}, {year} to {new_value}.")
  45. return df