functions_temp.py 3.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106
  1. import pandas as pd
  2. import warnings
  3. import numpy as np
  4. def assert_values(
  5. df: pd.DataFrame,
  6. test_case: tuple[str | float | int],
  7. category_column: str = "category (IPCC1996_2006_GIN_Inv)",
  8. entity_column: str = "entity",
  9. ) -> None:
  10. """
  11. Check if a value in a dataframe matches the expected value.
  12. Input
  13. -----
  14. df
  15. The data frame to check.
  16. test_case
  17. The combination of parameters and the expected value.
  18. Use the format (<category>, <entity>, <year>, <expected_value>).
  19. category_column
  20. The columns where to look for the category.
  21. entity_column
  22. The column where to look for the entity.
  23. """
  24. category = test_case[0]
  25. entity = test_case[1]
  26. year = test_case[2]
  27. expected_value = test_case[3]
  28. assert isinstance(expected_value, (float, int)), "This function only works for numbers. Use assert_nan_values to check for NaNs and empty values."
  29. arr = df.loc[
  30. (df[category_column] == category) & (df[entity_column] == entity), year
  31. ].values
  32. # Assert the category exists in the data frame
  33. assert (
  34. category in df[category_column].unique()
  35. ), f"{category} is not a valid category. Choose from {df[category_column].unique()}"
  36. # Assert the entity exists in the data frame
  37. assert (
  38. entity in df[entity_column].unique()
  39. ), f"{entity} is not a valid entity. Choose from {df[entity_column].unique()}"
  40. assert (
  41. arr.size > 0
  42. ), f"No value found for category {category}, entity {entity}, year {year}!"
  43. assert (
  44. arr.size <= 1
  45. ), f"More than one value found for category {category}, entity {entity}, year {year}!"
  46. assert (
  47. arr[0] == test_case[3]
  48. ), f"Expected value {expected_value}, actual value is {arr[0]}"
  49. print(
  50. f"Value for category {category}, entity {entity}, year {year} is as expected."
  51. )
  52. def assert_nan_values(
  53. df: pd.DataFrame,
  54. test_case: tuple[str, ...],
  55. category_column: str = "category (IPCC1996_2006_GIN_Inv)",
  56. entity_column: str = "entity",
  57. ) -> None:
  58. """
  59. Check if values that are empty or NE or NE1 in the PDF tables
  60. are not present in the dataset.
  61. Input
  62. -----
  63. df
  64. The data frame to check.
  65. test_case
  66. The combination of input parameters.
  67. Use the format (<category>, <entity>, <year>).
  68. category_column
  69. The columns where to look for the category.
  70. entity_column
  71. The column where to look for the entity.
  72. """
  73. category = test_case[0]
  74. entity = test_case[1]
  75. year = test_case[2]
  76. if category not in df[category_column].unique():
  77. warning_string = f"{category} is not in the data set. Either all values for this category are NaN or the category never existed in the data set."
  78. warnings.warn(warning_string)
  79. return
  80. if entity not in df[entity_column].unique():
  81. warning_string = f"{entity} is not in the data set. Either all values for this entity are NaN or the category never existed in the data set."
  82. warnings.warn(warning_string)
  83. return
  84. arr = df.loc[
  85. (df[category_column] == category) & (df[entity_column] == entity), year
  86. ].values
  87. assert np.isnan(arr[0]), f"Value is {arr[0]} and not NaN."
  88. print(f"Value for category {category}, entity {entity}, year {year} is NaN.")