test_conversion.py 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139
  1. """Note that these tests only run locally, because they require the downloaded data"""
  2. import primap2 as pm2
  3. import pytest
  4. from faostat_data_primap.helper.paths import (
  5. downloaded_data_path,
  6. extracted_data_path,
  7. )
  8. from faostat_data_primap.read import process, read_data
  9. # For development work on process function only
  10. def test_processed_output_remains_the_same():
  11. # get processed data
  12. release_name = "v2024-11-14"
  13. # release_name = "v2023-12-13"
  14. filename_processed_ds = f"FAOSTAT_Agrifood_system_emissions_{release_name}"
  15. filepath = extracted_data_path / release_name / (filename_processed_ds + ".nc")
  16. ds_processed = pm2.open_dataset(filepath)
  17. # get raw data
  18. filename_raw_ds = (
  19. extracted_data_path
  20. / f"{release_name}/FAOSTAT_Agrifood_system_emissions_{release_name}_raw.nc"
  21. )
  22. ds_raw = pm2.open_dataset(filename_raw_ds)
  23. # process raw data
  24. ds_processed_new = process(ds=ds_raw)
  25. # filter by primap categories (sub-categories can change)
  26. primap_sectors = ["3", "3.A", "M.AG", "M.AG.ELV", "M.LULUCF"]
  27. ds_processed = ds_processed.loc[{"category (IPCC2006_PRIMAP)": primap_sectors}]
  28. ds_processed_new = ds_processed_new.loc[
  29. {"category (IPCC2006_PRIMAP)": primap_sectors}
  30. ]
  31. # compare
  32. assert ds_processed.broadcast_equals(ds_processed_new)
  33. @pytest.mark.parametrize(
  34. "domains_and_releases_to_read",
  35. [
  36. pytest.param(
  37. [
  38. ("farm_gate_agriculture_energy", "2023-12-13"),
  39. ("farm_gate_emissions_crops", "2023-11-09"),
  40. ("farm_gate_livestock", "2023-11-09"),
  41. ("land_use_drained_organic_soils", "2023-11-09"),
  42. ("land_use_fires", "2023-11-09"),
  43. ("land_use_forests", "2023-11-09"),
  44. ("pre_post_agricultural_production", "2023-11-09"),
  45. ],
  46. id="2023 release",
  47. ),
  48. pytest.param(
  49. [
  50. ("farm_gate_agriculture_energy", "2024-11-14"),
  51. ("farm_gate_emissions_crops", "2024-11-14"),
  52. ("farm_gate_livestock", "2024-11-14"),
  53. ("land_use_drained_organic_soils", "2024-11-14"),
  54. ("land_use_fires", "2024-11-14"),
  55. ("land_use_forests", "2024-11-14"),
  56. ("pre_post_agricultural_production", "2024-11-14"),
  57. ],
  58. id="2024 release",
  59. ),
  60. ],
  61. )
  62. def test_read(tmp_path, domains_and_releases_to_read):
  63. read_data(
  64. domains_and_releases_to_read=domains_and_releases_to_read,
  65. read_path=downloaded_data_path,
  66. # save_path=tmp_path,
  67. save_path=extracted_data_path,
  68. )
  69. # TODO delete everything below here when data set is final
  70. # def test_conversion_from_FAO_to_IPCC2006_PRIMAP():
  71. # release_name = "v2024-11-14"
  72. # # release_name = "v2023-12-13"
  73. #
  74. # # get raw data
  75. # filename_raw_ds = (
  76. # extracted_data_path
  77. # / f"{release_name}/FAOSTAT_Agrifood_system_emissions_{release_name}_raw.nc"
  78. # )
  79. # ds_raw = pm2.open_dataset(filename_raw_ds)
  80. #
  81. # # process raw data
  82. # result_proc = process(ds=ds_raw)
  83. #
  84. # result_proc_if = result_proc.pr.to_interchange_format()
  85. #
  86. # # save processed data
  87. # output_filename = f"FAOSTAT_Agrifood_system_emissions_{release_name}"
  88. # output_folder = extracted_data_path / release_name
  89. #
  90. # if not output_folder.exists():
  91. # output_folder.mkdir()
  92. #
  93. # filepath = output_folder / (output_filename + ".csv")
  94. # print(f"Writing processed primap2 file to {filepath}")
  95. # pm2.pm2io.write_interchange_format(
  96. # filepath,
  97. # result_proc_if,
  98. # )
  99. #
  100. # compression = dict(zlib=True, complevel=9)
  101. # encoding = {var: compression for var in result_proc.data_vars}
  102. # filepath = output_folder / (output_filename + ".nc")
  103. # print(f"Writing netcdf file to {filepath}")
  104. # result_proc.pr.to_netcdf(filepath, encoding=encoding)
  105. #
  106. # def test_read_2023():
  107. # domains_and_releases_to_read = [
  108. # # ("farm_gate_agriculture_energy", "2023-12-13"),
  109. # # ("farm_gate_emissions_crops", "2023-11-09"),
  110. # # ("farm_gate_livestock", "2023-11-09"),
  111. # # ("land_use_drained_organic_soils", "2023-11-09"),
  112. # # ("land_use_fires", "2023-11-09"),
  113. # # ("land_use_forests", "2023-11-09"),
  114. # # ("pre_post_agricultural_production", "2023-11-09"),
  115. # ("farm_gate_agriculture_energy", "2024-11-14"),
  116. # ("farm_gate_emissions_crops", "2024-11-14"),
  117. # ("farm_gate_livestock", "2024-11-14"),
  118. # ("land_use_drained_organic_soils", "2024-11-14"),
  119. # ("land_use_fires", "2024-11-14"),
  120. # ("land_use_forests", "2024-11-14"),
  121. # ("pre_post_agricultural_production", "2024-11-14"),
  122. # ]
  123. #
  124. # read_data(
  125. # domains_and_releases_to_read=domains_and_releases_to_read,
  126. # read_path=downloaded_data_path,
  127. # save_path=extracted_data_path,
  128. # )