test_conversion.py 5.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144
  1. """Note that these tests only run locally, because they require the downloaded data"""
  2. import primap2 as pm2
  3. import pytest
  4. import xarray as xr
  5. from faostat_data_primap.helper.paths import (
  6. downloaded_data_path,
  7. extracted_data_path,
  8. )
  9. from faostat_data_primap.read import process, read_data
  10. # For development work on process function only
  11. def test_processed_output_remains_the_same():
  12. # get processed data
  13. release_name = "v2024-11-14"
  14. # release_name = "v2023-12-13"
  15. filename_processed_ds = f"FAOSTAT_Agrifood_system_emissions_{release_name}"
  16. filepath = extracted_data_path / release_name / (filename_processed_ds + ".nc")
  17. ds_processed = pm2.open_dataset(filepath)
  18. # get raw data
  19. filename_raw_ds = (
  20. extracted_data_path
  21. / f"{release_name}/FAOSTAT_Agrifood_system_emissions_{release_name}_raw.nc"
  22. )
  23. ds_raw = pm2.open_dataset(filename_raw_ds)
  24. # process raw data
  25. ds_processed_new = process(ds=ds_raw)
  26. # filter by primap categories (sub-categories can change)
  27. primap_sectors = ["3", "3.A", "M.AG", "M.AG.ELV", "M.LULUCF"]
  28. ds_processed = ds_processed.loc[{"category (IPCC2006_PRIMAP)": primap_sectors}]
  29. ds_processed_new = ds_processed_new.loc[
  30. {"category (IPCC2006_PRIMAP)": primap_sectors}
  31. ]
  32. # compare
  33. xr.testing.assert_allclose(
  34. ds_processed, ds_processed_new, rtol=1e-10, check_dim_order=False
  35. )
  36. # assert ds_processed.broadcast_equals(ds_processed_new)
  37. @pytest.mark.parametrize(
  38. "domains_and_releases_to_read",
  39. [
  40. pytest.param(
  41. [
  42. ("farm_gate_agriculture_energy", "2023-12-13"),
  43. ("farm_gate_emissions_crops", "2023-11-09"),
  44. ("farm_gate_livestock", "2023-11-09"),
  45. ("land_use_drained_organic_soils", "2023-11-09"),
  46. ("land_use_fires", "2023-11-09"),
  47. ("land_use_forests", "2023-11-09"),
  48. ("pre_post_agricultural_production", "2023-11-09"),
  49. ],
  50. id="2023 release",
  51. ),
  52. pytest.param(
  53. [
  54. ("farm_gate_agriculture_energy", "2024-11-14"),
  55. ("farm_gate_emissions_crops", "2024-11-14"),
  56. ("farm_gate_livestock", "2024-11-14"),
  57. ("land_use_drained_organic_soils", "2024-11-14"),
  58. ("land_use_fires", "2024-11-14"),
  59. ("land_use_forests", "2024-11-14"),
  60. ("pre_post_agricultural_production", "2024-11-14"),
  61. ],
  62. id="2024 release",
  63. ),
  64. ],
  65. )
  66. def test_read(tmp_path, domains_and_releases_to_read):
  67. read_data(
  68. domains_and_releases_to_read=domains_and_releases_to_read,
  69. read_path=downloaded_data_path,
  70. # save_path=tmp_path,
  71. save_path=extracted_data_path,
  72. )
  73. # TODO delete everything below here when data set is final
  74. # def test_conversion_from_FAO_to_IPCC2006_PRIMAP():
  75. # release_name = "v2024-11-14"
  76. # # release_name = "v2023-12-13"
  77. #
  78. # # get raw data
  79. # filename_raw_ds = (
  80. # extracted_data_path
  81. # / f"{release_name}/FAOSTAT_Agrifood_system_emissions_{release_name}_raw.nc"
  82. # )
  83. # ds_raw = pm2.open_dataset(filename_raw_ds)
  84. #
  85. # # process raw data
  86. # result_proc = process(ds=ds_raw)
  87. #
  88. # result_proc_if = result_proc.pr.to_interchange_format()
  89. #
  90. # # save processed data
  91. # output_filename = f"FAOSTAT_Agrifood_system_emissions_{release_name}"
  92. # output_folder = extracted_data_path / release_name
  93. #
  94. # if not output_folder.exists():
  95. # output_folder.mkdir()
  96. #
  97. # filepath = output_folder / (output_filename + ".csv")
  98. # print(f"Writing processed primap2 file to {filepath}")
  99. # pm2.pm2io.write_interchange_format(
  100. # filepath,
  101. # result_proc_if,
  102. # )
  103. #
  104. # compression = dict(zlib=True, complevel=9)
  105. # encoding = {var: compression for var in result_proc.data_vars}
  106. # filepath = output_folder / (output_filename + ".nc")
  107. # print(f"Writing netcdf file to {filepath}")
  108. # result_proc.pr.to_netcdf(filepath, encoding=encoding)
  109. #
  110. # def test_read_2023():
  111. # domains_and_releases_to_read = [
  112. # # ("farm_gate_agriculture_energy", "2023-12-13"),
  113. # # ("farm_gate_emissions_crops", "2023-11-09"),
  114. # # ("farm_gate_livestock", "2023-11-09"),
  115. # # ("land_use_drained_organic_soils", "2023-11-09"),
  116. # # ("land_use_fires", "2023-11-09"),
  117. # # ("land_use_forests", "2023-11-09"),
  118. # # ("pre_post_agricultural_production", "2023-11-09"),
  119. # ("farm_gate_agriculture_energy", "2024-11-14"),
  120. # ("farm_gate_emissions_crops", "2024-11-14"),
  121. # ("farm_gate_livestock", "2024-11-14"),
  122. # ("land_use_drained_organic_soils", "2024-11-14"),
  123. # ("land_use_fires", "2024-11-14"),
  124. # ("land_use_forests", "2024-11-14"),
  125. # ("pre_post_agricultural_production", "2024-11-14"),
  126. # ]
  127. #
  128. # read_data(
  129. # domains_and_releases_to_read=domains_and_releases_to_read,
  130. # read_path=downloaded_data_path,
  131. # save_path=extracted_data_path,
  132. # )