test_conversion.py 5.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141
  1. """Note that these tests only run locally, because they require the downloaded data"""
  2. import primap2 as pm2
  3. import pytest
  4. from faostat_data_primap.helper.paths import (
  5. downloaded_data_path,
  6. extracted_data_path,
  7. )
  8. from faostat_data_primap.read import process, read_data
  9. # For development work on process function only
  10. def test_processed_output_remains_the_same():
  11. # get processed data
  12. release_name = "v2024-11-14"
  13. # release_name = "v2023-12-13"
  14. filename_processed_ds = f"FAOSTAT_Agrifood_system_emissions_{release_name}"
  15. filepath = extracted_data_path / release_name / (filename_processed_ds + ".nc")
  16. ds_processed = pm2.open_dataset(filepath)
  17. # get raw data
  18. filename_raw_ds = (
  19. extracted_data_path
  20. / f"{release_name}/FAOSTAT_Agrifood_system_emissions_{release_name}_raw.nc"
  21. )
  22. ds_raw = pm2.open_dataset(filename_raw_ds)
  23. # process raw data
  24. ds_processed_new = process(ds=ds_raw)
  25. # filter by primap categories (sub-categories can change)
  26. primap_sectors = ["3", "3.A", "M.AG", "M.AG.ELV", "M.LULUCF"]
  27. ds_processed = ds_processed.loc[{"category (IPCC2006_PRIMAP)": primap_sectors}]
  28. ds_processed_new = ds_processed_new.loc[
  29. {"category (IPCC2006_PRIMAP)": primap_sectors}
  30. ]
  31. # compare
  32. # xr.testing.assert_allclose(ds_processed, ds_processed_new, rtol=1e-5, atol=1e-8)
  33. assert ds_processed.broadcast_equals(ds_processed_new)
  34. @pytest.mark.parametrize(
  35. "domains_and_releases_to_read",
  36. [
  37. pytest.param(
  38. [
  39. ("farm_gate_agriculture_energy", "2023-12-13"),
  40. ("farm_gate_emissions_crops", "2023-11-09"),
  41. ("farm_gate_livestock", "2023-11-09"),
  42. ("land_use_drained_organic_soils", "2023-11-09"),
  43. ("land_use_fires", "2023-11-09"),
  44. ("land_use_forests", "2023-11-09"),
  45. ("pre_post_agricultural_production", "2023-11-09"),
  46. ],
  47. id="2023 release",
  48. ),
  49. pytest.param(
  50. [
  51. ("farm_gate_agriculture_energy", "2024-11-14"),
  52. ("farm_gate_emissions_crops", "2024-11-14"),
  53. ("farm_gate_livestock", "2024-11-14"),
  54. ("land_use_drained_organic_soils", "2024-11-14"),
  55. ("land_use_fires", "2024-11-14"),
  56. ("land_use_forests", "2024-11-14"),
  57. ("pre_post_agricultural_production", "2024-11-14"),
  58. ],
  59. id="2024 release",
  60. ),
  61. ],
  62. )
  63. def test_read(tmp_path, domains_and_releases_to_read):
  64. read_data(
  65. domains_and_releases_to_read=domains_and_releases_to_read,
  66. read_path=downloaded_data_path,
  67. # save_path=tmp_path,
  68. save_path=extracted_data_path,
  69. )
  70. # TODO delete everything below here when data set is final
  71. # def test_conversion_from_FAO_to_IPCC2006_PRIMAP():
  72. # release_name = "v2024-11-14"
  73. # # release_name = "v2023-12-13"
  74. #
  75. # # get raw data
  76. # filename_raw_ds = (
  77. # extracted_data_path
  78. # / f"{release_name}/FAOSTAT_Agrifood_system_emissions_{release_name}_raw.nc"
  79. # )
  80. # ds_raw = pm2.open_dataset(filename_raw_ds)
  81. #
  82. # # process raw data
  83. # result_proc = process(ds=ds_raw)
  84. #
  85. # result_proc_if = result_proc.pr.to_interchange_format()
  86. #
  87. # # save processed data
  88. # output_filename = f"FAOSTAT_Agrifood_system_emissions_{release_name}"
  89. # output_folder = extracted_data_path / release_name
  90. #
  91. # if not output_folder.exists():
  92. # output_folder.mkdir()
  93. #
  94. # filepath = output_folder / (output_filename + ".csv")
  95. # print(f"Writing processed primap2 file to {filepath}")
  96. # pm2.pm2io.write_interchange_format(
  97. # filepath,
  98. # result_proc_if,
  99. # )
  100. #
  101. # compression = dict(zlib=True, complevel=9)
  102. # encoding = {var: compression for var in result_proc.data_vars}
  103. # filepath = output_folder / (output_filename + ".nc")
  104. # print(f"Writing netcdf file to {filepath}")
  105. # result_proc.pr.to_netcdf(filepath, encoding=encoding)
  106. #
  107. # def test_read_2023():
  108. # domains_and_releases_to_read = [
  109. # # ("farm_gate_agriculture_energy", "2023-12-13"),
  110. # # ("farm_gate_emissions_crops", "2023-11-09"),
  111. # # ("farm_gate_livestock", "2023-11-09"),
  112. # # ("land_use_drained_organic_soils", "2023-11-09"),
  113. # # ("land_use_fires", "2023-11-09"),
  114. # # ("land_use_forests", "2023-11-09"),
  115. # # ("pre_post_agricultural_production", "2023-11-09"),
  116. # ("farm_gate_agriculture_energy", "2024-11-14"),
  117. # ("farm_gate_emissions_crops", "2024-11-14"),
  118. # ("farm_gate_livestock", "2024-11-14"),
  119. # ("land_use_drained_organic_soils", "2024-11-14"),
  120. # ("land_use_fires", "2024-11-14"),
  121. # ("land_use_forests", "2024-11-14"),
  122. # ("pre_post_agricultural_production", "2024-11-14"),
  123. # ]
  124. #
  125. # read_data(
  126. # domains_and_releases_to_read=domains_and_releases_to_read,
  127. # read_path=downloaded_data_path,
  128. # save_path=extracted_data_path,
  129. # )