test_datatree_for_crt_crf.py 4.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107
  1. import pytest
  2. from src.unfccc_ghg_data.unfccc_crf_reader.crf_raw_for_year_sparse_arrays import (
  3. crf_raw_for_year_datatree
  4. )
  5. import primap2 as pm
  6. import xarray as xr
  7. import numpy as np
  8. @pytest.fixture
  9. def crt_dt(tmp_path):
  10. """Return a data tree object with CRT data for 25 countries"""
  11. n_countries = 100 # 25 of which are filled
  12. crt_dt = crf_raw_for_year_datatree(
  13. submission_year=1,
  14. submission_type="CRT",
  15. n_countries=n_countries,
  16. output_folder=tmp_path,
  17. )
  18. return crt_dt
  19. def test_country_aggregation(crt_dt):
  20. # Nigeria (NGA)
  21. expected_NGA = 123840.3389332373
  22. assert np.squeeze(crt_dt["/NGA"]["CO2"].sel(
  23. {"category (CRT1)" : "1", "class" : "Total", "time" : "2022-01-01"}).to_numpy()).item() == expected_NGA
  24. # NGA has data from 2000 to 2022
  25. assert len(crt_dt["/NGA"].time) == 23
  26. # Georgia (GEO)
  27. expected_GEO = 10954.899111969342
  28. assert np.squeeze(crt_dt["/GEO"]["CO2"].sel(
  29. {"category (CRT1)" : "1", "class" : "Total", "time" : "2022-01-01"}).to_numpy()).item() == expected_GEO
  30. # GEO has data from 1990 to 2022
  31. assert len(crt_dt["/GEO"].time) == 33
  32. # we have a value for 1990
  33. assert np.squeeze(crt_dt["/GEO"]["CO2"].sel(
  34. {"category (CRT1)" : "1", "class" : "Total", "time" : "1990-01-01"}).to_numpy()).item()
  35. # Remove country dimension and add
  36. crt_dt["/NGA_GEO"] = crt_dt["/NGA"].to_dataset().pr.loc[{"area (ISO3)" : "NGA"}] + crt_dt["/GEO"].to_dataset().pr.loc[{"area (ISO3)" : "GEO"}]
  37. # Check if the sum of NGA and GEO is equal to NGA_GEO
  38. # This will perform an inner join, NGA has 23 time steps, GEO has 33 time steps,
  39. # and the result will have 23 time steps
  40. assert np.squeeze(crt_dt["/NGA_GEO"]["CO2"].sel(
  41. {"category (CRT1)" : "1", "class" : "Total", "time" : "2022-01-01"}).to_numpy()).item() == expected_NGA + expected_GEO
  42. assert len(crt_dt["/NGA_GEO"].time) == 23
  43. # The default option for arithmetic operations is to perform an inner join
  44. # set options to perform outer join
  45. with xr.set_options(arithmetic_join="outer"):
  46. crt_dt["/NGA_GEO_outer"] = crt_dt["/NGA"].to_dataset().pr.loc[{"area (ISO3)" : "NGA"}] + crt_dt["/GEO"].to_dataset().pr.loc[{"area (ISO3)" : "GEO"}]
  47. # Where there is overlap between NGA and GEO, the values will be added
  48. assert np.squeeze(crt_dt["/NGA_GEO_outer"]["CO2"].sel(
  49. {"category (CRT1)" : "1", "class" : "Total", "time" : "2022-01-01"}).to_numpy()).item() == expected_NGA + expected_GEO
  50. # When only one time series has a value, the value will be set to nan
  51. # TODO Is that the behaviour we need?
  52. assert np.isnan(np.squeeze(crt_dt["/NGA_GEO_outer"]["CO2"].sel(
  53. {"category (CRT1)" : "1", "class" : "Total", "time" : "1990-01-01"}).to_numpy()).item())
  54. # Extra time steps will be added
  55. assert len(crt_dt["/NGA_GEO_outer"].time) == 33
  56. # How can we use data tree methods for this?
  57. # filter countries we need
  58. dt_geo_nga = crt_dt.filter(lambda x: x.name in ["NGA", "GEO"])
  59. # fails because, x will be a data set view object that does not have the attribute to_dataset
  60. # dt_geo_nga = dt_geo_nga.map_over_datasets(lambda x: x.to_dataset().pr.loc[{"area (ISO3)" : x.name}])
  61. def test_crf_for_year_original_version(crt_dt):
  62. print(crt_dt.groups)
  63. # output:
  64. # ('/', '/DZA', '/ARG', '/AZE', '/BTN', '/BRA', '/BRN', '/CHL', '/CHN', '/COL', '/CIV', '/ECU', '/EGY', '/GEO',
  65. # '/GHA', '/GNB', '/GUY', '/IDN', '/KEN', '/LBN', '/MYS', '/MDV', '/MUS', '/MAR', '/NAM', '/NGA')
  66. # out of the 100 countries, 26 have CRT data
  67. # to make it more interesting, we also add primap-hist to the data tree
  68. primap_hist = pm.open_dataset("../../data/Guetschow_et_al_2025-PRIMAP-hist_v2.6.1_final_13-Mar-2025.nc")
  69. # Assign primap hist to the data tree
  70. crt_dt["primap_hist"] = primap_hist
  71. print(crt_dt.groups)
  72. # output:
  73. # ('/', '/DZA', '/ARG', '/AZE', '/BTN', '/BRA', '/BRN', '/CHL', '/CHN', '/COL', '/CIV', '/ECU', '/EGY', '/GEO',
  74. # '/GHA', '/GNB', '/GUY', '/IDN', '/KEN', '/LBN', '/MYS', '/MDV', '/MUS', '/MAR', '/NAM', '/NGA', '/primap_hist')
  75. # try some queries
  76. # get the data for category 1
  77. # this will filter the primap-hist data set, because the CRT comes with
  78. # the coordinate "category (CRT1)"
  79. # Note that the data tree will have the same structure, only the individual
  80. # data sets will be filtered
  81. # print(dt_crt.sel({"category (IPCC2006_PRIMAP)" : "1"}))
  82. # we can re-assign the primap hist data set with a renamed category dimension
  83. crt_dt["primap_hist"] = primap_hist.rename({"category (IPCC2006_PRIMAP)" : "category (CRT1)"})
  84. # Now we should be able to filter for categories over all nodes
  85. dt_cat1 = crt_dt.sel({"category (CRT1)" : "1"})