# convert_CT_data_2021.py
# Convert the 2021 Climate TRACE inventory to PRIMAP2 format.

# imports
import os
import sys
from pathlib import Path

import pandas as pd
import primap2 as pm2
  8. # configuration
  9. ### folders and filenames
  10. input_folder = Path("../downloaded_data/2021")
  11. input_file = "climatetrace_emissions_by_subsector_timeseries_interval_year_since_2015_to_2020.csv"
  12. output_folder = Path("../data_primap2/2021")
  13. output_filename = "ClimateTRACE_Inventory_2021"
  14. compression = dict(zlib=True, complevel=9)
  15. ### primap2 format definitions
  16. coords_terminologies = {
  17. "area": "ISO3",
  18. "category": "ClimateTrace2021",
  19. "scenario": "ReleaseDate",
  20. }
  21. coords_defaults = {
  22. "source": "Climate Trace",
  23. "provenance": "measured",
  24. "scenario": "2021",
  25. "entity": "KYOTOGHG (AR4GWP100)",
  26. # GWPs are unclear as they only state that it's 100 year potentials but not from which AR
  27. "unit": "t CO2 / yr"
  28. }
  29. coords_cols = {
  30. "category": "sector",
  31. "area": "country",
  32. }
  33. meta_data = {
  34. "references": "https://www.climatetrace.org/inventory",
  35. "rights": "",
  36. "contact": "johannes.guetschow@pik-potsdam.de",
  37. "title": "Climate Trace Emissions Inventory 2021",
  38. "comment": "Read fom csv file by Johannes Gütschow",
  39. "institution": "Climate Trace - www.climatetrace.org",
  40. }
  41. # change working directory to script directory for proper folder names
  42. script_path = os.path.abspath(sys.argv[0])
  43. script_dir_name = os.path.dirname(script_path)
  44. os.chdir(script_dir_name)
  45. # read the data
  46. ct_data = pd.read_csv(input_folder / input_file)
  47. # convert dates to just years
  48. ct_data["start"] = ct_data["start"].replace(r"([0-9]{4})\-[0-9]{2}\-[0-9]{2}", r"\1", regex=True)
  49. # combine sector information in one column
  50. ct_data["sector"] = ct_data["sector"] + " - " + ct_data["subsector"]
  51. # drop old columns and rename columns
  52. ct_data = ct_data.drop(columns = ["end", "subsector", "country_full"])
  53. ct_data = ct_data.rename(columns={"Tonnes Co2e": "data", "start": "time"})
  54. # convert to primap2 format
  55. ct_data_if = pm2.pm2io.convert_long_dataframe_if(
  56. ct_data,
  57. coords_cols=coords_cols,
  58. coords_defaults=coords_defaults,
  59. coords_terminologies=coords_terminologies,
  60. meta_data=meta_data,
  61. convert_str=True
  62. )
  63. # write the result in IF
  64. if not output_folder.exists():
  65. output_folder.mkdir()
  66. pm2.pm2io.write_interchange_format(output_folder / output_filename, ct_data_if)
  67. # Convert to native pm2 format and write result
  68. ct_data_pm2 = pm2.pm2io.from_interchange_format(ct_data_if)
  69. encoding = {var: compression for var in ct_data_pm2.data_vars}
  70. ct_data_pm2.pr.to_netcdf(output_folder / (output_filename + ".nc"), encoding=encoding)