dodo.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358
  1. # define tasks for UNFCCC data repository
  2. from doit import get_var
  3. import os
  4. # TODO: task for folder mapping
  5. # create virtual environment
  6. def task_setup_venv():
  7. """Create virtual environment"""
  8. return {
  9. 'file_dep': ['requirements_dev.txt', 'setup.cfg', 'pyproject.toml'],
  10. 'actions': ['python3 -m venv venv',
  11. './venv/bin/pip install --upgrade pip wheel',
  12. #'./venv/bin/pip install -Ur UNFCCC_GHG_data/requirements.txt',
  13. './venv/bin/pip install --upgrade --upgrade-strategy '
  14. 'eager -e .[dev]',
  15. 'touch venv',],
  16. 'targets': ['venv'],
  17. 'verbosity': 2,
  18. }
  19. # set UNFCCC_GHG_ROOT_PATH environment variable
  20. def task_set_env():
  21. """
  22. Set the environment variable for the module so data is stored in the correct folders
  23. """
  24. def set_root_path():
  25. os.environ["UNFCCC_GHG_ROOT_PATH"] = "."
  26. return {
  27. 'actions': [set_root_path],
  28. }
  29. # Task to create the mapping files which map folder names to ISO 3-letter country codes
  30. read_config_folder = {
  31. "folder": get_var('folder', None),
  32. }
  33. def task_map_folders():
  34. """
  35. Create or update the folder mapping in the given folder
  36. """
  37. return {
  38. 'actions': [f"./venv/bin/python UNFCCC_GHG_data/helper/folder_mapping.py "
  39. f"--folder={read_config_folder['folder']}"],
  40. 'task_dep': ['set_env'],
  41. 'verbosity': 2,
  42. 'setup': ['setup_venv'],
  43. }
  44. # Tasks for getting submissions and downloading them
  45. def task_update_bur():
  46. """ Update list of BUR submissions """
  47. return {
  48. 'targets': ['downloaded_data/UNFCCC/submissions-bur.csv'],
  49. 'actions': ['datalad run -m "Fetch BUR submissions" '
  50. '-o downloaded_data/UNFCCC/submissions-bur.csv '
  51. './venv/bin/python UNFCCC_GHG_data/UNFCCC_downloader/fetch_submissions_bur.py'],
  52. 'task_dep': ['set_env'],
  53. 'verbosity': 2,
  54. 'setup': ['setup_venv'],
  55. }
  56. def task_download_bur():
  57. """ Download BUR submissions """
  58. return {
  59. #'file_dep': ['downloaded_data/UNFCCC/submissions-bur.csv'],
  60. # deactivate file_dep fow now as it will always run fetch submissions
  61. # before download
  62. 'actions': ['datalad run -m "Download BUR submissions" '
  63. '-i downloaded_data/UNFCCC/submissions-bur.csv '
  64. './venv/bin/python UNFCCC_GHG_data/UNFCCC_downloader/download_non-annexI.py --category=BUR',
  65. f"./venv/bin/python UNFCCC_GHG_data/helper/folder_mapping.py "
  66. f"--folder=downloaded_data/UNFCCC"
  67. ],
  68. 'task_dep': ['set_env'],
  69. 'verbosity': 2,
  70. 'setup': ['setup_venv'],
  71. }
  72. def task_update_nc():
  73. """ Update list of NC submissions """
  74. return {
  75. 'targets': ['downloaded_data/UNFCCC/submissions-nc.csv'],
  76. 'actions': ['datalad run -m "Fetch NC submissions" '
  77. '-o downloaded_data/UNFCCC/submissions-nc.csv '
  78. './venv/bin/python UNFCCC_GHG_data/UNFCCC_downloader/fetch_submissions_nc.py'],
  79. 'task_dep': ['set_env'],
  80. 'verbosity': 2,
  81. 'setup': ['setup_venv'],
  82. }
  83. def task_download_nc():
  84. """ Download NC submissions """
  85. return {
  86. #'file_dep': ['downloaded_data/UNFCCC/submissions-nc.csv'],
  87. # deactivate file_dep fow now as it will always run fetch submissions
  88. # before download
  89. 'actions': ['datalad run -m "Download NC submissions" '
  90. '-i downloaded_data/UNFCCC/submissions-nc.csv '
  91. './venv/bin/python UNFCCC_GHG_data/UNFCCC_downloader/download_non-annexI.py --category=NC',
  92. f"./venv/bin/python UNFCCC_GHG_data/helper/folder_mapping.py "
  93. f"--folder=downloaded_data/UNFCCC"
  94. ],
  95. 'task_dep': ['set_env'],
  96. 'verbosity': 2,
  97. 'setup': ['setup_venv'],
  98. }
# annexI data: one update call covers all data types (as they are on one page),
# but it has to be made for each year separately.
# downloading is per year and category.
  102. update_aI_config = {
  103. "year": get_var('year', None),
  104. "category": get_var('category', None),
  105. }
  106. def task_update_annexi():
  107. """ Update list of AnnexI submissions """
  108. return {
  109. 'targets': [f"downloaded_data/UNFCCC/submissions-annexI_{update_aI_config['year']}.csv"],
  110. 'actions': [f"datalad run -m 'Fetch AnnexI submissions for {update_aI_config['year']}' "
  111. "--explicit "
  112. f"-o downloaded_data/UNFCCC/submissions-annexI_{update_aI_config['year']}.csv "
  113. f"./venv/bin/python UNFCCC_GHG_data/UNFCCC_downloader/fetch_submissions_annexI.py "
  114. f"--year={update_aI_config['year']}"],
  115. 'task_dep': ['set_env'],
  116. 'verbosity': 2,
  117. 'setup': ['setup_venv'],
  118. }
  119. def task_download_annexi():
  120. """ Download AnnexI submissions """
  121. return {
  122. #'file_dep': ['downloaded_data/UNFCCC/submissions-nc.csv'],
  123. # deactivate file_dep fow now as it will always run fetch submissions
  124. # before download
  125. 'actions': [f"datalad run -m 'Download AnnexI submissions for "
  126. f"{update_aI_config['category']}{update_aI_config['year']}' "
  127. f"-i downloaded_data/UNFCCC/submissions-annexI_{update_aI_config['year']}.csv "
  128. f"./venv/bin/python UNFCCC_GHG_data/UNFCCC_downloader/download_annexI.py "
  129. f"--category={update_aI_config['category']} --year={update_aI_config['year']}",
  130. f"./venv/bin/python UNFCCC_GHG_data/helper/folder_mapping.py "
  131. f"--folder=downloaded_data/UNFCCC"
  132. ],
  133. 'task_dep': ['set_env'],
  134. 'verbosity': 2,
  135. 'setup': ['setup_venv'],
  136. }
  137. def task_download_ndc():
  138. """ Download NDC submissions """
  139. return {
  140. 'actions': ['datalad run -m "Download NDC submissions" '
  141. './venv/bin/python UNFCCC_GHG_data/UNFCCC_downloader/download_ndc.py',
  142. f"./venv/bin/python UNFCCC_GHG_data/helper/folder_mapping.py "
  143. f"--folder=downloaded_data/UNFCCC"
  144. ],
  145. 'task_dep': ['set_env'],
  146. 'verbosity': 2,
  147. 'setup': ['setup_venv'],
  148. }
  149. # read UNFCCC submissions.
  150. # datalad run is called from within the read_UNFCCC_submission.py script
  151. read_config = {
  152. "country": get_var('country', None),
  153. "submission": get_var('submission', None),
  154. }
  155. # TODO: make individual task for non-UNFCCC submissions
  156. def task_read_unfccc_submission():
  157. """ Read submission for a country (if UNFCCC_GHG_data exists) (not for CRF)"""
  158. return {
  159. 'actions': [f"./venv/bin/python UNFCCC_GHG_data/UNFCCC_reader/read_UNFCCC_submission.py "
  160. f"--country={read_config['country']} --submission={read_config['submission']}",
  161. f"./venv/bin/python UNFCCC_GHG_data/helper/folder_mapping.py "
  162. f"--folder=extracted_data/UNFCCC"
  163. ],
  164. 'task_dep': ['set_env'],
  165. 'verbosity': 2,
  166. 'setup': ['setup_venv'],
  167. }
# read UNFCCC CRF submissions.
# datalad run is not part of the doit actions here; it is presumably called
# from within the *_datalad.py CRF reader scripts
  170. read_config_crf = {
  171. "country": get_var('country', None),
  172. "submission_year": get_var('submission_year', None),
  173. "submission_date": get_var('submission_date', None),
  174. "re_read": get_var('re_read', False),
  175. "countries": get_var('countries', None),
  176. "data_year": get_var('data_year', None),
  177. "totest": get_var('data_year', None),
  178. }
  179. def task_read_unfccc_crf_submission():
  180. """ Read CRF submission for a country """
  181. actions = [
  182. f"./venv/bin/python UNFCCC_GHG_data/UNFCCC_CRF_reader/read_UNFCCC_CRF_submission_datalad.py "
  183. f"--country={read_config_crf['country']} "
  184. f"--submission_year={read_config_crf['submission_year']} "
  185. f"--submission_date={read_config_crf['submission_date']} ",
  186. f"./venv/bin/python UNFCCC_GHG_data/helper/folder_mapping.py "
  187. f"--folder=extracted_data/UNFCCC"
  188. ]
  189. if read_config_crf["re_read"] == "True":
  190. actions[0] = actions[0] + " --re_read"
  191. return {
  192. 'actions': actions,
  193. 'task_dep': ['set_env'],
  194. 'verbosity': 2,
  195. 'setup': ['setup_venv'],
  196. }
  197. def task_read_new_unfccc_crf_for_year():
  198. """ Read CRF submission for all countries for given submission year. by default only reads
  199. data not present yet. Only reads the latest updated submission for each country."""
  200. actions = [f"./venv/bin/python UNFCCC_GHG_data/UNFCCC_CRF_reader/read_new_UNFCCC_CRF_for_year_datalad.py "
  201. f"--submission_year={read_config_crf['submission_year']} ",
  202. f"./venv/bin/python UNFCCC_GHG_data/helper/folder_mapping.py "
  203. f"--folder=extracted_data/UNFCCC"
  204. ]
  205. # specifying countries is currently disabled duo to problems with command line
  206. # list arguments
  207. #if read_config_crf["countries"] is not None:
  208. # actions[0] = actions[0] + f"--countries={read_config_crf['countries']} "
  209. if read_config_crf["re_read"] == "True":
  210. actions[0] = actions[0] + " --re_read"
  211. return {
  212. #'basename': "Read_CRF_year",
  213. 'actions': actions,
  214. 'task_dep': ['set_env'],
  215. 'verbosity': 2,
  216. 'setup': ['setup_venv'],
  217. }
  218. def task_test_read_unfccc_crf_for_year():
  219. """ Read CRF submission for all countries for given submission year. by default only reads
  220. data not present yet. Only reads the latest updated submission for each country."""
  221. actions = [f"./venv/bin/python "
  222. f"UNFCCC_GHG_data/UNFCCC_CRF_reader"
  223. f"/test_read_UNFCCC_CRF_for_year.py "
  224. f"--submission_year={read_config_crf['submission_year']} "
  225. ]
  226. if read_config_crf["totest"] == "True":
  227. actions[0] = actions[0] + " --totest"
  228. if read_config_crf["data_year"] is not None:
  229. actions[0] = actions[0] + f"--data_year={read_config_crf['data_year']} "
  230. return {
  231. #'basename': "Read_CRF_year",
  232. 'actions': actions,
  233. 'task_dep': ['set_env'],
  234. 'verbosity': 2,
  235. 'setup': ['setup_venv'],
  236. }
  237. def task_compile_raw_unfccc_crf_for_year():
  238. """ Read CRF submission for all countries for given submission year. by default only reads
  239. data not present yet. Only reads the latest updated submission for each country."""
  240. actions = [f"./venv/bin/python "
  241. f"UNFCCC_GHG_data/UNFCCC_CRF_reader/CRF_raw_for_year.py "
  242. f"--submission_year={read_config_crf['submission_year']} "
  243. ]
  244. return {
  245. 'actions': actions,
  246. 'task_dep': ['set_env'],
  247. 'verbosity': 2,
  248. 'setup': ['setup_venv'],
  249. }
  250. # tasks for DI reader
  251. # datalad run is called from within the read_UNFCCC_DI_for_country.py script
  252. read_config_di = {
  253. "country": get_var('country', None),
  254. "date": get_var('date', None),
  255. "annexI": get_var('annexI', False),
  256. #"countries": get_var('countries', None),
  257. }
  258. def task_read_unfccc_di_for_country():
  259. """ Read DI data for a country """
  260. actions = [
  261. f"./venv/bin/python "
  262. f"UNFCCC_GHG_data/UNFCCC_DI_reader/read_UNFCCC_DI_for_country_datalad.py "
  263. f"--country={read_config_di['country']}",
  264. f"./venv/bin/python UNFCCC_GHG_data/helper/folder_mapping.py "
  265. f"--folder=extracted_data/UNFCCC"
  266. ]
  267. return {
  268. 'actions': actions,
  269. 'task_dep': ['set_env'],
  270. 'verbosity': 2,
  271. 'setup': ['setup_venv'],
  272. }
  273. def task_process_unfccc_di_for_country():
  274. """ Process DI data for a country """
  275. actions = [
  276. f"./venv/bin/python "
  277. f"UNFCCC_GHG_data/UNFCCC_DI_reader/process_UNFCCC_DI_for_country_datalad.py "
  278. f"--country={read_config_di['country']} --date={read_config_di['date']}",
  279. f"./venv/bin/python UNFCCC_GHG_data/helper/folder_mapping.py "
  280. f"--folder=extracted_data/UNFCCC"
  281. ]
  282. return {
  283. 'actions': actions,
  284. 'task_dep': ['set_env'],
  285. 'verbosity': 2,
  286. 'setup': ['setup_venv'],
  287. }
  288. def task_read_unfccc_di_for_country_group():
  289. """ Read DI data for a country """
  290. actions = [
  291. f"./venv/bin/python "
  292. f"UNFCCC_GHG_data/UNFCCC_DI_reader/read_UNFCCC_DI_for_country_group_datalad.py",
  293. f"./venv/bin/python UNFCCC_GHG_data/helper/folder_mapping.py "
  294. f"--folder=extracted_data/UNFCCC"
  295. ]
  296. if read_config_di["annexI"] == "True":
  297. actions[0] = actions[0] + " --annexI"
  298. return {
  299. 'actions': actions,
  300. 'task_dep': ['set_env'],
  301. 'verbosity': 2,
  302. 'setup': ['setup_venv'],
  303. }
  304. # general tasks
  305. def task_country_info():
  306. """ Print information on submissions and datasets
  307. available for given country"""
  308. return {
  309. 'actions': [f"./venv/bin/python UNFCCC_GHG_data/helper/country_info.py "
  310. f"--country={read_config['country']}"],
  311. 'task_dep': ['set_env'],
  312. 'verbosity': 2,
  313. 'setup': ['setup_venv'],
  314. }