dodo.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378
  1. # define tasks for UNFCCC data repository
  2. from doit import get_var
  3. import os
  4. # TODO: task for folder mapping
  5. # create virtual environment
  6. def task_setup_venv():
  7. """Create virtual environment"""
  8. return {
  9. 'file_dep': ['requirements_dev.txt', 'setup.cfg', 'pyproject.toml'],
  10. 'actions': ['python3 -m venv venv',
  11. './venv/bin/pip install --upgrade pip wheel',
  12. #'./venv/bin/pip install -Ur UNFCCC_GHG_data/requirements.txt',
  13. './venv/bin/pip install --upgrade --upgrade-strategy '
  14. 'eager -e .[dev]',
  15. 'touch venv',],
  16. 'targets': ['venv'],
  17. 'verbosity': 2,
  18. }
  19. # set UNFCCC_GHG_ROOT_PATH environment variable
  20. def task_set_env():
  21. """
  22. Set the environment variable for the module so data is stored in the correct folders
  23. """
  24. def set_root_path():
  25. os.environ["UNFCCC_GHG_ROOT_PATH"] = "."
  26. return {
  27. 'actions': [set_root_path],
  28. }
  29. # Task to create the mapping files which map folder names to ISO 3-letter country codes
  30. read_config_folder = {
  31. "folder": get_var('folder', None),
  32. }
  33. def task_map_folders():
  34. """
  35. Create or update the folder mapping in the given folder
  36. """
  37. return {
  38. 'actions': [f"./venv/bin/python UNFCCC_GHG_data/helper/folder_mapping.py "
  39. f"--folder={read_config_folder['folder']}"],
  40. 'task_dep': ['set_env'],
  41. 'verbosity': 2,
  42. 'setup': ['setup_venv'],
  43. }
  44. # Tasks for getting submissions and downloading them
  45. def task_update_bur():
  46. """ Update list of BUR submissions """
  47. return {
  48. 'targets': ['downloaded_data/UNFCCC/submissions-bur.csv'],
  49. 'actions': ['datalad run -m "Fetch BUR submissions" '
  50. '-o downloaded_data/UNFCCC/submissions-bur.csv '
  51. './venv/bin/python UNFCCC_GHG_data/UNFCCC_downloader/fetch_submissions_bur.py'],
  52. 'task_dep': ['set_env'],
  53. 'verbosity': 2,
  54. 'setup': ['setup_venv'],
  55. }
  56. def task_download_bur():
  57. """ Download BUR submissions """
  58. return {
  59. #'file_dep': ['downloaded_data/UNFCCC/submissions-bur.csv'],
  60. # deactivate file_dep fow now as it will always run fetch submissions
  61. # before download
  62. 'actions': ['datalad run -m "Download BUR submissions" '
  63. '-i downloaded_data/UNFCCC/submissions-bur.csv '
  64. './venv/bin/python UNFCCC_GHG_data/UNFCCC_downloader/download_non-annexI.py --category=BUR',
  65. f"./venv/bin/python UNFCCC_GHG_data/helper/folder_mapping.py "
  66. f"--folder=downloaded_data/UNFCCC"
  67. ],
  68. 'task_dep': ['set_env'],
  69. 'verbosity': 2,
  70. 'setup': ['setup_venv'],
  71. }
  72. def task_update_nc():
  73. """ Update list of NC submissions """
  74. return {
  75. 'targets': ['downloaded_data/UNFCCC/submissions-nc.csv'],
  76. 'actions': ['datalad run -m "Fetch NC submissions" '
  77. '-o downloaded_data/UNFCCC/submissions-nc.csv '
  78. './venv/bin/python UNFCCC_GHG_data/UNFCCC_downloader/fetch_submissions_nc.py'],
  79. 'task_dep': ['set_env'],
  80. 'verbosity': 2,
  81. 'setup': ['setup_venv'],
  82. }
  83. def task_download_nc():
  84. """ Download NC submissions """
  85. return {
  86. #'file_dep': ['downloaded_data/UNFCCC/submissions-nc.csv'],
  87. # deactivate file_dep fow now as it will always run fetch submissions
  88. # before download
  89. 'actions': ['datalad run -m "Download NC submissions" '
  90. '-i downloaded_data/UNFCCC/submissions-nc.csv '
  91. './venv/bin/python UNFCCC_GHG_data/UNFCCC_downloader/download_non-annexI.py --category=NC',
  92. f"./venv/bin/python UNFCCC_GHG_data/helper/folder_mapping.py "
  93. f"--folder=downloaded_data/UNFCCC"
  94. ],
  95. 'task_dep': ['set_env'],
  96. 'verbosity': 2,
  97. 'setup': ['setup_venv'],
  98. }
  99. # annexI data: one update call for all data types (as they are on one page)
  100. # but for each year separately.
  101. # downloading is per year and
  102. update_aI_config = {
  103. "year": get_var('year', None),
  104. "category": get_var('category', None),
  105. }
  106. def task_update_annexi():
  107. """ Update list of AnnexI submissions """
  108. return {
  109. 'targets': [f"downloaded_data/UNFCCC/submissions-annexI_{update_aI_config['year']}.csv"],
  110. 'actions': [f"datalad run -m 'Fetch AnnexI submissions for {update_aI_config['year']}' "
  111. "--explicit "
  112. f"-o downloaded_data/UNFCCC/submissions-annexI_{update_aI_config['year']}.csv "
  113. f"./venv/bin/python UNFCCC_GHG_data/UNFCCC_downloader/fetch_submissions_annexI.py "
  114. f"--year={update_aI_config['year']}"],
  115. 'task_dep': ['set_env'],
  116. 'verbosity': 2,
  117. 'setup': ['setup_venv'],
  118. }
  119. def task_download_annexi():
  120. """ Download AnnexI submissions """
  121. return {
  122. #'file_dep': ['downloaded_data/UNFCCC/submissions-nc.csv'],
  123. # deactivate file_dep fow now as it will always run fetch submissions
  124. # before download
  125. 'actions': [f"datalad run -m 'Download AnnexI submissions for "
  126. f"{update_aI_config['category']}{update_aI_config['year']}' "
  127. f"-i downloaded_data/UNFCCC/submissions-annexI_{update_aI_config['year']}.csv "
  128. f"./venv/bin/python UNFCCC_GHG_data/UNFCCC_downloader/download_annexI.py "
  129. f"--category={update_aI_config['category']} --year={update_aI_config['year']}",
  130. f"./venv/bin/python UNFCCC_GHG_data/helper/folder_mapping.py "
  131. f"--folder=downloaded_data/UNFCCC"
  132. ],
  133. 'task_dep': ['set_env'],
  134. 'verbosity': 2,
  135. 'setup': ['setup_venv'],
  136. }
  137. def task_download_ndc():
  138. """ Download NDC submissions """
  139. return {
  140. 'actions': ['datalad run -m "Download NDC submissions" '
  141. './venv/bin/python UNFCCC_GHG_data/UNFCCC_downloader/download_ndc.py',
  142. f"./venv/bin/python UNFCCC_GHG_data/helper/folder_mapping.py "
  143. f"--folder=downloaded_data/UNFCCC"
  144. ],
  145. 'task_dep': ['set_env'],
  146. 'verbosity': 2,
  147. 'setup': ['setup_venv'],
  148. }
  149. # read UNFCCC submissions.
  150. # datalad run is called from within the read_UNFCCC_submission.py script
  151. read_config = {
  152. "country": get_var('country', None),
  153. "submission": get_var('submission', None),
  154. }
  155. # TODO: make individual task for non-UNFCCC submissions
  156. def task_read_unfccc_submission():
  157. """ Read submission for a country (if UNFCCC_GHG_data exists) (not for CRF)"""
  158. return {
  159. 'actions': [f"./venv/bin/python UNFCCC_GHG_data/UNFCCC_reader/read_UNFCCC_submission.py "
  160. f"--country={read_config['country']} --submission={read_config['submission']}",
  161. f"./venv/bin/python UNFCCC_GHG_data/helper/folder_mapping.py "
  162. f"--folder=extracted_data/UNFCCC"
  163. ],
  164. 'task_dep': ['set_env'],
  165. 'verbosity': 2,
  166. 'setup': ['setup_venv'],
  167. }
  168. # read UNFCCC submissions.
  169. # datalad run is called from within the read_UNFCCC_submission.py script
  170. read_config_crf = {
  171. "country": get_var('country', None),
  172. "submission_year": get_var('submission_year', None),
  173. "submission_date": get_var('submission_date', None),
  174. "re_read": get_var('re_read', False),
  175. "countries": get_var('countries', None),
  176. "data_year": get_var('data_year', None),
  177. "totest": get_var('totest', None),
  178. }
  179. def task_read_unfccc_crf_submission():
  180. """ Read CRF submission for a country """
  181. actions = [
  182. f"./venv/bin/python UNFCCC_GHG_data/UNFCCC_CRF_reader/read_UNFCCC_CRF_submission_datalad.py "
  183. f"--country={read_config_crf['country']} "
  184. f"--submission_year={read_config_crf['submission_year']} "
  185. f"--submission_date={read_config_crf['submission_date']} ",
  186. f"./venv/bin/python UNFCCC_GHG_data/helper/folder_mapping.py "
  187. f"--folder=extracted_data/UNFCCC"
  188. ]
  189. if read_config_crf["re_read"] == "True":
  190. actions[0] = actions[0] + " --re_read"
  191. return {
  192. 'actions': actions,
  193. 'task_dep': ['set_env'],
  194. 'verbosity': 2,
  195. 'setup': ['setup_venv'],
  196. }
  197. def task_read_new_unfccc_crf_for_year():
  198. """ Read CRF submission for all countries for given submission year. by default only reads
  199. data not present yet. Only reads the latest updated submission for each country."""
  200. actions = [f"./venv/bin/python UNFCCC_GHG_data/UNFCCC_CRF_reader/read_new_UNFCCC_CRF_for_year_datalad.py "
  201. f"--submission_year={read_config_crf['submission_year']} ",
  202. f"./venv/bin/python UNFCCC_GHG_data/helper/folder_mapping.py "
  203. f"--folder=extracted_data/UNFCCC"
  204. ]
  205. # specifying countries is currently disabled duo to problems with command line
  206. # list arguments
  207. #if read_config_crf["countries"] is not None:
  208. # actions[0] = actions[0] + f"--countries={read_config_crf['countries']} "
  209. if read_config_crf["re_read"] == "True":
  210. actions[0] = actions[0] + " --re_read"
  211. return {
  212. #'basename': "Read_CRF_year",
  213. 'actions': actions,
  214. 'task_dep': ['set_env'],
  215. 'verbosity': 2,
  216. 'setup': ['setup_venv'],
  217. }
  218. def task_test_read_unfccc_crf_for_year():
  219. """ Read CRF submission for all countries for given submission year. by default only reads
  220. data not present yet. Only reads the latest updated submission for each country."""
  221. actions = [f"./venv/bin/python "
  222. f"UNFCCC_GHG_data/UNFCCC_CRF_reader"
  223. f"/test_read_UNFCCC_CRF_for_year.py "
  224. f"--submission_year={read_config_crf['submission_year']} "
  225. f"--country={read_config_crf['country']} "
  226. ]
  227. if read_config_crf["totest"] == "True":
  228. actions[0] = actions[0] + " --totest"
  229. if read_config_crf["data_year"] is not None:
  230. actions[0] = actions[0] + f"--data_year={read_config_crf['data_year']} "
  231. return {
  232. #'basename': "Read_CRF_year",
  233. 'actions': actions,
  234. 'task_dep': ['set_env'],
  235. 'verbosity': 2,
  236. 'setup': ['setup_venv'],
  237. }
  238. def task_compile_raw_unfccc_crf_for_year():
  239. """ Read CRF submission for all countries for given submission year. by default only reads
  240. data not present yet. Only reads the latest updated submission for each country."""
  241. actions = [f"./venv/bin/python "
  242. f"UNFCCC_GHG_data/UNFCCC_CRF_reader/CRF_raw_for_year.py "
  243. f"--submission_year={read_config_crf['submission_year']} "
  244. ]
  245. return {
  246. 'actions': actions,
  247. 'task_dep': ['set_env'],
  248. 'verbosity': 2,
  249. 'setup': ['setup_venv'],
  250. }
  251. # tasks for DI reader
  252. # datalad run is called from within the read_UNFCCC_DI_for_country.py script
  253. read_config_di = {
  254. "country": get_var('country', None),
  255. "date": get_var('date', None),
  256. "annexI": get_var('annexI', False),
  257. #"countries": get_var('countries', None),
  258. }
  259. def task_read_unfccc_di_for_country():
  260. """ Read DI data for a country """
  261. actions = [
  262. f"./venv/bin/python "
  263. f"UNFCCC_GHG_data/UNFCCC_DI_reader/read_UNFCCC_DI_for_country_datalad.py "
  264. f"--country={read_config_di['country']}",
  265. f"./venv/bin/python UNFCCC_GHG_data/helper/folder_mapping.py "
  266. f"--folder=extracted_data/UNFCCC"
  267. ]
  268. return {
  269. 'actions': actions,
  270. 'task_dep': ['set_env'],
  271. 'verbosity': 2,
  272. 'setup': ['setup_venv'],
  273. }
  274. def task_process_unfccc_di_for_country():
  275. """ Process DI data for a country """
  276. actions = [
  277. f"./venv/bin/python "
  278. f"UNFCCC_GHG_data/UNFCCC_DI_reader/process_UNFCCC_DI_for_country_datalad.py "
  279. f"--country={read_config_di['country']} --date={read_config_di['date']}",
  280. f"./venv/bin/python UNFCCC_GHG_data/helper/folder_mapping.py "
  281. f"--folder=extracted_data/UNFCCC"
  282. ]
  283. return {
  284. 'actions': actions,
  285. 'task_dep': ['set_env'],
  286. 'verbosity': 2,
  287. 'setup': ['setup_venv'],
  288. }
  289. def task_read_unfccc_di_for_country_group():
  290. """ Read DI data for a country group """
  291. actions = [
  292. f"./venv/bin/python "
  293. f"UNFCCC_GHG_data/UNFCCC_DI_reader/read_UNFCCC_DI_for_country_group_datalad.py",
  294. f"./venv/bin/python UNFCCC_GHG_data/helper/folder_mapping.py "
  295. f"--folder=extracted_data/UNFCCC"
  296. ]
  297. if read_config_di["annexI"] == "True":
  298. actions[0] = actions[0] + " --annexI"
  299. return {
  300. 'actions': actions,
  301. 'task_dep': ['set_env'],
  302. 'verbosity': 2,
  303. 'setup': ['setup_venv'],
  304. }
  305. def task_process_unfccc_di_for_country_group():
  306. """ Process DI data for a country group """
  307. actions = [
  308. f"./venv/bin/python "
  309. f"UNFCCC_GHG_data/UNFCCC_DI_reader/process_UNFCCC_DI_for_country_group_datalad"
  310. f".py",
  311. ]
  312. if read_config_di["annexI"] == "True":
  313. actions[0] = actions[0] + " --annexI"
  314. if read_config_di["date"] is not None:
  315. actions[0] = actions[0] + f" --date={read_config_di['date']}"
  316. return {
  317. 'actions': actions,
  318. 'task_dep': ['set_env'],
  319. 'verbosity': 2,
  320. 'setup': ['setup_venv'],
  321. }
  322. # general tasks
  323. def task_country_info():
  324. """ Print information on submissions and datasets
  325. available for given country"""
  326. return {
  327. 'actions': [f"./venv/bin/python UNFCCC_GHG_data/helper/country_info.py "
  328. f"--country={read_config['country']}"],
  329. 'task_dep': ['set_env'],
  330. 'verbosity': 2,
  331. 'setup': ['setup_venv'],
  332. }