dodo.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421
  1. # define tasks for UNFCCC data repository
  2. from doit import get_var
  3. import os
  4. # TODO: task for folder mapping
  5. # create virtual environment
  6. def task_setup_venv():
  7. """Create virtual environment"""
  8. return {
  9. 'file_dep': ['requirements_dev.txt', 'setup.cfg', 'pyproject.toml'],
  10. 'actions': ['python3 -m venv venv',
  11. './venv/bin/pip install --upgrade pip wheel',
  12. #'./venv/bin/pip install -Ur UNFCCC_GHG_data/requirements.txt',
  13. './venv/bin/pip install --upgrade --upgrade-strategy '
  14. 'eager -e .[dev]',
  15. 'touch venv',],
  16. 'targets': ['venv'],
  17. 'verbosity': 2,
  18. }
  19. # set UNFCCC_GHG_ROOT_PATH environment variable
  20. def task_set_env():
  21. """
  22. Set the environment variable for the module so data is stored in the correct folders
  23. """
  24. def set_root_path():
  25. os.environ["UNFCCC_GHG_ROOT_PATH"] = "."
  26. return {
  27. 'actions': [set_root_path],
  28. }
  29. # Task to create the mapping files which map folder names to ISO 3-letter country codes
  30. read_config_folder = {
  31. "folder": get_var('folder', None),
  32. }
  33. def task_map_folders():
  34. """
  35. Create or update the folder mapping in the given folder
  36. """
  37. return {
  38. 'actions': [f"./venv/bin/python UNFCCC_GHG_data/helper/folder_mapping.py "
  39. f"--folder={read_config_folder['folder']}"],
  40. 'task_dep': ['set_env'],
  41. 'verbosity': 2,
  42. 'setup': ['setup_venv'],
  43. }
  44. # Tasks for getting submissions and downloading them
  45. def task_update_bur():
  46. """ Update list of BUR submissions """
  47. return {
  48. 'targets': ['downloaded_data/UNFCCC/submissions-bur.csv'],
  49. 'actions': ['datalad run -m "Fetch BUR submissions" '
  50. '-o downloaded_data/UNFCCC/submissions-bur.csv '
  51. './venv/bin/python UNFCCC_GHG_data/UNFCCC_downloader/fetch_submissions_bur.py'],
  52. 'task_dep': ['set_env'],
  53. 'verbosity': 2,
  54. 'setup': ['setup_venv'],
  55. }
  56. def task_download_bur():
  57. """ Download BUR submissions """
  58. return {
  59. #'file_dep': ['downloaded_data/UNFCCC/submissions-bur.csv'],
  60. # deactivate file_dep fow now as it will always run fetch submissions
  61. # before download
  62. 'actions': ['datalad run -m "Download BUR submissions" '
  63. '-i downloaded_data/UNFCCC/submissions-bur.csv '
  64. './venv/bin/python UNFCCC_GHG_data/UNFCCC_downloader/download_non-annexI.py --category=BUR',
  65. f"./venv/bin/python UNFCCC_GHG_data/helper/folder_mapping.py "
  66. f"--folder=downloaded_data/UNFCCC"
  67. ],
  68. 'task_dep': ['set_env'],
  69. 'verbosity': 2,
  70. 'setup': ['setup_venv'],
  71. }
  72. def task_update_nc():
  73. """ Update list of NC submissions """
  74. return {
  75. 'targets': ['downloaded_data/UNFCCC/submissions-nc.csv'],
  76. 'actions': ['datalad run -m "Fetch NC submissions" '
  77. '-o downloaded_data/UNFCCC/submissions-nc.csv '
  78. './venv/bin/python UNFCCC_GHG_data/UNFCCC_downloader/fetch_submissions_nc.py'],
  79. 'task_dep': ['set_env'],
  80. 'verbosity': 2,
  81. 'setup': ['setup_venv'],
  82. }
  83. def task_download_nc():
  84. """ Download NC submissions """
  85. return {
  86. #'file_dep': ['downloaded_data/UNFCCC/submissions-nc.csv'],
  87. # deactivate file_dep fow now as it will always run fetch submissions
  88. # before download
  89. 'actions': ['datalad run -m "Download NC submissions" '
  90. '-i downloaded_data/UNFCCC/submissions-nc.csv '
  91. './venv/bin/python UNFCCC_GHG_data/UNFCCC_downloader/download_non-annexI.py --category=NC',
  92. f"./venv/bin/python UNFCCC_GHG_data/helper/folder_mapping.py "
  93. f"--folder=downloaded_data/UNFCCC"
  94. ],
  95. 'task_dep': ['set_env'],
  96. 'verbosity': 2,
  97. 'setup': ['setup_venv'],
  98. }
  99. # annexI data: one update call for all data types (as they are on one page)
  100. # but for each year separately.
  101. # downloading is per year and category.
  102. update_aI_config = {
  103. "year": get_var('year', None),
  104. "category": get_var('category', None),
  105. }
  106. def task_update_annexi():
  107. """ Update list of AnnexI submissions """
  108. return {
  109. 'targets': [f"downloaded_data/UNFCCC/submissions-annexI_{update_aI_config['year']}.csv"],
  110. 'actions': [f"datalad run -m 'Fetch AnnexI submissions for {update_aI_config['year']}' "
  111. "--explicit "
  112. f"-o downloaded_data/UNFCCC/submissions-annexI_{update_aI_config['year']}.csv "
  113. f"./venv/bin/python UNFCCC_GHG_data/UNFCCC_downloader/fetch_submissions_annexI.py "
  114. f"--year={update_aI_config['year']}"],
  115. 'task_dep': ['set_env'],
  116. 'verbosity': 2,
  117. 'setup': ['setup_venv'],
  118. }
  119. def task_download_annexi():
  120. """ Download AnnexI submissions """
  121. return {
  122. #'file_dep': ['downloaded_data/UNFCCC/submissions-nc.csv'],
  123. # deactivate file_dep fow now as it will always run fetch submissions
  124. # before download
  125. 'actions': [f"datalad run -m 'Download AnnexI submissions for "
  126. f"{update_aI_config['category']}{update_aI_config['year']}' "
  127. f"-i downloaded_data/UNFCCC/submissions-annexI_{update_aI_config['year']}.csv "
  128. f"./venv/bin/python UNFCCC_GHG_data/UNFCCC_downloader/download_annexI.py "
  129. f"--category={update_aI_config['category']} --year={update_aI_config['year']}",
  130. f"./venv/bin/python UNFCCC_GHG_data/helper/folder_mapping.py "
  131. f"--folder=downloaded_data/UNFCCC"
  132. ],
  133. 'task_dep': ['set_env'],
  134. 'verbosity': 2,
  135. 'setup': ['setup_venv'],
  136. }
  137. # BTR data: one update call per reporting round
  138. # (selected via the `round` variable).
  139. # downloading is per round as well.
  140. update_btr_config = {
  141. "round": get_var('round', None),
  142. }
  143. def task_update_btr():
  144. """ Update list of BTR submissions """
  145. return {
  146. 'targets': [f"downloaded_data/UNFCCC/submissions-BTR{update_btr_config['round']}.csv"],
  147. 'actions': [f"datalad run -m 'Fetch Biannial Transparency Report submissions for BTR{update_btr_config['round']}' "
  148. "--explicit "
  149. f"-o downloaded_data/UNFCCC/submissions-BTR{update_btr_config['round']}.csv "
  150. f"./venv/bin/python UNFCCC_GHG_data/UNFCCC_downloader/fetch_submissions_btr.py "
  151. f"--round={update_btr_config['round']}"],
  152. 'task_dep': ['set_env'],
  153. 'verbosity': 2,
  154. 'setup': ['setup_venv'],
  155. }
  156. def task_download_btr():
  157. """ Download BTR submissions """
  158. return {
  159. #'file_dep': ['downloaded_data/UNFCCC/submissions-nc.csv'],
  160. # deactivate file_dep fow now as it will always run fetch submissions
  161. # before download
  162. 'actions': [f"datalad run -m 'Download BTR submissions for "
  163. f"BTR{update_btr_config['round']}' "
  164. f"-i downloaded_data/UNFCCC/submissions-BTR{update_btr_config['round']}.csv "
  165. f"./venv/bin/python UNFCCC_GHG_data/UNFCCC_downloader/download_btr.py "
  166. f"--round={update_btr_config['round']}",
  167. f"./venv/bin/python UNFCCC_GHG_data/helper/folder_mapping.py "
  168. f"--folder=downloaded_data/UNFCCC"
  169. ],
  170. 'task_dep': ['set_env'],
  171. 'verbosity': 2,
  172. 'setup': ['setup_venv'],
  173. }
  174. def task_download_ndc():
  175. """ Download NDC submissions """
  176. return {
  177. 'actions': ['datalad run -m "Download NDC submissions" '
  178. './venv/bin/python UNFCCC_GHG_data/UNFCCC_downloader/download_ndc.py',
  179. f"./venv/bin/python UNFCCC_GHG_data/helper/folder_mapping.py "
  180. f"--folder=downloaded_data/UNFCCC"
  181. ],
  182. 'task_dep': ['set_env'],
  183. 'verbosity': 2,
  184. 'setup': ['setup_venv'],
  185. }
  186. # read UNFCCC submissions.
  187. # datalad run is called from within the read_UNFCCC_submission.py script
  188. read_config = {
  189. "country": get_var('country', None),
  190. "submission": get_var('submission', None),
  191. }
  192. # TODO: make individual task for non-UNFCCC submissions
  193. def task_read_unfccc_submission():
  194. """ Read submission for a country (if code exists) (not for CRF)"""
  195. return {
  196. 'actions': [f"./venv/bin/python UNFCCC_GHG_data/UNFCCC_reader/read_UNFCCC_submission.py "
  197. f"--country={read_config['country']} --submission={read_config['submission']}",
  198. f"./venv/bin/python UNFCCC_GHG_data/helper/folder_mapping.py "
  199. f"--folder=extracted_data/UNFCCC"
  200. ],
  201. 'task_dep': ['set_env'],
  202. 'verbosity': 2,
  203. 'setup': ['setup_venv'],
  204. }
  205. # read UNFCCC submissions.
  206. # datalad run is called from within the read_UNFCCC_submission.py script
  207. read_config_crf = {
  208. "country": get_var('country', None),
  209. "submission_year": get_var('submission_year', None),
  210. "submission_date": get_var('submission_date', None),
  211. "re_read": get_var('re_read', False),
  212. "countries": get_var('countries', None),
  213. "data_year": get_var('data_year', None),
  214. "totest": get_var('totest', None),
  215. }
  216. def task_read_unfccc_crf_submission():
  217. """ Read CRF submission for a country """
  218. actions = [
  219. f"./venv/bin/python UNFCCC_GHG_data/UNFCCC_CRF_reader/read_UNFCCC_CRF_submission_datalad.py "
  220. f"--country={read_config_crf['country']} "
  221. f"--submission_year={read_config_crf['submission_year']} "
  222. f"--submission_date={read_config_crf['submission_date']} ",
  223. f"./venv/bin/python UNFCCC_GHG_data/helper/folder_mapping.py "
  224. f"--folder=extracted_data/UNFCCC"
  225. ]
  226. if read_config_crf["re_read"] == "True":
  227. actions[0] = actions[0] + " --re_read"
  228. return {
  229. 'actions': actions,
  230. 'task_dep': ['set_env'],
  231. 'verbosity': 2,
  232. 'setup': ['setup_venv'],
  233. }
  234. def task_read_new_unfccc_crf_for_year():
  235. """ Read CRF submission for all countries for given submission year. by default only reads
  236. data not present yet. Only reads the latest updated submission for each country."""
  237. actions = [f"./venv/bin/python UNFCCC_GHG_data/UNFCCC_CRF_reader/read_new_UNFCCC_CRF_for_year_datalad.py "
  238. f"--submission_year={read_config_crf['submission_year']} ",
  239. f"./venv/bin/python UNFCCC_GHG_data/helper/folder_mapping.py "
  240. f"--folder=extracted_data/UNFCCC"
  241. ]
  242. # specifying countries is currently disabled duo to problems with command line
  243. # list arguments
  244. #if read_config_crf["countries"] is not None:
  245. # actions[0] = actions[0] + f"--countries={read_config_crf['countries']} "
  246. if read_config_crf["re_read"] == "True":
  247. actions[0] = actions[0] + " --re_read"
  248. return {
  249. #'basename': "Read_CRF_year",
  250. 'actions': actions,
  251. 'task_dep': ['set_env'],
  252. 'verbosity': 2,
  253. 'setup': ['setup_venv'],
  254. }
  255. def task_test_read_unfccc_crf_for_year():
  256. """ Read CRF submission for all countries for given submission year. by default only reads
  257. data not present yet. Only reads the latest updated submission for each country."""
  258. actions = [f"./venv/bin/python "
  259. f"UNFCCC_GHG_data/UNFCCC_CRF_reader"
  260. f"/test_read_UNFCCC_CRF_for_year.py "
  261. f"--submission_year={read_config_crf['submission_year']} "
  262. f"--country={read_config_crf['country']} "
  263. ]
  264. if read_config_crf["totest"] == "True":
  265. actions[0] = actions[0] + " --totest"
  266. if read_config_crf["data_year"] is not None:
  267. actions[0] = actions[0] + f"--data_year={read_config_crf['data_year']} "
  268. return {
  269. #'basename': "Read_CRF_year",
  270. 'actions': actions,
  271. 'task_dep': ['set_env'],
  272. 'verbosity': 2,
  273. 'setup': ['setup_venv'],
  274. }
  275. def task_compile_raw_unfccc_crf_for_year():
  276. """ Read CRF submission for all countries for given submission year. by default only reads
  277. data not present yet. Only reads the latest updated submission for each country."""
  278. actions = [f"./venv/bin/python "
  279. f"UNFCCC_GHG_data/UNFCCC_CRF_reader/CRF_raw_for_year.py "
  280. f"--submission_year={read_config_crf['submission_year']} "
  281. ]
  282. return {
  283. 'actions': actions,
  284. 'task_dep': ['set_env'],
  285. 'verbosity': 2,
  286. 'setup': ['setup_venv'],
  287. }
  288. # tasks for DI reader
  289. # datalad run is called from within the read_UNFCCC_DI_for_country.py script
  290. read_config_di = {
  291. "country": get_var('country', None),
  292. "date": get_var('date', None),
  293. "annexI": get_var('annexI', False),
  294. #"countries": get_var('countries', None),
  295. }
  296. def task_read_unfccc_di_for_country():
  297. """ Read DI data for a country """
  298. actions = [
  299. f"./venv/bin/python "
  300. f"UNFCCC_GHG_data/UNFCCC_DI_reader/read_UNFCCC_DI_for_country_datalad.py "
  301. f"--country={read_config_di['country']}",
  302. f"./venv/bin/python UNFCCC_GHG_data/helper/folder_mapping.py "
  303. f"--folder=extracted_data/UNFCCC"
  304. ]
  305. return {
  306. 'actions': actions,
  307. 'task_dep': ['set_env'],
  308. 'verbosity': 2,
  309. 'setup': ['setup_venv'],
  310. }
  311. def task_process_unfccc_di_for_country():
  312. """ Process DI data for a country """
  313. actions = [
  314. f"./venv/bin/python "
  315. f"UNFCCC_GHG_data/UNFCCC_DI_reader/process_UNFCCC_DI_for_country_datalad.py "
  316. f"--country={read_config_di['country']} --date={read_config_di['date']}",
  317. f"./venv/bin/python UNFCCC_GHG_data/helper/folder_mapping.py "
  318. f"--folder=extracted_data/UNFCCC"
  319. ]
  320. return {
  321. 'actions': actions,
  322. 'task_dep': ['set_env'],
  323. 'verbosity': 2,
  324. 'setup': ['setup_venv'],
  325. }
  326. def task_read_unfccc_di_for_country_group():
  327. """ Read DI data for a country group """
  328. actions = [
  329. f"./venv/bin/python "
  330. f"UNFCCC_GHG_data/UNFCCC_DI_reader/read_UNFCCC_DI_for_country_group_datalad.py",
  331. f"./venv/bin/python UNFCCC_GHG_data/helper/folder_mapping.py "
  332. f"--folder=extracted_data/UNFCCC"
  333. ]
  334. if read_config_di["annexI"] == "True":
  335. actions[0] = actions[0] + " --annexI"
  336. return {
  337. 'actions': actions,
  338. 'task_dep': ['set_env'],
  339. 'verbosity': 2,
  340. 'setup': ['setup_venv'],
  341. }
  342. def task_process_unfccc_di_for_country_group():
  343. """ Process DI data for a country group """
  344. actions = [
  345. f"./venv/bin/python "
  346. f"UNFCCC_GHG_data/UNFCCC_DI_reader/process_UNFCCC_DI_for_country_group_datalad"
  347. f".py",
  348. ]
  349. if read_config_di["annexI"] == "True":
  350. actions[0] = actions[0] + " --annexI"
  351. if read_config_di["date"] is not None:
  352. actions[0] = actions[0] + f" --date={read_config_di['date']}"
  353. return {
  354. 'actions': actions,
  355. 'task_dep': ['set_env'],
  356. 'verbosity': 2,
  357. 'setup': ['setup_venv'],
  358. }
  359. # general tasks
  360. def task_country_info():
  361. """ Print information on submissions and datasets
  362. available for given country"""
  363. return {
  364. 'actions': [f"./venv/bin/python UNFCCC_GHG_data/helper/country_info.py "
  365. f"--country={read_config['country']}"],
  366. 'task_dep': ['set_env'],
  367. 'verbosity': 2,
  368. 'setup': ['setup_venv'],
  369. }