dodo.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394
  1. # define tasks for UNFCCC data repository
  2. from doit import get_var
  3. import os
  4. import sys
  5. # TODO: task for folder mapping
  6. # create virtual environment
  7. # def task_setup_venv():
  8. # """Create virtual environment"""
  9. # return {
  10. # 'file_dep': ['requirements_dev.txt', 'setup.cfg', 'pyproject.toml'],
  11. # 'actions': ['python3 -m venv venv',
  12. # './venv/bin/pip install --upgrade pip wheel',
  13. # #'./venv/bin/pip install -Ur unfccc_ghg_data/requirements.txt',
  14. # './venv/bin/pip install --upgrade --upgrade-strategy '
  15. # 'eager -e .[dev]',
  16. # 'touch venv',],
  17. # 'targets': ['venv'],
  18. # 'verbosity': 2,
  19. # }
  20. def task_in_venv():
  21. def in_venv():
  22. if sys.prefix == sys.base_prefix:
  23. raise ValueError("You need to run the code from the virtual environment.")
  24. return {
  25. 'actions': [in_venv],
  26. }
  27. # set UNFCCC_GHG_ROOT_PATH environment variable
  28. def task_set_env():
  29. """
  30. Set the environment variable for the module so data is stored in the correct folders
  31. """
  32. def set_root_path():
  33. os.environ["UNFCCC_GHG_ROOT_PATH"] = "."
  34. return {
  35. 'actions': [set_root_path],
  36. }
  37. # Task to create the mapping files which map folder names to ISO 3-letter country codes
  38. read_config_folder = {
  39. "folder": get_var('folder', None),
  40. }
  41. def task_map_folders():
  42. """
  43. Create or update the folder mapping in the given folder
  44. """
  45. return {
  46. 'actions': [f"python src/unfccc_ghg_data/helper/folder_mapping.py "
  47. f"--folder={read_config_folder['folder']}"],
  48. 'task_dep': ['set_env'],
  49. 'verbosity': 2,
  50. 'setup': ['in_venv'],
  51. }
  52. # Tasks for getting submissions and downloading them
  53. def task_update_bur():
  54. """ Update list of BUR submissions """
  55. return {
  56. 'targets': ['downloaded_data/UNFCCC/submissions-bur.csv'],
  57. 'actions': ['datalad run -m "Fetch BUR submissions" '
  58. '-o downloaded_data/UNFCCC/submissions-bur.csv '
  59. 'python src/unfccc_ghg_data/unfccc_downloader/fetch_submissions_bur.py'],
  60. 'task_dep': ['set_env'],
  61. 'verbosity': 2,
  62. 'setup': ['in_venv'],
  63. }
  64. def task_download_bur():
  65. """ Download BUR submissions """
  66. return {
  67. #'file_dep': ['downloaded_data/UNFCCC/submissions-bur.csv'],
  68. # deactivate file_dep fow now as it will always run fetch submissions
  69. # before download
  70. 'actions': ['datalad run -m "Download BUR submissions" '
  71. '-i downloaded_data/UNFCCC/submissions-bur.csv '
  72. 'python src/unfccc_ghg_data/unfccc_downloader/download_non-annexI.py --category=BUR',
  73. f"python src/unfccc_ghg_data/helper/folder_mapping.py "
  74. f"--folder=downloaded_data/UNFCCC"
  75. ],
  76. 'task_dep': ['set_env'],
  77. 'verbosity': 2,
  78. 'setup': ['in_venv'],
  79. }
  80. def task_update_nc():
  81. """ Update list of NC submissions """
  82. return {
  83. 'targets': ['downloaded_data/UNFCCC/submissions-nc.csv'],
  84. 'actions': ['datalad run -m "Fetch NC submissions" '
  85. '-o downloaded_data/UNFCCC/submissions-nc.csv '
  86. 'python src/unfccc_ghg_data/unfccc_downloader/fetch_submissions_nc.py'],
  87. 'task_dep': ['set_env'],
  88. 'verbosity': 2,
  89. 'setup': ['in_venv'],
  90. }
  91. def task_download_nc():
  92. """ Download NC submissions """
  93. return {
  94. #'file_dep': ['downloaded_data/UNFCCC/submissions-nc.csv'],
  95. # deactivate file_dep fow now as it will always run fetch submissions
  96. # before download
  97. 'actions': ['datalad run -m "Download NC submissions" '
  98. '-i downloaded_data/UNFCCC/submissions-nc.csv '
  99. 'python src/unfccc_ghg_data/unfccc_downloader/download_non-annexI.py --category=NC',
  100. f"python src/unfccc_ghg_data/helper/folder_mapping.py "
  101. f"--folder=downloaded_data/UNFCCC"
  102. ],
  103. 'task_dep': ['set_env'],
  104. 'verbosity': 2,
  105. 'setup': ['in_venv'],
  106. }
  107. # annexI data: one update call for all data types (as they are on one page)
  108. # but for each year separately.
  109. # downloading is per year and
  110. update_aI_config = {
  111. "year": get_var('year', None),
  112. "category": get_var('category', None),
  113. }
  114. def task_update_annexi():
  115. """ Update list of AnnexI submissions """
  116. return {
  117. 'targets': [f"downloaded_data/UNFCCC/submissions-annexI_{update_aI_config['year']}.csv"],
  118. 'actions': [f"datalad run -m 'Fetch AnnexI submissions for {update_aI_config['year']}' "
  119. "--explicit "
  120. f"-o downloaded_data/UNFCCC/submissions-annexI_{update_aI_config['year']}.csv "
  121. f"python src/unfccc_ghg_data/unfccc_downloader/fetch_submissions_annexI.py "
  122. f"--year={update_aI_config['year']}"],
  123. 'task_dep': ['set_env'],
  124. 'verbosity': 2,
  125. 'setup': ['in_venv'],
  126. }
  127. def task_download_annexi():
  128. """ Download AnnexI submissions """
  129. return {
  130. #'file_dep': ['downloaded_data/UNFCCC/submissions-nc.csv'],
  131. # deactivate file_dep fow now as it will always run fetch submissions
  132. # before download
  133. 'actions': [f"datalad run -m 'Download AnnexI submissions for "
  134. f"{update_aI_config['category']}{update_aI_config['year']}' "
  135. f"-i downloaded_data/UNFCCC/submissions-annexI_{update_aI_config['year']}.csv "
  136. f"python src/unfccc_ghg_data/unfccc_downloader/download_annexI.py "
  137. f"--category={update_aI_config['category']} --year={update_aI_config['year']}",
  138. f"python src/unfccc_ghg_data/helper/folder_mapping.py "
  139. f"--folder=downloaded_data/UNFCCC"
  140. ],
  141. 'task_dep': ['set_env'],
  142. 'verbosity': 2,
  143. 'setup': ['in_venv'],
  144. }
  145. def task_download_ndc():
  146. """ Download NDC submissions """
  147. return {
  148. 'actions': ['datalad run -m "Download NDC submissions" '
  149. 'python src/unfccc_ghg_data/unfccc_downloader/download_ndc.py',
  150. f"python src/unfccc_ghg_data/helper/folder_mapping.py "
  151. f"--folder=downloaded_data/UNFCCC"
  152. ],
  153. 'task_dep': ['set_env'],
  154. 'verbosity': 2,
  155. 'setup': ['in_venv'],
  156. }
  157. # read UNFCCC submissions.
  158. # datalad run is called from within the read_UNFCCC_submission.py script
  159. read_config = {
  160. "country": get_var('country', None),
  161. "submission": get_var('submission', None),
  162. }
  163. # TODO: make individual task for non-UNFCCC submissions
  164. def task_read_unfccc_submission():
  165. """ Read submission for a country (if code exists) (not for CRF)"""
  166. return {
  167. 'actions': [f"python src/unfccc_ghg_data/unfccc_reader/read_UNFCCC_submission.py "
  168. f"--country={read_config['country']} --submission={read_config['submission']}",
  169. f"python src/unfccc_ghg_data/helper/folder_mapping.py "
  170. f"--folder=extracted_data/UNFCCC"
  171. ],
  172. 'task_dep': ['set_env'],
  173. 'verbosity': 2,
  174. 'setup': ['in_venv'],
  175. }
  176. # read UNFCCC submissions.
  177. # datalad run is called from within the read_UNFCCC_submission.py script
  178. read_config_crf = {
  179. "country": get_var('country', None),
  180. "submission_year": get_var('submission_year', None),
  181. "submission_date": get_var('submission_date', None),
  182. "re_read": get_var('re_read', False),
  183. "countries": get_var('countries', None),
  184. "data_year": get_var('data_year', None),
  185. "totest": get_var('totest', None),
  186. }
  187. def task_read_unfccc_crf_submission():
  188. """ Read CRF submission for a country """
  189. actions = [
  190. f"python src/unfccc_ghg_data/unfccc_crf_reader"
  191. f"/read_unfccc_crf_submission_datalad.py "
  192. f"--country={read_config_crf['country']} "
  193. f"--submission_year={read_config_crf['submission_year']} "
  194. f"--submission_date={read_config_crf['submission_date']} ",
  195. f"python src/unfccc_ghg_data/helper/folder_mapping.py "
  196. f"--folder=extracted_data/UNFCCC"
  197. ]
  198. if read_config_crf["re_read"] == "True":
  199. actions[0] = actions[0] + " --re_read"
  200. return {
  201. 'actions': actions,
  202. 'task_dep': ['set_env'],
  203. 'verbosity': 2,
  204. 'setup': ['in_venv'],
  205. }
  206. def task_read_new_unfccc_crf_for_year():
  207. """ Read CRF submission for all countries for given submission year. by default only reads
  208. data not present yet. Only reads the latest updated submission for each country."""
  209. actions = [f"python src/unfccc_ghg_data/unfccc_crf_reader"
  210. f"/read_new_unfccc_crf_for_year_datalad.py "
  211. f"--submission_year={read_config_crf['submission_year']} ",
  212. f"python src/unfccc_ghg_data/helper/folder_mapping.py "
  213. f"--folder=extracted_data/UNFCCC"
  214. ]
  215. # specifying countries is currently disabled duo to problems with command line
  216. # list arguments
  217. #if read_config_crf["countries"] is not None:
  218. # actions[0] = actions[0] + f"--countries={read_config_crf['countries']} "
  219. if read_config_crf["re_read"] == "True":
  220. actions[0] = actions[0] + " --re_read"
  221. return {
  222. #'basename': "Read_CRF_year",
  223. 'actions': actions,
  224. 'task_dep': ['set_env'],
  225. 'verbosity': 2,
  226. 'setup': ['in_venv'],
  227. }
  228. def task_test_read_unfccc_crf_for_year():
  229. """ Read CRF submission for all countries for given submission year. by default only reads
  230. data not present yet. Only reads the latest updated submission for each country."""
  231. actions = [f"python "
  232. f"src/unfccc_ghg_data/unfccc_crf_reader"
  233. f"/test_read_unfccc_crf_for_year.py "
  234. f"--submission_year={read_config_crf['submission_year']} "
  235. f"--country={read_config_crf['country']} "
  236. ]
  237. if read_config_crf["totest"] == "True":
  238. actions[0] = actions[0] + " --totest"
  239. if read_config_crf["data_year"] is not None:
  240. actions[0] = actions[0] + f"--data_year={read_config_crf['data_year']} "
  241. return {
  242. #'basename': "Read_CRF_year",
  243. 'actions': actions,
  244. 'task_dep': ['set_env'],
  245. 'verbosity': 2,
  246. 'setup': ['in_venv'],
  247. }
  248. def task_compile_raw_unfccc_crf_for_year():
  249. """ Read CRF submission for all countries for given submission year. by default only reads
  250. data not present yet. Only reads the latest updated submission for each country."""
  251. actions = [f"python "
  252. f"src/unfccc_ghg_data/unfccc_crf_reader/crf_raw_for_year.py "
  253. f"--submission_year={read_config_crf['submission_year']} "
  254. ]
  255. return {
  256. 'actions': actions,
  257. 'task_dep': ['set_env'],
  258. 'verbosity': 2,
  259. 'setup': ['in_venv'],
  260. }
  261. # tasks for DI reader
  262. # datalad run is called from within the read_unfccc_di_for_country.py script
  263. read_config_di = {
  264. "country": get_var('country', None),
  265. "date": get_var('date', None),
  266. "annexI": get_var('annexI', False),
  267. #"countries": get_var('countries', None),
  268. }
  269. def task_read_unfccc_di_for_country():
  270. """ Read DI data for a country """
  271. actions = [
  272. f"python "
  273. f"src/unfccc_ghg_data/unfccc_di_reader/read_unfccc_di_for_country_datalad.py "
  274. f"--country={read_config_di['country']}",
  275. f"python src/unfccc_ghg_data/helper/folder_mapping.py "
  276. f"--folder=extracted_data/UNFCCC"
  277. ]
  278. return {
  279. 'actions': actions,
  280. 'task_dep': ['set_env'],
  281. 'verbosity': 2,
  282. 'setup': ['in_venv'],
  283. }
  284. def task_process_unfccc_di_for_country():
  285. """ Process DI data for a country """
  286. actions = [
  287. f"python "
  288. f"src/unfccc_ghg_data/unfccc_di_reader/process_unfccc_di_for_country_datalad"
  289. f".py "
  290. f"--country={read_config_di['country']} --date={read_config_di['date']}",
  291. f"python src/unfccc_ghg_data/helper/folder_mapping.py "
  292. f"--folder=extracted_data/UNFCCC"
  293. ]
  294. return {
  295. 'actions': actions,
  296. 'task_dep': ['set_env'],
  297. 'verbosity': 2,
  298. 'setup': ['in_venv'],
  299. }
  300. def task_read_unfccc_di_for_country_group():
  301. """ Read DI data for a country group """
  302. actions = [
  303. f"python "
  304. f"src/unfccc_ghg_data/unfccc_di_reader/read_unfccc_di_for_country_group_datalad"
  305. f".py",
  306. f"python src/unfccc_ghg_data/helper/folder_mapping.py "
  307. f"--folder=extracted_data/UNFCCC"
  308. ]
  309. if read_config_di["annexI"] == "True":
  310. actions[0] = actions[0] + " --annexI"
  311. return {
  312. 'actions': actions,
  313. 'task_dep': ['set_env'],
  314. 'verbosity': 2,
  315. 'setup': ['in_venv'],
  316. }
  317. def task_process_unfccc_di_for_country_group():
  318. """ Process DI data for a country group """
  319. actions = [
  320. f"python "
  321. f"src/unfccc_ghg_data/unfccc_di_reader"
  322. f"/process_unfccc_di_for_country_group_datalad"
  323. f".py",
  324. ]
  325. if read_config_di["annexI"] == "True":
  326. actions[0] = actions[0] + " --annexI"
  327. if read_config_di["date"] is not None:
  328. actions[0] = actions[0] + f" --date={read_config_di['date']}"
  329. return {
  330. 'actions': actions,
  331. 'task_dep': ['set_env'],
  332. 'verbosity': 2,
  333. 'setup': ['in_venv'],
  334. }
  335. # general tasks
  336. def task_country_info():
  337. """ Print information on submissions and datasets
  338. available for given country"""
  339. return {
  340. 'actions': [f"python src/unfccc_ghg_data/helper/country_info.py "
  341. f"--country={read_config['country']}"],
  342. 'task_dep': ['set_env'],
  343. 'verbosity': 2,
  344. 'setup': ['in_venv'],
  345. }