dodo.py 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600
  1. """
  2. Define the tasks for UNFCCC data repository
  3. """
  4. import os
  5. import sys
  6. import datalad.api
  7. from doit import get_var
  8. root_path = "."
  9. os.environ["UNFCCC_GHG_ROOT_PATH"] = root_path
  10. # TODO: task for folder mapping
  11. # create virtual environment
  12. # def task_setup_venv():
  13. # """Create virtual environment"""
  14. # return {
  15. # 'file_dep': ['requirements_dev.txt', 'setup.cfg', 'pyproject.toml'],
  16. # 'actions': ['python3 -m venv venv',
  17. # './venv/bin/pip install --upgrade pip wheel',
  18. # #'./venv/bin/pip install -Ur unfccc_ghg_data/requirements.txt',
  19. # './venv/bin/pip install --upgrade --upgrade-strategy '
  20. # 'eager -e .[dev]',
  21. # 'touch venv',],
  22. # 'targets': ['venv'],
  23. # 'verbosity': 2,
  24. # }
  25. def set_root_path():
  26. """Set the root folder for the repository"""
  27. os.environ["UNFCCC_GHG_ROOT_PATH"] = root_path
  28. def map_folders(parent_folder):
  29. """
  30. Create or update the folder mapping in the given folder
  31. Internal function
  32. """
  33. datalad.api.run(
  34. cmd="python3 src/unfccc_ghg_data/helper/folder_mapping.py "
  35. f"--folder={parent_folder}",
  36. dataset=root_path,
  37. message=f"Update folder mapping for {parent_folder}",
  38. outputs=f"{parent_folder}/folder_mapping.json",
  39. dry_run=None,
  40. explicit=True,
  41. )
  42. def task_in_venv():
  43. """
  44. Check if code run from virtual environment and throw an error is not.
  45. Returns
  46. -------
  47. Nothing
  48. """
  49. def in_venv():
  50. if sys.prefix == sys.base_prefix:
  51. raise ValueError( # noqa: TRY003
  52. "You need to run the code from the virtual environment."
  53. )
  54. return {
  55. "actions": [in_venv],
  56. }
  57. # set UNFCCC_GHG_ROOT_PATH environment variable
  58. def task_set_env():
  59. """
  60. Set the environment variable for the module so data is stored in the correct folders
  61. """
  62. return {
  63. "actions": [set_root_path],
  64. }
  65. # Task to create the mapping files which map folder names to ISO 3-letter country codes
  66. read_config_folder = {
  67. "folder": get_var("folder", None),
  68. }
  69. def task_map_folders():
  70. """
  71. Create or update the folder mapping in the given folder
  72. """
  73. return {
  74. "actions": [(map_folders, [read_config_folder["folder"]])],
  75. "verbosity": 2,
  76. "setup": ["in_venv"],
  77. }
  78. # Tasks for getting submissions and downloading them
  79. def task_update_bur():
  80. """Update list of BUR submissions"""
  81. def fetch_bur():
  82. datalad.api.run(
  83. cmd="python3 src/unfccc_ghg_data/unfccc_downloader/"
  84. "fetch_submissions_bur.py",
  85. dataset=root_path,
  86. message="Fetch BUR submissions",
  87. outputs="downloaded_data/UNFCCC/submissions-bur.csv",
  88. dry_run=True,
  89. explicit=True,
  90. )
  91. return {
  92. "targets": ["downloaded_data/UNFCCC/submissions-bur.csv"],
  93. # "actions": [
  94. # 'datalad run -m "Fetch BUR submissions" '
  95. # "-o downloaded_data/UNFCCC/submissions-bur.csv "
  96. # "python src/unfccc_ghg_data/unfccc_downloader/fetch_submissions_bur.py"
  97. # ],
  98. "actions": [
  99. (fetch_bur,),
  100. ],
  101. "verbosity": 2,
  102. "setup": ["in_venv"],
  103. }
  104. def task_download_bur():
  105. """Download BUR submissions"""
  106. def download_bur():
  107. (
  108. datalad.api.run(
  109. cmd="python3 src/unfccc_ghg_data/unfccc_downloader/"
  110. "download_nonannexI.py --category=BUR",
  111. dataset=root_path,
  112. message="Download BUR submissions",
  113. inputs="downloaded_data/UNFCCC/submissions-bur.csv",
  114. dry_run=None,
  115. explicit=False,
  116. ),
  117. )
  118. return {
  119. #'file_dep': ['downloaded_data/UNFCCC/submissions-bur.csv'],
  120. # deactivate file_dep fow now as it will always run fetch submissions
  121. # before download
  122. "actions": [
  123. (download_bur,),
  124. (map_folders, ["downloaded_data/UNFCCC"]),
  125. ],
  126. "verbosity": 2,
  127. "setup": ["in_venv"],
  128. }
  129. #
  130. #
  131. # def task_update_nc():
  132. # """Update list of NC submissions"""
  133. # return {
  134. # "targets": ["downloaded_data/UNFCCC/submissions-nc.csv"],
  135. # "actions": [
  136. # set_root_path(),
  137. # datalad.api.run(
  138. # cmd="python3 src/unfccc_ghg_data/unfccc_downloader/"
  139. # "fetch_submissions_nc.py",
  140. # dataset=root_path,
  141. # message="Fetch NC submissions",
  142. # outputs="downloaded_data/UNFCCC/submissions-nc.csv",
  143. # dry_run=None,
  144. # explicit=True,
  145. # ),
  146. # ],
  147. # "verbosity": 2,
  148. # "setup": ["in_venv"],
  149. # }
  150. #
  151. #
  152. # def task_download_nc():
  153. # """Download NC submissions"""
  154. # return {
  155. # #'file_dep': ['downloaded_data/UNFCCC/submissions-nc.csv'],
  156. # # deactivate file_dep for now as it will always run fetch submissions
  157. # # before download
  158. # "actions": [
  159. # set_root_path(),
  160. # datalad.api.run(
  161. # cmd="python3 src/unfccc_ghg_data/unfccc_downloader/"
  162. # "download_nonannexI.py --category=NC",
  163. # dataset=root_path,
  164. # message="Download NC submissions",
  165. # inputs="downloaded_data/UNFCCC/submissions-nc.csv",
  166. # dry_run=None,
  167. # explicit=False,
  168. # ),
  169. # map_folders("downloaded_data/UNFCCC"),
  170. # ],
  171. # "verbosity": 2,
  172. # "setup": ["in_venv"],
  173. # }
  174. #
  175. #
  176. # # annexI data: one update call for all data types (as they are on one page)
  177. # # but for each year separately.
  178. # # downloading is per year and
  179. # update_aI_config = {
  180. # "year": get_var("year", None),
  181. # "category": get_var("category", None),
  182. # }
  183. #
  184. #
  185. # def task_update_annexi():
  186. # """Update list of AnnexI submissions"""
  187. # return {
  188. # "targets": [
  189. # f"downloaded_data/UNFCCC/submissions-annexI_{update_aI_config['year']}.csv"
  190. # ],
  191. # "actions": [
  192. # set_root_path(),
  193. # datalad.api.run(
  194. # cmd="python src/unfccc_ghg_data/unfccc_downloader/"
  195. # "fetch_submissions_annexI.py "
  196. # f"--year={update_aI_config['year']}",
  197. # dataset=root_path,
  198. # message=f"Fetch AnnexI submissions for {update_aI_config['year']}",
  199. # outputs=f"downloaded_data/UNFCCC/submissions-annexI_"
  200. # f"{update_aI_config['year']}.csv",
  201. # dry_run=None,
  202. # explicit=True,
  203. # ),
  204. # ],
  205. # "verbosity": 2,
  206. # "setup": ["in_venv"],
  207. # }
  208. #
  209. #
  210. # def task_download_annexi():
  211. # """Download AnnexI submissions"""
  212. # return {
  213. # #'file_dep': ['downloaded_data/UNFCCC/submissions-nc.csv'],
  214. # # deactivate file_dep for now as it will always run fetch submissions
  215. # # before download
  216. # "actions": [
  217. # set_root_path(),
  218. # datalad.api.run(
  219. # cmd="python src/unfccc_ghg_data/unfccc_downloader/download_annexI.py "
  220. # f"--category={update_aI_config['category']} "
  221. # f"--year={update_aI_config['year']}",
  222. # dataset=root_path,
  223. # message=f"Download AnnexI submissions for "
  224. # f"{update_aI_config['category']}"
  225. # f"{update_aI_config['year']}",
  226. # inputs=f"downloaded_data/UNFCCC/submissions-annexI_"
  227. # f"{update_aI_config['year']}.csv",
  228. # dry_run=None,
  229. # explicit=False,
  230. # ),
  231. # map_folders("downloaded_data/UNFCCC"),
  232. # ],
  233. # "verbosity": 2,
  234. # "setup": ["in_venv"],
  235. # }
  236. #
  237. #
  238. # # annexI data: one update call for all data types (as they are on one page)
  239. # # but for each year separately.
  240. # # downloading is per year and
  241. # update_btr_config = {
  242. # "round": get_var("round", None),
  243. # }
  244. #
  245. #
  246. # def task_update_btr():
  247. # """Update list of BTR submissions"""
  248. # return {
  249. # "targets": [
  250. # f"downloaded_data/UNFCCC/submissions-BTR{update_btr_config['round']}.csv"
  251. # ],
  252. # "actions": [
  253. # set_root_path(),
  254. # datalad.api.run(
  255. # cmd="python src/unfccc_ghg_data/unfccc_downloader/"
  256. # "fetch_submissions_btr.py "
  257. # f"--round={update_btr_config['round']}",
  258. # dataset=root_path,
  259. # message=f"Fetch Biannial Transparency Report submissions for "
  260. # f"BTR{update_btr_config['round']}",
  261. # outputs=f"downloaded_data/UNFCCC/submissions-BTR"
  262. # f"{update_btr_config['round']}.csv",
  263. # dry_run=None,
  264. # explicit=True,
  265. # ),
  266. # ],
  267. # "verbosity": 2,
  268. # "setup": ["in_venv"],
  269. # }
  270. #
  271. #
  272. # def task_download_btr():
  273. # """Download BTR submissions"""
  274. # return {
  275. # #'file_dep': ['downloaded_data/UNFCCC/submissions-nc.csv'],
  276. # # deactivate file_dep for now as it will always run fetch submissions
  277. # # before download
  278. # "actions": [
  279. # set_root_path(),
  280. # datalad.api.run(
  281. # cmd="src/unfccc_ghg_data/unfccc_downloader/download_btr.py "
  282. # f"--round={update_btr_config['round']}",
  283. # dataset=root_path,
  284. # message="Download BTR submissions for "
  285. # f"BTR{update_btr_config['round']}",
  286. # inputs=f"downloaded_data/UNFCCC/submissions-BTR"
  287. # f"{update_btr_config['round']}.csv",
  288. # dry_run=None,
  289. # explicit=False,
  290. # ),
  291. # map_folders("downloaded_data/UNFCCC"),
  292. # ],
  293. # "verbosity": 2,
  294. # "setup": ["in_venv"],
  295. # }
  296. #
  297. #
  298. # def task_download_ndc():
  299. # """Download NDC submissions"""
  300. # return {
  301. # "actions": [
  302. # set_root_path(),
  303. # datalad.api.run(
  304. # cmd="src/unfccc_ghg_data/unfccc_downloader/download_ndc.py",
  305. # dataset=root_path,
  306. # message="Download NDC submissions",
  307. # inputs=None,
  308. # dry_run=None,
  309. # explicit=False,
  310. # ),
  311. # map_folders("downloaded_data/UNFCCC"),
  312. # ],
  313. # "verbosity": 2,
  314. # "setup": ["in_venv"],
  315. # }
  316. #
  317. #
  318. # # read UNFCCC submissions.
  319. # # datalad run is called from within the read_UNFCCC_submission.py script
  320. # read_config = {
  321. # "country": get_var("country", None),
  322. # "submission": get_var("submission", None),
  323. # }
  324. #
  325. #
  326. # # TODO: make individual task for non-UNFCCC submissions
  327. # def task_read_unfccc_submission():
  328. # """Read submission for a country (if code exists) (not for CRF)"""
  329. # return {
  330. # # "actions": [
  331. # # f"python src/unfccc_ghg_data/unfccc_reader/read_UNFCCC_submission.py "
  332. # # f"--country={read_config['country']} --submission={read_config['submission']}",
  333. # # "python src/unfccc_ghg_data/helper/folder_mapping.py "
  334. # # "--folder=extracted_data/UNFCCC",
  335. # # ],
  336. # "actions": [
  337. # set_root_path(),
  338. # datalad.api.run(
  339. # cmd="src/unfccc_ghg_data/unfccc_reader/read_UNFCCC_submission.py"
  340. # f"--country={read_config['country']} "
  341. # f"--submission={read_config['submission']}",
  342. # dataset=root_path,
  343. # message=f"Read {read_config['submission']} for country "
  344. # f"{read_config['country']}",
  345. # dry_run=None,
  346. # explicit=False,
  347. # ),
  348. # map_folders("extracted_data/UNFCCC"),
  349. # ],
  350. # "verbosity": 2,
  351. # "setup": ["in_venv"],
  352. # }
  353. #
  354. #
  355. # # read UNFCCC submissions.
  356. # # datalad run is called from within the read_UNFCCC_submission.py script
  357. # read_config_crf = {
  358. # "country": get_var("country", None),
  359. # "submission_year": get_var("submission_year", None),
  360. # "submission_date": get_var("submission_date", None),
  361. # "re_read": get_var("re_read", False),
  362. # "countries": get_var("countries", None),
  363. # "data_year": get_var("data_year", None),
  364. # "totest": get_var("totest", None),
  365. # }
  366. #
  367. #
  368. # def task_read_unfccc_crf_submission():
  369. # """Read CRF submission for a country"""
  370. # actions = [
  371. # f"python src/unfccc_ghg_data/unfccc_crf_reader"
  372. # f"/read_unfccc_crf_submission_datalad.py "
  373. # f"--country={read_config_crf['country']} "
  374. # f"--submission_year={read_config_crf['submission_year']} "
  375. # f"--submission_date={read_config_crf['submission_date']} ",
  376. # "python src/unfccc_ghg_data/helper/folder_mapping.py "
  377. # "--folder=extracted_data/UNFCCC",
  378. # ]
  379. # if read_config_crf["re_read"] == "True":
  380. # actions[0] = actions[0] + " --re_read"
  381. # return {
  382. # "actions": actions,
  383. # "task_dep": ["set_env"],
  384. # "verbosity": 2,
  385. # "setup": ["in_venv"],
  386. # }
  387. #
  388. #
  389. # def task_read_new_unfccc_crf_for_year():
  390. # """
  391. # Read CRF submission for all countries for given submission year.
  392. #
  393. # By default only reads data not present yet. Only reads the latest updated
  394. # submission for each country.
  395. # """
  396. # actions = [
  397. # f"python src/unfccc_ghg_data/unfccc_crf_reader"
  398. # f"/read_new_unfccc_crf_for_year_datalad.py "
  399. # f"--submission_year={read_config_crf['submission_year']} ",
  400. # "python src/unfccc_ghg_data/helper/folder_mapping.py "
  401. # "--folder=extracted_data/UNFCCC",
  402. # ]
  403. # # specifying countries is currently disabled due to problems with command line
  404. # # list arguments
  405. # # if read_config_crf["countries"] is not None:
  406. # # actions[0] = actions[0] + f"--countries={read_config_crf['countries']} "
  407. # if read_config_crf["re_read"] == "True":
  408. # actions[0] = actions[0] + " --re_read"
  409. # return {
  410. # #'basename': "Read_CRF_year",
  411. # "actions": actions,
  412. # "task_dep": ["set_env"],
  413. # "verbosity": 2,
  414. # "setup": ["in_venv"],
  415. # }
  416. #
  417. #
  418. # def task_test_read_unfccc_crf_for_year():
  419. # """
  420. # Test CRF reading.
  421. #
  422. # Test CRF with a single year only for speed and logging to extend specifications
  423. # if necessary.
  424. # """
  425. # actions = [
  426. # f"python "
  427. # f"src/unfccc_ghg_data/unfccc_crf_reader"
  428. # f"/test_read_unfccc_crf_for_year.py "
  429. # f"--submission_year={read_config_crf['submission_year']} "
  430. # f"--country={read_config_crf['country']} "
  431. # ]
  432. # if read_config_crf["totest"] == "True":
  433. # actions[0] = actions[0] + " --totest"
  434. #
  435. # if read_config_crf["data_year"] is not None:
  436. # actions[0] = actions[0] + f"--data_year={read_config_crf['data_year']} "
  437. # return {
  438. # #'basename': "Read_CRF_year",
  439. # "actions": actions,
  440. # "task_dep": ["set_env"],
  441. # "verbosity": 2,
  442. # "setup": ["in_venv"],
  443. # }
  444. #
  445. #
  446. # def task_compile_raw_unfccc_crf_for_year():
  447. # """
  448. # Collect all latest CRF submissions for a given year
  449. #
  450. # Reads the latest data from the extracted data folder for each country.
  451. # Notifies the user if new data are available in the downloaded_data folder
  452. # which have not yet been read.
  453. #
  454. # Data are saved in the datasets/UNFCCC/CRFYYYY folder.
  455. # """
  456. # actions = [
  457. # f"python "
  458. # f"src/unfccc_ghg_data/unfccc_crf_reader/crf_raw_for_year.py "
  459. # f"--submission_year={read_config_crf['submission_year']} "
  460. # ]
  461. # return {
  462. # "actions": actions,
  463. # "task_dep": ["set_env"],
  464. # "verbosity": 2,
  465. # "setup": ["in_venv"],
  466. # }
  467. #
  468. #
  469. # # tasks for DI reader
  470. # # datalad run is called from within the read_unfccc_di_for_country.py script
  471. # read_config_di = {
  472. # "country": get_var("country", None),
  473. # "date": get_var("date", None),
  474. # "annexI": get_var("annexI", False),
  475. # # "countries": get_var('countries', None),
  476. # }
  477. #
  478. #
  479. # def task_read_unfccc_di_for_country():
  480. # """Read DI data for a country"""
  481. # actions = [
  482. # f"python "
  483. # f"src/unfccc_ghg_data/unfccc_di_reader/read_unfccc_di_for_country_datalad.py "
  484. # f"--country={read_config_di['country']}",
  485. # "python src/unfccc_ghg_data/helper/folder_mapping.py "
  486. # "--folder=extracted_data/UNFCCC",
  487. # ]
  488. # return {
  489. # "actions": actions,
  490. # "task_dep": ["set_env"],
  491. # "verbosity": 2,
  492. # "setup": ["in_venv"],
  493. # }
  494. #
  495. #
  496. # def task_process_unfccc_di_for_country():
  497. # """Process DI data for a country"""
  498. # actions = [
  499. # f"python "
  500. # f"src/unfccc_ghg_data/unfccc_di_reader/process_unfccc_di_for_country_datalad"
  501. # f".py "
  502. # f"--country={read_config_di['country']} --date={read_config_di['date']}",
  503. # "python src/unfccc_ghg_data/helper/folder_mapping.py "
  504. # "--folder=extracted_data/UNFCCC",
  505. # ]
  506. # return {
  507. # "actions": actions,
  508. # "task_dep": ["set_env"],
  509. # "verbosity": 2,
  510. # "setup": ["in_venv"],
  511. # }
  512. #
  513. #
  514. # def task_read_unfccc_di_for_country_group():
  515. # """Read DI data for a country group"""
  516. # actions = [
  517. # "python "
  518. # "src/unfccc_ghg_data/unfccc_di_reader/read_unfccc_di_for_country_group_datalad"
  519. # ".py",
  520. # "python src/unfccc_ghg_data/helper/folder_mapping.py "
  521. # "--folder=extracted_data/UNFCCC",
  522. # ]
  523. # if read_config_di["annexI"] == "True":
  524. # actions[0] = actions[0] + " --annexI"
  525. #
  526. # return {
  527. # "actions": actions,
  528. # "task_dep": ["set_env"],
  529. # "verbosity": 2,
  530. # "setup": ["in_venv"],
  531. # }
  532. #
  533. #
  534. # def task_process_unfccc_di_for_country_group():
  535. # """Process DI data for a country group"""
  536. # actions = [
  537. # "python "
  538. # "src/unfccc_ghg_data/unfccc_di_reader"
  539. # "/process_unfccc_di_for_country_group_datalad"
  540. # ".py",
  541. # ]
  542. # if read_config_di["annexI"] == "True":
  543. # actions[0] = actions[0] + " --annexI"
  544. # if read_config_di["date"] is not None:
  545. # actions[0] = actions[0] + f" --date={read_config_di['date']}"
  546. #
  547. # return {
  548. # "actions": actions,
  549. # "task_dep": ["set_env"],
  550. # "verbosity": 2,
  551. # "setup": ["in_venv"],
  552. # }
  553. #
  554. #
  555. # # general tasks
  556. # def task_country_info():
  557. # """
  558. # Print information on submissions and datasets available for given country
  559. # """
  560. # return {
  561. # "actions": [
  562. # f"python src/unfccc_ghg_data/helper/country_info.py "
  563. # f"--country={read_config['country']}"
  564. # ],
  565. # "task_dep": ["set_env"],
  566. # "verbosity": 2,
  567. # "setup": ["in_venv"],
  568. # }