CR
/
unfcc_di_data


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748
							"""Download the data for all countries from the """
import pandas as pd
import tqdm
import os
import pathlib

from unfccc_di_api import UNFCCCApiReader

ROOT_DIR = pathlib.Path(os.path.abspath(os.curdir))


def main():
    r = UNFCCCApiReader()
    dfs = []
    for party in tqdm.tqdm(r.parties["code"], desc="parties"):
        df = r.query(party_code=party, progress=False)

        annexI = party in r.annex_one_reader.parties["code"].values
        subdir = "annexI" if annexI else "non-annexI"
        directory = ROOT_DIR / "data" / subdir
        directory.mkdir(parents=True, exist_ok=True)
        # CSV compressed with gzip is a very widely used standard
        # pass mtime=0 explicitly so that the creation time is not embedded in the
        # produced gzip file, which means that the gzip file doesn't change if the
        # contents haven't changed.
        df.to_csv(
            directory / f"{party}.csv.gz",
            compression={"method": 'gzip', "mtime": 0}
        )
        dfs.append(df)

    # Save data for all parties into one big file for easy distribution.
    # parquet with zstd compression is a very efficient binary format
    # unfortunately, it embeds the used software versions in the metadata so
    # generally, the parquet file can change even if the contents haven't changed.
    # We disable index and statistics which we don't use.
    df_all = pd.concat(dfs)
    df_all.to_parquet(
        ROOT_DIR / "data" / "all.parquet",
        engine="fastparquet",
        compression="zstd",
        index=False,
        stats=False
    )


if __name__ == "__main__":
    main()