ソースを参照

[DATALAD] Recorded changes

Daniel Busch 1 日 前
コミット
b11d452258

+ 1 - 0
conversion_FAO_IPPCC2006_PRIMAP_CH4_reproduce23.csv

@@ -0,0 +1 @@
+.git/annex/objects/3m/PQ/MD5E-s439--3a3c3c747faacd58172b97058b513a28.csv/MD5E-s439--3a3c3c747faacd58172b97058b513a28.csv

+ 1 - 0
conversion_FAO_IPPCC2006_PRIMAP_CO2_reproduce23.csv

@@ -0,0 +1 @@
+.git/annex/objects/5g/kM/MD5E-s522--e1bd0f6d079272418da3e654e2fad31e.csv/MD5E-s522--e1bd0f6d079272418da3e654e2fad31e.csv

+ 1 - 0
conversion_FAO_IPPCC2006_PRIMAP_N2O_reproduce23.csv

@@ -0,0 +1 @@
+.git/annex/objects/1w/6J/MD5E-s667--c1223392c238ed9d3e38a0fe1cb62565.csv/MD5E-s667--c1223392c238ed9d3e38a0fe1cb62565.csv

+ 1 - 1
extracted_data/v2023-12-13/FAOSTAT_Agrifood_system_emissions_v2023-12-13.csv

@@ -1 +1 @@
-../../.git/annex/objects/9q/VQ/MD5E-s10232750--a1e11b106edb0841bb19d7e04a65bb69.csv/MD5E-s10232750--a1e11b106edb0841bb19d7e04a65bb69.csv
+../../.git/annex/objects/47/1F/MD5E-s5763420--61fa57295aeb26589f60f650fffc0826.csv/MD5E-s5763420--61fa57295aeb26589f60f650fffc0826.csv

+ 1 - 1
extracted_data/v2023-12-13/FAOSTAT_Agrifood_system_emissions_v2023-12-13.nc

@@ -1 +1 @@
-../../.git/annex/objects/59/VF/MD5E-s3250602--1b5cae13af1c9fdb2e532c141dd36503.nc/MD5E-s3250602--1b5cae13af1c9fdb2e532c141dd36503.nc
+../../.git/annex/objects/WJ/g4/MD5E-s1798444--e8ba00d8e0d59fc8ee4972903afcc4f8.nc/MD5E-s1798444--e8ba00d8e0d59fc8ee4972903afcc4f8.nc

+ 1 - 1
extracted_data/v2023-12-13/FAOSTAT_Agrifood_system_emissions_v2023-12-13_raw.csv

@@ -1 +1 @@
-../../.git/annex/objects/jQ/9f/MD5E-s32990120--9b50fb64b61dd771fd55940f9722967a.csv/MD5E-s32990120--9b50fb64b61dd771fd55940f9722967a.csv
+../../.git/annex/objects/mf/w8/MD5E-s34004864--ee0bfaa28c0839ef173253af63c6b3fb.csv/MD5E-s34004864--ee0bfaa28c0839ef173253af63c6b3fb.csv

+ 1 - 1
extracted_data/v2023-12-13/FAOSTAT_Agrifood_system_emissions_v2023-12-13_raw.nc

@@ -1 +1 @@
-../../.git/annex/objects/WJ/J0/MD5E-s13246774--9d03cc903c7ef543debd02c54536c45f.nc/MD5E-s13246774--9d03cc903c7ef543debd02c54536c45f.nc
+../../.git/annex/objects/zg/fG/MD5E-s14095580--cb5d17687b666c68adb063acc8dba832.nc/MD5E-s14095580--cb5d17687b666c68adb063acc8dba832.nc

+ 1 - 1
extracted_data/v2024-11-14/FAOSTAT_Agrifood_system_emissions_v2024-11-14.csv

@@ -1 +1 @@
-../../.git/annex/objects/3G/Xw/MD5E-s10455053--2d3b5df6f172a6393c9334b6783ed0cc.csv/MD5E-s10455053--2d3b5df6f172a6393c9334b6783ed0cc.csv
+../../.git/annex/objects/Zq/Xx/MD5E-s10459474--d82eda959df55e229320cb05012d7853.csv/MD5E-s10459474--d82eda959df55e229320cb05012d7853.csv

+ 1 - 1
extracted_data/v2024-11-14/FAOSTAT_Agrifood_system_emissions_v2024-11-14.nc

@@ -1 +1 @@
-../../.git/annex/objects/xx/23/MD5E-s3329305--a338fd39d4c8e44364f7c5e0267d71f5.nc/MD5E-s3329305--a338fd39d4c8e44364f7c5e0267d71f5.nc
+../../.git/annex/objects/V4/pG/MD5E-s3341536--53e447409452734a496b3ffea3647f59.nc/MD5E-s3341536--53e447409452734a496b3ffea3647f59.nc

+ 1 - 1
extracted_data/v2024-11-14/FAOSTAT_Agrifood_system_emissions_v2024-11-14_raw.csv

@@ -1 +1 @@
-../../.git/annex/objects/xv/3j/MD5E-s33059797--a79a80ee8a3b836f7c376a4d23dfbef4.csv/MD5E-s33059797--a79a80ee8a3b836f7c376a4d23dfbef4.csv
+../../.git/annex/objects/VK/1Q/MD5E-s33863965--e76229b224f4df989def18921ef16c6a.csv/MD5E-s33863965--e76229b224f4df989def18921ef16c6a.csv

+ 1 - 1
extracted_data/v2024-11-14/FAOSTAT_Agrifood_system_emissions_v2024-11-14_raw.nc

@@ -1 +1 @@
-../../.git/annex/objects/37/9w/MD5E-s14515527--56222b762f2646fe6e0e6b4f1e9088fc.nc/MD5E-s14515527--56222b762f2646fe6e0e6b4f1e9088fc.nc
+../../.git/annex/objects/mw/x3/MD5E-s15066036--f6b379d199f0f0de6d96e2294d6a7b56.nc/MD5E-s15066036--f6b379d199f0f0de6d96e2294d6a7b56.nc

+ 494 - 0
fao_total_specifications_2023.m

@@ -0,0 +1,494 @@
+function specifications = fao_total_specifications_2023(sourcename)
+% fao_total_specifications_2023 creates a structure containing all
+%   information needed to parse the FAO emission-total files.
+%
+% INPUT PARAMETERS:
+%   sourcename: source identifier. It is stored unchanged in
+%   specifications.patternTable.(CONSTANTS.nameOfSheetSource).
+%
+% RETURN PARAMETERS:
+%   specifications: a structure with many constants needed to parse the
+%   input files for the program read_bulkplus_sheet (the input is flagged
+%   as CSV via specifications.isCSV).
+%   specifications contains the fields 'keystruct', 'mapstruct' and
+%   'patternTable', plus a few reader settings (isCSV, CSVdelimiter,
+%   CSVQuotesToRemove, blockSize, singleLabel, colStartOfYear,
+%   rowOfHeader, rowOfData).
+%   - specifications.keystruct - is a structure of sheet fieldnames (e.g.
+%     'sheet_category'), where the value is a cellstr of the labels that
+%     should be used to assign the value. The list is taken in the order it
+%     is listed.
+%   - specifications.mapstruct - is a structure that maps column headers (or other
+%     inputs, such as $filename or $sheetname for the filename or sheetname,
+%     respectively) to the values to be assigned to sheet fields.
+%     mapstruct.name contains the list of header names, where the index of
+%     each header corresponds to the field 'var[index]'. mapstruct.varX
+%     contains the field 'value', as well as fields for each sheet
+%     fieldname (e.g. sheet_category) that gets mappings from this
+%     parameter source. The field 'value' can have three types of values:
+%     - a cellstr of possible values in the input file. In this case,
+%     cellstrs of the same length are given for the sheet fieldnames, which
+%     provide the values to map to.
+%     - the string 'echo'. In this case, the value in the input file is fed
+%     to a function, the name of which is given in the sheet fieldname
+%     fields. If the function is an empty string, then the value in the
+%     input file is simply fed directly.
+%     - the string 'split'. In this case, the value in the input file for
+%     this column is a year, such that before this year the data is to have
+%     one designation, and after and including this year, it is to have a
+%     different designation. (this is used for the IMF WEO data). The
+%     designations are provided in the sheet fieldname field.
+%   - specifications.patternTable - is the default datatable that gets
+%     parametrised for every parsed row.
+%
+%
+% GLOBALS:
+% CONSTANTS: A structure containing string and numeric constants, which are
+%           used in the PRIMAP functions. These are configurations like
+%           scaling factors, but also fieldnames for the database and
+%           mapping vectors etc. An error is raised if it is empty.
+%
+% NOTES:
+%
+% (c) Written by Kathleen Markmann and the PRIMAP - PIK team, June 2012.
+% This file is shared under a "Attribution-NonCommercial-ShareAlike 3.0"
+% licence, which means that you must give credit to the author, are not
+% allowed to use it for commercial purposes and you may share your
+% derivatives of this work only under a similar licence. Other than that,
+% you are free to copy, display, use, modify the code. The details of this
+% licence are given at: http://creativecommons.org/licenses/by-nc-sa/3.0/
+%
+% version: 150602 JG - based on fao_specifications
+% version: 201001 JG - adapted for FAO 2020A
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+global CONSTANTS
+if isempty(CONSTANTS)
+    error('Empty global variable! Please initialise with the drive_set_paths_and_globals... function!')
+end
+
+% Download date of the FAO data: Jan 2023.
+% (Kept as a comment only: the former local variable `date` was never used
+% and shadowed MATLAB's builtin date function.)
+%sourcename='FAO2018';
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%% sheet management %%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+% INPUT NAMES OF EXCEL SHEETS TO READ IN. IF NOT SPECIFIED, ALL SHEETS ARE
+% READ IN
+%specifications.sheetsToRead = {...
+%};
+
+% INPUT NAMES OF EXCEL SHEETS TO NOT READ IN. IF SHEET IS ENCOUNTERED THAT
+% IS NOT LISTED IN THE SHEETSTOREAD OR SHEETSTOIGNORE, A WARNING IS GIVEN
+%specifications.sheetsToIgnore = {...
+%};
+
+% data in csv-format?
+specifications.isCSV = true;
+specifications.CSVdelimiter = ',';
+specifications.CSVQuotesToRemove = '';
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%% READING SHEETS %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% SPECIFY IF DATA IS EXTRA LONG AND NEEDS TO BE READ IN BLOCKS
+% IF BlockSize and dataBottomRightCorner IS NOT FIELD, DEFAULT IS TO READ IN ALL DATA AT ONCE
+specifications.blockSize = 10000;
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% SPECIFY IF LABEL IS ONLY ONE COLUMN. DEFAULT IS FALSE. IF TRUE, THEN
+% COLUMN NAMES DO NOT NEED TO BE SPECIFIED. LABELS ASSUMED TO BE IN FIRST
+% COLUMN
+specifications.singleLabel = false;
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% SPECIFY COLUMN INDEX WHERE ANNUAL DATA STARTS
+%%%% TEMP %%%%
+specifications.colStartOfYear = 9;
+%temporarily because index has been written in files by mistake
+%%%% TEMP %%%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% SPECIFY ROW INDEX WHERE THE HEADER IS FOUND. IF NOT GIVEN, DEFAULT IS 1.
+% CAN BE MULTIPLE ROWS. IN THIS CASE, ALL ROWS LISTED ARE CONCATENATED WITH
+% NO SPACES BETWEEN EACH ROW.
+specifications.rowOfHeader = 1;
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% SPECIFY ROW INDEX WHERE DATA STARTS. DEFAULT IS ROW AFTER HEADER
+specifications.rowOfData = 2;
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% HEADER VALUE THAT INDICATES THE END OF DATA. DATA IS
+% ASSUMED TO END BEFORE THIS. IF NOT GIVEN, DEFAULT IS NaN
+%specifications.nonDataKeyword = '2014F';
+%each of these specification columns has to be defined in the following...
+%the way is the same for all of them: first you define the name for the
+%column, then you define the mapping:
+%always the bulk name and, in the same order, the according PRIMAPDB names
+
+
+%specifications.yearColIgnore = '\d*F';
+%specifications.yearColTransform = '\d*'; %this is used to match a string
+% which is then taken as the year
+
+% DEFINE KEYSTRUCT %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% FOR EVERY SHEET KEY THAT IS TO BE DEFINED FROM THE INPUT FILE, PROVIDE
+% THE SHEET KEY NAME AS A FIELD, AND THEN LIST THE COLUMN HEADERS (OR
+% $filename or $sheetname FOR THE FILENAME OR EXCEL SHEETNAME,
+% RESPECTIVELY) IN THE SUBFIELD colName. IF SINGLE COLUMN MODE IS DEFINED,
+% THEN USE $label.
+
+% Note: the 'Element' column feeds entity, type and (together with 'Unit'
+% and 'Variable') the unit key.
+specifications.keystruct.(CONSTANTS.nameOfSheetEntity).colName = {'Element'};
+specifications.keystruct.(CONSTANTS.nameOfCountryVector).colName = {'Area Code'};
+specifications.keystruct.(CONSTANTS.nameOfSheetCategory).colName = {'Item'};
+specifications.keystruct.(CONSTANTS.nameOfSheetType).colName = {'Element'};
+specifications.keystruct.(CONSTANTS.nameOfSheetUnit).colName = {'Unit', 'Element', 'Variable'}; % {'Unit'};
+
+%% TODO
+% we need a new FAO specification because the new emission data does not really fit into the old categories
+% we now have the following
+% * Emissions on agricultural land
+% = Farm-gate emissions + Fires in humid tropical forests + Net Forest conversion + Fires in organic soils
+% * Farm-gate emissions (1)
+% = Enteric Fermentation (1A) + Manure Management (1B) + Rice Cultivation (1C) + Synthetic Fertilizers (1D)
+% + Manure applied to Soils (1E) + Manure left on Pasture (1F) + Crop Residues (1G) + Burning - Crop residues (1J)
+% + Drained organic soils (1H, CO2 and N2O) + On-farm energy use (1K)
+% * FORESTLAND = Forestland + Forest fires
+% * LAND USE CHANGE = Fires in humid tropical forests + Net Forest conversion + Fires in organic soils
+% (FORESTLAND and LAND USE CHANGE are not available in the csv files, but named on the
+% "FAOSTAT Domain Emissions Totals. Metadata" document)
+
+
+% IPCC type categories
+% AFOLU = IPCC Agriculture + LULUCF
+% IPCC Agriculture = Farm-gate emissions - On-farm energy use - Drained organic soils (CO2)
+% LULUCF = LAND USE CHANGE + FORESTLAND + Drained organic soils (CO2)
+% or LULUCF = Fires in humid tropical forests + Net Forest conversion + Fires in organic soils
+%           + Forestland + Forest fires
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% MAPPING %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+% LISTS MAPPING BETWEEN VALUES IN THE INPUT FILE AND VALUES FOR THE
+% DATATABLES. THE COLUMN HEADERS (OR $filename or $sheetname FOR THE
+% FILENAME OR EXCEL SHEETNAME, RESPECTIVELY) ARE LISTED IN FIELD name.
+% STANDARD INPUT FOR var[i].value IS TO PROVIDE A CELLSTR OF ALL VALUES
+% FROM THE COLUMN LISTED IN name{i} THAT ARE TO BE ASSIGNED. VALUES TO BE
+% IGNORED CAN BE LISTED IN FIELD var[i].valueToIgnore, TO REDUCE WARNING
+% MESSAGES. THE SHEET KEYS TO MAP TO ARE LISTED AS FIELDS UNDER var[i],
+% WITH THE VALUE IN THE STANDARD CASE BEING A CELLSTR WITH THE SAME LENGTH
+% AS THE FIELD value, THAT PROVIDES THE VALUES TO BE USED IN THE
+% DATATABLES.
+% NOTE(review): this comment used to say 'valuesToIgnore' while the code
+% below sets 'valueToIgnore' - confirm which spelling the parser reads.
+% ALTERNATE INPUTS FOR FIELD value ARE THE STRINGS 'echo' OR
+% 'split'. 'echo' PASSES THE INPUT VALUE TO THE FUNCTION LISTED UNDER THE
+% SHEET KEY FIELD, AND ASSIGNS THE OUTPUT TO THE SHEET KEY. 'split' ASSUMES
+% THAT THE INPUT VALUES IS A YEAR. THE DATA WILL BE SPLIT INTO TWO
+% DATATABLES AT THAT YEAR, WITH THAT YEAR BEING A PART OF THE SECOND
+% DATATABLE. THE FIRST TABLE WILL GET THE FIRST ELEMENT ASSIGNED TO SHEET
+% KEY FIELD, AND THE SECOND TABLE WILL GET THE SECOND ELEMENT.
+
+%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% entity, category and unit %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% The four lists below are parallel: position k of value maps to position k
+% of the entity, unit and type lists.
+i = 1;
+specifications.mapstruct.name{i} = 'Element';
+specifications.mapstruct.(['var' num2str(i)]).value = { ...
+    'Emissions (CH4)', 'Emissions (N2O)', 'Emissions (CO2)', ...
+    'Emissions (CO2eq) (AR5)', 'Direct emissions (N2O)', 'Indirect emissions (N2O)', ...
+    'Emissions (CO2eq) from F-gases (AR5)', ...
+    };
+specifications.mapstruct.(['var' num2str(i)]).(CONSTANTS.nameOfSheetEntity) = { ...
+    'CH4', 'N2O', 'CO2', ...
+    'KYOTOGHGAR5', 'N2O', 'N2O', ...
+    'FGASESAR5', ...
+    };
+specifications.mapstruct.(['var' num2str(i)]).(CONSTANTS.nameOfSheetUnit) = { ...
+    '', '', '', ...
+    'CO2eq', '', '', ...
+    'CO2eq', ...
+    };
+specifications.mapstruct.(['var' num2str(i)]).(CONSTANTS.nameOfSheetType) = { ...
+    'NET', 'NET', 'NET', ...
+    'NET', 'DIR', 'IND', ...
+    'NET', ...
+    };
+specifications.mapstruct.(['var' num2str(i)]).valueToIgnore = { ...
+    'Emissions (CO2eq) from N2O (AR5)', ...
+    'Emissions (CO2eq) from CH4 (AR5)', ...
+    };
+
+
+%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% region %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% Area codes (value) and region codes (country vector) are parallel lists
+% with five entries per row; the trailing "% N" comments count rows in
+% steps of five to help keeping both lists aligned.
+i = 2;
+specifications.mapstruct.name{i} = 'Area Code';
+specifications.mapstruct.(['var' num2str(i)]).value = { ...
+    '1', '2', '3', '4', '5', ...
+    '6', '7', '8', '9', '10', ...
+    '11', '12', '13', '14', '15', ...
+    '16', '17', '18', '19', '20', ...
+    '21', '22', '23', '24', '25', ... % 5
+    '26', '27', '28', '29', '30', ...
+    '31', '32', '33', '35', '36', ...
+    '37', '38', '39', '40', '41', ...
+    '42', '43', '44', '45', '46', ...
+    '47', '48', '49', '50', '51', ... % 10
+    '52', '53', '54', '55', '56', ...
+    '57', '58', '59', '60', '61', ...
+    '62', '63', '64', '65', '66', ...
+    '67', '68', '69', '70', '71', ...
+    '72', '73', '74', '75', '77', ... % 15
+    '78', '79', '80', '81', '82', ...
+    '83', '84', '85', '86', '87', ...
+    '88', '89', '90', '91', '92', ...
+    '93', '94', '95', '96', '97', ...
+    '98', '99', '100', '101', '102', ... % 20
+    '103', '104', '105', '106', '107', ...
+    '108', '109', '110', '111', '112', ...
+    '113', '114', '115', '116', '117', ...
+    '118', '119', '120', '121', '122', ...
+    '123', '124', '125', '126', '127', ... % 25
+    '128', '129', '130', '131', '132', ...
+    '133', '134', '135', '136', '137', ...
+    '138', '139', '140', '141', '142', ...
+    '143', '144', '145', '146', '147', ...
+    '148', '149', '150', '151', '153', ... %30
+    '154', '155', '156', '157', '158', ...
+    '159', '160', '161', '162', '163', ...
+    '164', '165', '166', '167', '168', ...
+    '169', '170', '171', '172', '173', ...
+    '174', '175', '176', '177', '178', ... %35
+    '179', '180', '181', '182', '183', ...
+    '184', '185', '186', '187', '188', ...
+    '189', '190', '191', '192', '193', ...
+    '194', '195', '196', '197', '198', ...
+    '199', '200', '201', '202', '203', ... % 40
+    '205', '206', '207', '208', '209', ...
+    '210', '211', '212', '213', '214', ...
+    '215', '216', '217', '218', '219', ...
+    '220', '221', '222', '223', '224', ...
+    '225', '226', '227', '228', '229', ... % 45
+    '230', '231', '232', '233', '234', ...
+    '235', '236', '237', '238', '239', ...
+    '240', '242', '243', '244', '248', ...
+    '249', '250', '251', '255', '256', ...
+    '258', '259', '260', '264', '270', ... % 50
+    '271', '272', '273', '274', '275', ...
+    '276', '277', '278', '279', '280', ...
+    '281', '282', '283', '284', '299', ...
+    };
+specifications.mapstruct.(['var' num2str(i)]).(CONSTANTS.nameOfCountryVector) = { ...
+    'ARM', 'AFG', 'ALB', 'DZA', 'ASM', ...
+    'AND', 'AGO', 'ATG', 'ARG', 'AUS', ...
+    'AUT', 'BHS', 'BHR', 'BRB', 'BELLUX', ...
+    'BGD', 'BMU', 'BTN', 'BOL', 'BWA', ...
+    'BRA', 'ABW', 'BLZ', 'IOT', 'SLB', ... % 5
+    'BRN', 'BGR', 'MMR', 'BDI', 'ATA', ...
+    'BVT', 'CMR', 'CAN', 'CPV', 'CYM', ...
+    'CAF', 'LKA', 'TCD', 'CHL', 'CHN', ...
+    'CXR', 'CCK', 'COL', 'COM', 'COG', ...
+    'COK', 'CRI', 'CUB', 'CYP', 'CZESVK', ... % 10
+    'AZE', 'BEN', 'DNK', 'DMA', 'DOM', ...
+    'BLR', 'ECU', 'EGY', 'SLV', 'GNQ', ...
+    'ETHERI', 'EST', 'FRO', 'FLK', 'FJI', ...
+    'FIN', 'FRA', 'GUF', 'PYF', 'ATF', ...
+    'DJI', 'GEO', 'GAB', 'GMB', 'F77', ... % 15
+    'F78', 'DEU', 'BIH', 'GHA', 'GIB', ... %F77,78 is GDR and BRD but no data in FAOSTAT
+    'KIR', 'GRC', 'GRL', 'GRD', 'GLP', ...
+    'GUM', 'GTM', 'GIN', 'GUY', 'HMD', ...
+    'HTI', 'VAT', 'HND', 'HKG', 'HUN', ...
+    'HRV', 'ISL', 'IND', 'IDN', 'IRN', ... % 20
+    'IRQ', 'IRL', 'ISR', 'ITA', 'CIV', ...
+    'KAZ', 'JAM', 'JPN', 'JTN', 'JOR', ... % not sure what JTN is
+    'KGZ', 'KEN', 'KHM', 'PRK', 'KOR', ...
+    'KWT', 'LVA', 'LAO', 'LBN', 'LSO', ...
+    'LBR', 'LBY', 'LIE', 'LTU', 'MHL', ... % 25
+    'MAC', 'MDG', 'MWI', 'MYS', 'MDV', ...
+    'MLI', 'MLT', 'MTQ', 'MRT', 'MUS', ...
+    'MEX', 'MID', 'MCO', 'MNG', 'MSR', ... % MID unknown
+    'MAR', 'MOZ', 'FSM', 'MDA', 'NAM', ...
+    'NRU', 'NPL', 'NLD', 'ANT', 'NCL', ... % 30
+    'MKD', 'VUT', 'NZL', 'NIC', 'NER', ...
+    'NGA', 'NIU', 'NFK', 'NOR', 'MNP', ...
+    'TTPI', 'PAK', 'PAN', 'CZE', 'PNG', ...
+    'PRY', 'PER', 'PHL', 'PCN', 'POL', ...
+    'PRT', 'GNB', 'TLS', 'PRI', 'ERI', ... % 35
+    'QAT', 'PLW', 'ZWE', 'REU', 'ROU', ...
+    'RWA', 'RUS', 'SRBMNE', 'SHN', 'KNA', ...
+    'LCA', 'SPM', 'VCT', 'SMR', 'STP', ...
+    'SAU', 'SEN', 'SYC', 'SLE', 'SVN', ...
+    'SVK', 'SGP', 'SOM', 'ZAF', 'ESP', ... % 40
+    'ESH', 'SDNSSD', 'SUR', 'TJK', 'SWZ', ...
+    'SWE', 'CHE', 'SYR', 'TKM', 'TWN', ...
+    'TZA', 'THA', 'TGO', 'TKL', 'TON', ...
+    'TTO', 'OMN', 'TUN', 'TUR', 'TCA', ...
+    'ARE', 'UGA', 'TUV', 'FSU', 'GBR', ... % 45
+    'UKR', 'USA', 'UMI', 'BFA', 'URY', ...
+    'UZB', 'VEN', 'VNM', 'ETH', 'VGB', ...
+    'VIR', 'WAK', 'WLF', 'WSM', 'YUG', ... % WAK unknown
+    'YEM', 'COD', 'ZMB', 'BEL', 'LUX', ...
+    'AIA', 'CHI', 'SJM', 'IMN', 'MYT', ... % 50
+    'SGS', 'SRB', 'MNE', 'GGY', 'UVK', ... % UVK unknown
+    'SDN', 'SSD', 'BES', 'CUW', 'SXM', ...
+    'MAF', 'BLM', 'JEY', 'ALA', 'PSE', ...
+    };
+specifications.mapstruct.(['var' num2str(i)]).valueToIgnore = { ...
+    '5000', '5100', '5101', '5102', '5103', ...
+    '5104', '5105', '5200', '5203', '5204', ...
+    '5206', '5207', '5300', '5301', '5302', ...
+    '5303', '5304', '5305', '5400', '5401', ...
+    '5402', '5403', '5404', '5500', '5501', ...
+    '5502', '5503', '5504', '5707', '5801', ...
+    '5802', '5803', '5815', '5817', '5848', ...
+    '5849', '5873', 'F246', 'F247', 'F609', 'F614', ...
+    'F616', 'F617', 'F618', 'F619', 'F620', ...
+    'F621', 'F622',  '351',  ... %351 is China but we have it split up here
+};
+
+
+%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% Item names (value) and category codes are parallel lists. Items that are
+% not mapped are listed in valueToIgnore further below; the commented-out
+% entries inside the lists are kept for reference.
+i = 3;
+specifications.mapstruct.name{i} = 'Item';
+specifications.mapstruct.(['var' num2str(i)]).value = { ...
+    'LULUCF', 'IPCC Agriculture', ...
+    'Enteric Fermentation', 'Manure Management', 'Synthetic Fertilizers', ...
+    'Manure applied to Soils', 'Manure left on Pasture', 'Crop Residues', ...
+    'Burning - Crop residues', 'Net Forest conversion', 'Forestland', ...
+    'Savanna fires', 'AFOLU', ...
+    'Rice Cultivation', 'Agricultural Soils', ...
+    'All sectors with LULUCF', 'All sectors without LULUCF', 'Energy', ...
+    'IPPU', 'Other', 'Waste', ...
+    'Drained organic soils', 'Drained organic soils (N2O)', 'Drained organic soils (CO2)', ...
+    %'Forest fires', ... %5
+    %'Fires in organic soils', 'On-farm energy use', 'Emissions on agricultural land', ...
+    %'Farm-gate emissions', 'Fires in humid tropical forests',
+        %'Agri-food systems',
+    %'Farm gate', 'Fertilizers Manufacturing',
+    %'Food Household Consumption', 'Food Packaging',
+    %'Food Processing', 'Food Retail',
+    %'Food Transport', 'Food systems waste disposal',
+    % 'Land Use change', 'On-farm electricity use',
+    % 'Pre- and post- production',
+    };
+specifications.mapstruct.(['var' num2str(i)]).(CONSTANTS.nameOfSheetCategory) = { ...
+    'IPCMLULUCF', 'IPCMAG', ...
+    'IPC3A1', 'IPC3A2', 'IPC3C4A', ...
+    'IPC3C4B', 'IPC3C4C', 'IPC3C4D', ...
+    'IPC3C1B', 'IPCMNFC', 'IPC3B1', ...
+    'IPC3C1C', 'IPC3', ...
+    'IPC3C7', 'IPCM3C45AG', ...
+    'IPC0', 'IPCM0EL', 'IPC1', ...
+    'IPC2', 'IPC5', 'IPC4', ...
+    'IPCM3C45DOS', 'IPCM3C45DOSN2O', 'IPCM3C45DOSCO2', ...
+    %'MFF', ... %5
+    %'2D3', '1K', 'MAL', ...
+    %'MFG', '2D1',
+    %'1H', 'M1HAG', 'M1HLU', ...
+    };
+specifications.mapstruct.(['var' num2str(i)]).valueToIgnore = { ...
+    'Forest fires', ... %5
+    'Fires in organic soils', 'On-farm energy use', 'Emissions on agricultural land', ...
+    'Farm-gate emissions', 'Fires in humid tropical forests',  ...
+    'Agri-food systems', 'Farm gate', 'Fertilizers Manufacturing', ...
+    'Food Household Consumption', 'Food Packaging', ...
+    'Food Processing', 'Food Retail', ...
+    'Food Transport', 'Food systems waste disposal', ...
+    'Land Use change', 'On-farm electricity use', ...
+    'Pre- and post- production', ...
+    'Agrifood Systems Waste Disposal', 'Agrifood systems', ... % below new in 2024A
+    'Pesticides Manufacturing', 'Pre- and Post- Production', ...
+    'Emissions from crops', 'Emissions from livestock', ... % don't fit IPCC categories
+    };
+
+% general caveats in mapping: does not distinguish between direct and
+% indirect. All mapped to direct IPCC cats (3.C.4)
+% it's unclear where drained organic soils should be mapped to. It's
+% included in Agricultural soils and thus implicitly mapped to M.3.C.45AG
+
+% TODO make new FAO terminology
+% FAO sector hierarchy (total domain)
+% IPCC Agriculture (CH4, N2O) X
+% * Enteric fermentation (CH4 only) X
+% * Manure Management X
+% * Burning - crop residues X
+% * Savanna fires X
+% * Rice cultivation (CH4 only) X
+% * Agricultural soils (N2O only) X
+%   * Synthetic Fertilizers X
+%   * Manure applied to soils X
+%   * Manure left on Pasture X
+%   * Crop Residues X
+%   * Drained organic soils (N2O)
+
+% not in IPCC Agriculture hierarchy
+% Drained organic soils (CO2)
+% Net Forest conversion
+% Fires in humid tropical forests
+% Fires in organic soils
+% Forest fires
+% Forestland
+% On-farm energy use
+
+
+% agg
+% Agricultural Soils
+% IPCC Agriculture
+% LULUCF
+% AFOLU
+% Emissions on agricultural land
+% Farm-gate emissions
+% Land Use change
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+i = 4;
+specifications.mapstruct.name{i} = 'Unit';
+specifications.mapstruct.(['var' num2str(i)]).value = { ...
+    'Gigagrams', 'gigagrams', 'tonnes', 'Kg', 'Head', ...
+    'kilotonnes', 'kt', ...
+    };
+specifications.mapstruct.(['var' num2str(i)]).(CONSTANTS.nameOfSheetUnit) = { ...
+    'Gg', 'Gg', 't', 'Kg', 'Heads', ...
+    'kt', 'kt', ...
+    };
+% NOTE(review): the first entry spells 'N20' with a digit zero - confirm
+% that this matches the unit string in the input files. The last entry
+% repeats 'kg N2O-N/kg N' from the second row; both are left unchanged so
+% that the returned list stays identical.
+specifications.mapstruct.(['var' num2str(i)]).valueToIgnore = { ...
+    'g N20/kg dry matter', 'g CH4/kg dry matter', 'Ha', ...
+    'Kg of nutrients', 'kg N2O-N/kg N', 'million kWh', ...
+    'Terajoule', 'Kg/TJ', 'Kg CH4/head', ...
+    'kg N2O-N/kg N', ...
+    };
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+i = 5;
+specifications.mapstruct.name{i} = 'Variable';
+specifications.mapstruct.(['var' num2str(i)]).value = { ...
+    'Value', ...
+    };
+specifications.mapstruct.(['var' num2str(i)]).(CONSTANTS.nameOfSheetUnit) = { ...
+    '', ...
+    };
+specifications.mapstruct.(['var' num2str(i)]).valueToIgnore = { ...
+    'Flag', 'Note', 'FAO Source', 'Source Code', ...
+    'Domain', 'Flag Description', 'Domain Code', ...
+    'Source', 'Area Code (M49)', ...
+    };
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%% default table %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%since we want to build up datatables we need a default table which gets
+%parametrised according to the row which we are parsing
+
+%specify default datatable (all necessary fields...)
+specifications.patternTable.(CONSTANTS.nameOfSheetCode) = '';
+specifications.patternTable.(CONSTANTS.nameOfSheetCategory) = '';
+specifications.patternTable.(CONSTANTS.nameOfSheetCategoryName) = '';
+specifications.patternTable.(CONSTANTS.nameOfSheetEntity) = '';
+specifications.patternTable.(CONSTANTS.nameOfSheetType) = '';
+specifications.patternTable.(CONSTANTS.nameOfSheetClass) = 'TOTAL';
+specifications.patternTable.(CONSTANTS.nameOfSheetDescr) = '';
+specifications.patternTable.(CONSTANTS.nameOfSheetNote) = 'Read in by JG';
+specifications.patternTable.(CONSTANTS.nameOfSheetSource) = sourcename; % caller-supplied source identifier
+specifications.patternTable.(CONSTANTS.nameOfSheetFirstdatarow) = 0;
+specifications.patternTable.(CONSTANTS.nameOfSheetUnit) = '';
+specifications.patternTable.(CONSTANTS.nameOfSheetScenario) = 'HISTORY';
+specifications.patternTable.(CONSTANTS.nameOfSheetDatatype) = 'CountryYearMatrix';
+specifications.patternTable.(CONSTANTS.nameOfCountryVector) = {};
+specifications.patternTable.(CONSTANTS.nameOfDataField) = [];
+specifications.patternTable.(CONSTANTS.nameOfYearVector) = [];
+specifications.patternTable.(CONSTANTS.nameOfSheetSubsource) = {};
+specifications.patternTable.(CONSTANTS.nameOfSheetTablekind) = CONSTANTS.defaultForSheetTablekind;
+
+end %function fao_total_specifications_2023

+ 551 - 0
prepare_FAO2022TOT_data.m

@@ -0,0 +1,551 @@
+function prepare_FAO2022TOT_data(source_name, last_hist_year)
+% PREPARE_FAO2022TOT_DATA Prepare FAO data as country time series.
+%
+% Steps performed (in this order):
+%   1. remove all existing tables of the processed source (raw source name
+%      plus suffix 'I') from the database
+%   2. copy the HISTORY tables of the raw source to the processed source
+%   3. adjust the countries of every table (see adjust_countries below)
+%   4. if last_hist_year > 0: split tables into a HISTORY table (years <=
+%      last_hist_year) and a PROJECTION table (years > last_hist_year)
+%   5. build aggregate categories and the KYOTOGHG* entities
+%   6. remove the regions EARTH and POLYNESIA and call sum_countries_primap
+%
+% Inputs (both optional):
+%   source_name    - name of the raw source (char). Default: 'FAO2022P2'.
+%                    If the only argument is a numeric scalar it is used
+%                    as last_hist_year together with the default source.
+%   last_hist_year - last year regarded as historical. Default: 0, which
+%                    disables the HISTORY / PROJECTION split.
+%
+% Original description, inherited from the CDIAC preparation script (the
+% CDIAC references are legacy wording):
+% Prepares FAO data into country time series. Countries which were
+% split up during the course of history are downscaled to their current
+% members using the shares that were present in the first year in which all
+% members had emission values in the CDIAC source. Countries which are now
+% merged over time get their previous members added up to the country with
+% the current political boundaries.
+% The resulting table is: CO2_CATM0EL_TOTAL_NET_HISTORY_CDIAC2010PROC
+%
+% author : CC
+% version : 20120315
+% version : 20120403 JG - substitute negative data by zero, fixed
+% version : 20131024 JG - CDIAC2013 based on the CDIAC2011 file
+% combine_subregions
+% version : 20150416 JG - delete time series consisting only of zeros
+% version : 20150511 JG - fixed wrong region downscaling
+% version : 20150715 JG - summing to primap country definitions
+% version : 20170120 JG - 2016 data
+% version : 20170825 JG - IPCC 2006 categories
+% version : 20190708 JG - convert to Gg from GgCO2eq for all single gas tables
+% version : 20201009 JG - 2020A data which needs extrapolation as not all sectors have been updated
+% version : 20201207 JG - 2020B data
+
+%% issues FAO
+% gap in Micronesia data (sometimes filled with zero data)
+% gap in Maldives data (sometimes filled with zero data)
+% Saint Kitts and Nevis: very low data in last years (CH4, IPC3A, IPC3C)
+% Afghanistan: very low data in last year (CH4, IPC3C)
+% many countries: very low data before 1990 and sometimes also in last year (CH4, IPC3C)
+% this is probably due to mixing LU and AGRI data, where LU data only covers 1990-2015
+% while AGRI data covers 1960 - 2016 (same for N2O)
+% Saint Lucia: data only until 1980 (CH4, IPC3C)
+
+%%%% drained organic soils currently omitted
+
+global CONSTANTS;
+
+errorID = 'CreateDatabase:DataPreparation:FAO:PrepareFAO2022Data:';
+narginchk(0, 2)
+
+if nargin == 2
+    source = source_name;
+elseif nargin == 1
+    if ischar(source_name)
+        source = source_name;
+        last_hist_year = 0;
+    else
+        source = 'FAO2022P2';
+        % last_hist_year was left undefined in this branch, which made the
+        % split check further down fail. A single numeric scalar is taken
+        % as last_hist_year, anything else disables the split.
+        if isnumeric(source_name) && isscalar(source_name)
+            last_hist_year = source_name;
+        else
+            last_hist_year = 0;
+        end
+    end
+else
+    source = 'FAO2022P2';
+    last_hist_year = 0;
+end
+
+sourceSuffix = 'I';
+%tempSuffix = 'P';
+procSource = [source sourceSuffix];
+%tempSource = [source tempSuffix];
+scenario = 'HISTORY';
+scenarioProjection = 'PROJECTION';
+%GWPspec = CONSTANTS.gwpSARSpecification; % make source dependent if necessary
+
+nYearKey = 5; % number of years to consider for downscaling key
+
+%% TODO check if extrapolation of subsectors needed
+
+%% remove existing proc source
+allProcTablenames = get_table_sheetcodes_for({CONSTANTS.nameOfSheetSource}, {{procSource}});%, tempSource}});
+remove_tables_from_database(allProcTablenames);
+
+%% copy to processed source
+copy_source_scenario_tables_from_to(source, scenario, procSource, scenario);
+
+%% adjust countries
+% first aggregate what are now parts of countries into current countries,
+% by adding parts + country.
+allTablenames = get_table_sheetcodes_for({CONSTANTS.nameOfSheetSource}, ...
+    {{procSource}});
+% switched back on at the end of this function
+warning off Tools:AssertValidcategoryforSourceScenario:ValidCategory
+
+theseTables = struct;
+% for each emissions table, make these group manipulations
+for iTable = 1:length(allTablenames)
+    % adjust countries
+    newTable = adjust_countries(allTablenames{iTable}, nYearKey, false);
+    if isempty(fieldnames(newTable))
+        primap_log([errorID 'EmptyTable'], CONSTANTS.ERRuserErrorWarning, ...
+            ['Table ' allTablenames{iTable} ' empty after country adjustment and removal of NaN and zero data']);
+    else
+        theseTables.(newTable.(CONSTANTS.nameOfSheetCode)) = newTable;
+    end
+
+end
+
+%%% add tables to database
+remove_tables_from_database(allTablenames);
+success = overwrite_or_add_tables_in_database(theseTables, true, false);
+if ~all(success)
+    primap_log([errorID 'NotAllSplitTablesAdded'], CONSTANTS.ERRprogramError, ...
+        'Some downscaled AGRI tables not added to DB. Check preceeding messages for reason');
+end
+
+
+%% split into historical data and projection in case projections are available and last_hist_year is given
+if last_hist_year > 0
+    % get all tablenames
+    newTables = struct;
+    allTablenames = get_table_sheetcodes_for({CONSTANTS.nameOfSheetSource}, {{procSource}});
+    for iTable = 1 : length(allTablenames)
+        % get table from DB
+        currentTable = get_table_from_database(allTablenames{iTable});
+        % check if table contains projection values
+        isProjYear = currentTable.(CONSTANTS.nameOfYearVector) > last_hist_year;
+        if any(isProjYear)
+            % create projection table
+            currentTableProj = currentTable;
+            currentTableProj.(CONSTANTS.nameOfDataField) = currentTableProj.(CONSTANTS.nameOfDataField)(:, isProjYear);
+            currentTableProj.(CONSTANTS.nameOfYearVector) = ...
+                currentTableProj.(CONSTANTS.nameOfYearVector)(isProjYear);
+            currentTableProj.(CONSTANTS.nameOfSheetScenario) = scenarioProjection;
+            currentTableProj.(CONSTANTS.nameOfSheetCode) = concatenate_structure_field_values(currentTableProj, ...
+                CONSTANTS.tableNameConstructionFromSheets);
+            currentTableProj = delete_only_nan_containing_countries(currentTableProj);
+            if ~isempty(fieldnames(currentTableProj))
+                newTables.(currentTableProj.(CONSTANTS.nameOfSheetCode)) = currentTableProj;
+            end
+
+            % create historical table (keeps the sheet code of the unsplit
+            % table and therefore replaces it in the database)
+            currentTableHist = currentTable;
+            currentTableHist.(CONSTANTS.nameOfDataField) = currentTableHist.(CONSTANTS.nameOfDataField)(:, ~isProjYear);
+            currentTableHist.(CONSTANTS.nameOfYearVector) = ...
+                currentTableHist.(CONSTANTS.nameOfYearVector)(~isProjYear);
+            currentTableHist = delete_only_nan_containing_countries(currentTableHist);
+            if ~isempty(fieldnames(currentTableHist))
+                newTables.(currentTableHist.(CONSTANTS.nameOfSheetCode)) = currentTableHist;
+            else
+                % same guard as for the projection table: an empty struct
+                % has no sheet code field. Nothing historical is left, so
+                % the unsplit table (which now only holds projection
+                % years) is removed instead of being overwritten.
+                primap_log([errorID 'EmptyTable'], CONSTANTS.ERRuserErrorWarning, ...
+                    ['Table ' allTablenames{iTable} ' has no historical data up to last_hist_year, removed']);
+                remove_tables_from_database(allTablenames(iTable));
+            end
+        end
+    end
+    success = overwrite_or_add_tables_in_database(newTables, true, false);
+    if ~all(success)
+        primap_log([errorID 'NotAllSplitTablesAdded'], CONSTANTS.ERRprogramError, ...
+            'Some split tables not added to DB. Check preceeding messages for reason');
+    end
+end
+
+
+%% construct higher level IPC categories
+% general parameters
+
+defaultParams=struct;
+%defaultParams.future.type='linear';
+defaultParams.future.type='none';
+defaultParams.future.fityears=15;
+%defaultParams.past.type='linear';
+defaultParams.past.type='none';
+defaultParams.past.fityears=20;
+defaultParams.past.fallback='none';
+
+FAOEntities = {'CO2', 'CH4', 'N2O'};
+
+categories = struct;
+
+%%% IPC3A = IPC3A1 + IPC3A2
+categories.IPC3A = struct;
+categories.IPC3A.targetCategory = 'IPC3A';
+categories.IPC3A.subcategories = {'IPC3A1', 'IPC3A2'};
+categories.IPC3A.entities = FAOEntities;
+categories.IPC3A.params = defaultParams;
+
+
+
+%%% IPCM3C1AG =  IPC3C1B + IPC3C1C
+categories.IPCM3C1AG = struct;
+categories.IPCM3C1AG.targetCategory = 'IPCM3C1AG';
+categories.IPCM3C1AG.subcategories = {'IPC3C1C', 'IPC3C1B'};
+categories.IPCM3C1AG.entities = FAOEntities;
+categories.IPCM3C1AG.params = defaultParams;
+
+%%% IPC3C1 = IPCM3C1AG + IPCM3C1LU
+%%%%%%%%%% LULUCF currently missing
+categories.IPC3C1 = struct;
+categories.IPC3C1.targetCategory = 'IPC3C1';
+categories.IPC3C1.subcategories = {'IPCM3C1AG', 'IPCM3C1LU'};
+categories.IPC3C1.entities = FAOEntities;
+categories.IPC3C1.params = defaultParams;
+
+%%% IPCM3C4AG = IPC3C4A + IPC3C4B + IPC3C4C + IPC3C4D
+% omit currently as drained organic soils is missing but present in M3C45AG
+% categories.IPCM3C4AG = struct;
+% categories.IPCM3C4AG.targetCategory = 'IPCM3C4AG';
+% categories.IPCM3C4AG.subcategories = {'IPC3C4A', 'IPC3C4B', 'IPC3C4C', 'IPC3C4D', };
+% categories.IPCM3C4AG.entities = FAOEntities;
+% categories.IPCM3C4AG.params = defaultParams;
+
+%%% IPC3C4 = IPCM3C4AG
+%%%%%%%%%% LULUCF currently missing
+% NOTE(review): IPCM3C4AG is not constructed in this script (block above is
+% commented out) - confirm that it is present in the source tables
+categories.IPC3C4 = struct;
+categories.IPC3C4.targetCategory = 'IPC3C4';
+categories.IPC3C4.subcategories = {'IPCM3C4AG'};
+categories.IPC3C4.entities = FAOEntities;
+categories.IPC3C4.params = defaultParams;
+
+%%% IPC3C = IPC3C1 + IPCM3C45AG + IPC3C7
+%%% (IPC3C4, IPC3C5 and IPC3C6 are not summed individually here)
+%%%%%%%%%% LULUCF currently missing
+categories.IPC3C = struct;
+categories.IPC3C.targetCategory = 'IPC3C';
+categories.IPC3C.subcategories = {'IPC3C1', 'IPCM3C45AG', 'IPC3C7'}; % 'IPC3C5', 'IPC3C6',
+categories.IPC3C.entities = FAOEntities;
+categories.IPC3C.params = defaultParams;
+
+%%% IPCM3CAG = IPCM3C1AG + IPCM3C45AG + IPC3C7
+categories.IPCM3CAG = struct;
+categories.IPCM3CAG.targetCategory = 'IPCM3CAG';
+categories.IPCM3CAG.subcategories = {'IPCM3C1AG', 'IPCM3C45AG', 'IPC3C7'}; %'IPC3C5', 'IPC3C6',
+categories.IPCM3CAG.entities = FAOEntities;
+categories.IPCM3CAG.params = defaultParams;
+
+% %%% IPCM3CLU = IPCM3C1LU
+% categories.IPCM3CLU = struct;
+% categories.IPCM3CLU.targetCategory = 'IPCM3CLU';
+% categories.IPCM3CLU.subcategories = {'IPCM3C1LU'};
+% categories.IPCM3CLU.entities = FAOEntities;
+% categories.IPCM3CLU.params = defaultParams;
+%
+% %%% IPC3 = IPC3A + IPC3B + IPC3C
+% if any(strcmp(source, {'FAO2021A'}))
+%     categories.IPC3 = struct;
+%     categories.IPC3.targetCategory = 'IPC3';
+%     categories.IPC3.subcategories = {'IPC3A', 'IPCMLULUCF', 'IPCM3CAG'};
+%     categories.IPC3.entities = FAOEntities;
+%     categories.IPC3.params = defaultParams;
+% else
+%     categories.IPC3 = struct;
+%     categories.IPC3.targetCategory = 'IPC3';
+%     categories.IPC3.subcategories = {'IPC3A', 'IPC3B', 'IPC3C'};
+%     categories.IPC3.entities = FAOEntities;
+%     categories.IPC3.params = defaultParams;
+%
+%     %%% IPCMLULUCF = IPC3B + IPC3CLU
+%     categories.IPCMLULUCF = struct;
+%     categories.IPCMLULUCF.targetCategory = 'IPCMLULUCF';
+%     categories.IPCMLULUCF.subcategories = {'IPC3B', 'IPCM3CLU'};
+%     categories.IPCMLULUCF.entities = FAOEntities;
+%     categories.IPCMLULUCF.params = defaultParams;
+% end
+
+
+%%% IPCMAG = IPC3A + IPCM3CAG
+categories.IPCMAG = struct;
+categories.IPCMAG.targetCategory = 'IPCMAG';
+categories.IPCMAG.subcategories = {'IPC3A', 'IPCM3CAG'};
+categories.IPCMAG.entities = FAOEntities;
+categories.IPCMAG.params = defaultParams;
+
+%%% IPCMAGELV = IPCM3CAG
+categories.IPCMAGELV = struct;
+categories.IPCMAGELV.targetCategory = 'IPCMAGELV';
+categories.IPCMAGELV.subcategories = {'IPCM3CAG'};
+categories.IPCMAGELV.entities = FAOEntities;
+categories.IPCMAGELV.params = defaultParams;
+
+% Kyoto GHG baskets for the different GWP specifications.
+% NOTE(review): 'IPC3' and 'IPCMLULUCF' are listed below although their
+% construction is commented out above - confirm they exist in the source.
+entities = struct;
+entities.KYOTOGHG = struct;
+entities.KYOTOGHG.targetEntity = 'KYOTOGHG';
+entities.KYOTOGHG.subEntities = {'CO2', 'CH4', 'N2O'};
+entities.KYOTOGHG.categories = {'IPC3', 'IPC3A', 'IPC3C', 'IPCMAG', 'IPCMAGELV', 'IPCMLULUCF'}; %'IPC3B',
+entities.KYOTOGHG.params = defaultParams;
+entities.KYOTOGHG.GWP = CONSTANTS.gwpSARSpecification;
+
+entities.KYOTOGHGAR4 = struct;
+entities.KYOTOGHGAR4.targetEntity = 'KYOTOGHGAR4';
+entities.KYOTOGHGAR4.subEntities = {'CO2', 'CH4', 'N2O'};
+entities.KYOTOGHGAR4.categories = {'IPC3', 'IPC3A', 'IPC3C', 'IPCMAG', 'IPCMAGELV', 'IPCMLULUCF'}; %'IPC3B',
+entities.KYOTOGHGAR4.params = defaultParams;
+entities.KYOTOGHGAR4.GWP = CONSTANTS.gwpAR4Specification;
+
+entities.KYOTOGHGAR5 = struct;
+entities.KYOTOGHGAR5.targetEntity = 'KYOTOGHGAR5';
+entities.KYOTOGHGAR5.subEntities = {'CO2', 'CH4', 'N2O'};
+entities.KYOTOGHGAR5.categories = {'IPC3', 'IPC3A', 'IPC3C', 'IPCMAG', 'IPCMAGELV', 'IPCMLULUCF'}; %'IPC3B',
+entities.KYOTOGHGAR5.params = defaultParams;
+entities.KYOTOGHGAR5.GWP = CONSTANTS.gwpAR5Specification;
+
+entities.KYOTOGHGAR6 = struct;
+entities.KYOTOGHGAR6.targetEntity = 'KYOTOGHGAR6';
+entities.KYOTOGHGAR6.subEntities = {'CO2', 'CH4', 'N2O'};
+entities.KYOTOGHGAR6.categories = {'IPC3', 'IPC3A', 'IPC3C', 'IPCMAG', 'IPCMAGELV', 'IPCMLULUCF'}; %'IPC3B',
+entities.KYOTOGHGAR6.params = defaultParams;
+entities.KYOTOGHGAR6.GWP = CONSTANTS.gwpAR6Specification;
+
+aggregate_and_extrapolate_source(procSource, scenario, categories, entities);
+aggregate_and_extrapolate_source(procSource, scenarioProjection, categories, entities);
+
+remove_desired_regions_from_source_scenario({'EARTH', 'POLYNESIA'}, procSource, scenario);
+remove_desired_regions_from_source_scenario({'EARTH', 'POLYNESIA'}, procSource, scenarioProjection);
+
+% TODO
+% remove years with partial data from aggregate timeseries
+% FAO2020B
+% IPC3C1, IPCM3CAG, IPC3C, IPC3, IPCMAG, IPCMAGELV, IPC3B, IPCMLULUCF (remove 2019, 2020)
+
+% sum countries to CRF / UNFCCC reporting level
+sum_countries_primap(procSource, false);
+
+
+
+%% remove the FAO terminology tables
+% tempTables = get_table_sheetcodes_for({CONSTANTS.nameOfSheetSource}, {{tempSource}});
+% %success = remove_tables_from_database(tempTables);
+% if ~all(success)
+%     disp('some tales could not be removed from the DB');
+% end
+% keep them as sometimes country downscaled tables in FAO terminology are needed
+
+% re-enable the warning that was switched off before the country adjustment
+warning on Tools:AssertValidcategoryforSourceScenario:ValidCategory
+
+end %prepare_FAO2022TOT_data
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+function newTable = adjust_countries(tablename, nYearKey, negativeToZero)
+    % Load one table and bring its country dimension to current countries.
+    %
+    % Inputs:
+    %   tablename      - sheet code of the table to read from the database
+    %   nYearKey       - maximum number of years used to build the
+    %                    downscaling key (passed on to downscale_existing)
+    %   negativeToZero - if true, negative data values are set to zero
+    %                    before any other processing
+    % Output:
+    %   newTable       - adjusted table, or an empty struct if no country
+    %                    is left after removing all-NaN / all-zero countries
+    global CONSTANTS;
+
+    thisTable = get_table_from_database(tablename);
+    %thisTable.(CONSTANTS.nameOfSheetSource) = tempSource;
+    %thisTable.(CONSTANTS.nameOfSheetCode) = concatenate_structure_field_values(thisTable,'default');
+    thisTable.(CONSTANTS.nameOfSheetDescr) = [thisTable.(CONSTANTS.nameOfSheetDescr) ...
+        ', regions summed and downscaled to country time series'];
+    source = thisTable.(CONSTANTS.nameOfSheetSource);
+
+    if negativeToZero
+        % change negative values to zero for FAO1
+        % (data field addressed via CONSTANTS like everywhere else)
+        isneg = thisTable.(CONSTANTS.nameOfDataField) < 0;
+        thisTable.(CONSTANTS.nameOfDataField)(isneg) = 0;
+    end
+
+    % remove countries which contain only NaN data
+    thisTable = delete_only_nan_containing_countries(thisTable);
+    % remove countries which contain only zero data
+    thisTable = delete_only_zero_containing_countries(thisTable);
+
+    if ~isempty(fieldnames(thisTable))
+        %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+        %% REGIONS DOWNSCALED TO COUNTRIES, REGIONS DELETED
+        % FIRST, REGIONS THAT SPLIT INTO COUNTRIES THAT EXIST IN FAO
+        % split most recent first. Some regions split into other regions that
+        % must be split first. The order of the rows below is therefore
+        % significant and must not be changed.
+        %
+        % column 1: region code (members are taken from get_members_of)
+        % column 2: first year with individual country data (yearKey)
+        %
+        % FSU    : USSR = RUSSIA + ARMENIA + AZERBAIJAN + BELARUS + ESTONIA
+        %          + GEORGIA + KAZAKHSTAN + KYRGYZSTAN + LATVIA + LITHUANIA +
+        %          MOLDOVA + TAJIKISTAN + TURKMENISTAN + UKRAINE + UZBEKISTAN
+        %          (<= 1991), doesn't work for FAO1K
+        % SRBMNE : SERBIA/MONTENEGRO = SERBIA + MONTENEGRO (<= 2005)
+        % YUG    : YUGOSLAVIA = BOSNIA/HERZEGOVINA + CROATIA + MACEDONIA +
+        %          SLOVENIA + SERBIA + MONTENEGRO (<= 1991)
+        % CZESVK : CZECHOSLOVAKIA = CZECH REPUBLIC + SLOVAKIA (<= 1991)
+        % TTPI   : Pacific Islands Trust Territory = Federated States of
+        %          Micronesia + Palau + Northern Mariana Islands + Marshall
+        %          Islands (<= 1994)
+        % BELLUX : Belgium + Luxembourg (<= 1999)
+        % ETHERI : Ethiopia + Eritrea (<= 1992)
+        splitRegions = { ...
+            'FSU',    1992; ...
+            'SRBMNE', 2006; ...
+            'YUG',    1992; ...
+            'CZESVK', 1992; ...
+            'TTPI',   1995; ...
+            'BELLUX', 2000; ...
+            'ETHERI', 1993; ...
+            };
+        for iRegion = 1:size(splitRegions, 1)
+            regions2Delete = splitRegions(iRegion, 1); % 1x1 cell
+            theseCountries = get_members_of(regions2Delete{1});
+            yearKey = splitRegions{iRegion, 2};
+            thisTable = downscale_existing(thisTable,regions2Delete,theseCountries,yearKey, nYearKey, true);
+        end
+
+        % Sudan and South Sudan (South Sudan has individual data for 2012 onwards)
+        % older releases: combined data is stored under the code SDN
+        % NOTE(review): neither of the two source lists below contains the
+        % default processed source 'FAO2022P2I' - confirm that no Sudan
+        % downscaling is needed for that source.
+        if any(strcmp(source, {'FAO2016P', 'FAO2018P', 'FAO2019AP', 'FAO2019BP', 'FAO2020AP', 'FAO2020BP', ...
+                'FAO2020CP', 'FAO2021AP'}))
+            regions2Delete = {'SDN'};
+            theseCountries = {'SDN', 'SSD'};
+            yearKey = 2012;
+            thisTable = downscale_existing(thisTable,regions2Delete,theseCountries,yearKey, nYearKey, true);
+        end
+
+        % Sudan and South Sudan (South Sudan has individual data for 2012 onwards)
+        % newer releases: combined data is stored under the code SDNSSD
+        if any(strcmp(source, {'FAO2023AI', 'FAO2024AI'}))
+            regions2Delete = {'SDNSSD'};
+            theseCountries = {'SDN', 'SSD'};
+            yearKey = 2012;
+            thisTable = downscale_existing(thisTable,regions2Delete,theseCountries,yearKey, nYearKey, true);
+        end
+
+        % interpolate gap in FSM data
+        dataFSM = get_cydata_from_independent_table(thisTable, {'FSM'}, 'all');
+        if ~all(isnan(dataFSM.(CONSTANTS.nameOfDataField)))
+            gaps = get_gaps(dataFSM.(CONSTANTS.nameOfDataField));
+            if ~isempty(gaps)
+                dataFSM.(CONSTANTS.nameOfDataField) = interpolate_in_single_dv(gaps, dataFSM.(CONSTANTS.nameOfDataField));
+                thisTable = add_cydata_to_independent_table(thisTable, dataFSM);
+            end
+        end
+
+        % remove countries which contain only NaN data
+        thisTable = delete_only_nan_containing_countries(thisTable);
+        % remove countries which contain only zero data
+        newTable = delete_only_zero_containing_countries(thisTable);
+        %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+    else
+        newTable = struct;
+    end
+end
+
+
+function thisTable = downscale_existing(thisTable,regions2Delete,theseCountries, yearKey, nYearKey, remove)
+    % Split the data of a region proportionally onto its member countries.
+    %
+    % The shares (key) are the country sums over at most nYearKey years,
+    % starting with the first year >= yearKey in which the sum over all
+    % member countries is neither NaN nor zero. Region data before that
+    % year is distributed with these shares and added to the existing
+    % country data.
+    %
+    % Inputs:
+    %   thisTable      - table to work on
+    %   regions2Delete - cell with the code of the region to downscale
+    %   theseCountries - cell with the codes of the member countries
+    %   yearKey        - first year considered for the key
+    %   nYearKey       - maximum number of years used for the key
+    %   remove         - (optional, default true) remove the region from
+    %                    the table after processing
+    % Output:
+    %   thisTable      - table with downscaled data; returned unchanged if
+    %                    the region is not present in the table
+
+    global CONSTANTS
+    % trailing colon so that the mnemonic appended below forms its own
+    % component of the message ID (as in prepare_FAO2022TOT_data)
+    errorID = 'CreateDatabase:DataPreparation:FAO:PrepareFAO2016Data:DownscaleExisting:';
+    if nargin < 6
+        remove = true;
+    end
+
+    cyRegion = get_cydata_from_independent_table(thisTable,regions2Delete,'all');
+    if isempty(cyRegion.(CONSTANTS.nameOfCountryVector))
+        % region not contained in this table
+        return;
+    end
+    cyKey = get_cydata_from_independent_table(thisTable,theseCountries,'all');
+    cyDownscaled = cyRegion;
+    cyDownscaled.(CONSTANTS.nameOfCountryVector) = cyKey.(CONSTANTS.nameOfCountryVector);
+    %split only if region is not all NaN or zeros
+    if ~all(isnan(cyRegion.(CONSTANTS.nameOfDataField)) | cyRegion.(CONSTANTS.nameOfDataField) == 0)
+        % remove all years before yearKey. A mask is used instead of
+        % searching for yearKey itself, so the key is not emptied when the
+        % year vector starts after yearKey or does not contain it.
+        keepYears = cyKey.(CONSTANTS.nameOfYearVector) >= yearKey;
+        cyKey.(CONSTANTS.nameOfYearVector) = cyKey.(CONSTANTS.nameOfYearVector)(keepYears);
+        cyKey.(CONSTANTS.nameOfDataField) = cyKey.(CONSTANTS.nameOfDataField)(:,keepYears);
+        % check which year the first nonzero value arises
+        yearSums = nansum_primap(cyKey.(CONSTANTS.nameOfDataField),1);
+        iYearStart = find(~isnan(yearSums) & (yearSums ~= 0));
+        if isempty(iYearStart)
+            % cannot divide regional data
+            primap_log([errorID 'ProgramError'], CONSTANTS.ERRuserErrorWarning, ...
+                ['Downscaling not possible for region ' regions2Delete{1} ...
+                ', year(s) for downscaling key have only zeros or NaN. Table: ' ...
+                thisTable.(CONSTANTS.nameOfSheetCode)]);
+            if remove
+                thisTable = remove_countries_from_independent_table(thisTable,regions2Delete);
+            end
+            return
+        end
+        % for the data to be downscaled remove all data beginning with the startYear
+        yearStart = cyKey.(CONSTANTS.nameOfYearVector)(iYearStart(1));
+        iYearStartRegion = find(cyDownscaled.(CONSTANTS.nameOfYearVector) == yearStart);
+        cyDownscaled.(CONSTANTS.nameOfDataField) = cyDownscaled.(CONSTANTS.nameOfDataField)(:, 1 : iYearStartRegion - 1);
+        cyDownscaled.(CONSTANTS.nameOfYearVector) = cyDownscaled.(CONSTANTS.nameOfYearVector)(1 : iYearStartRegion - 1);
+        % for the key remove all years before yearStart
+        cyKey.(CONSTANTS.nameOfYearVector) = cyKey.(CONSTANTS.nameOfYearVector)(iYearStart(1):end);
+        cyKey.(CONSTANTS.nameOfDataField) = cyKey.(CONSTANTS.nameOfDataField)(:,iYearStart(1):end);
+        % key is constructed from nYearKey years or available years,
+        % whatever is less
+        if length(cyKey.(CONSTANTS.nameOfYearVector)) > nYearKey
+            cyKey.(CONSTANTS.nameOfYearVector) = cyKey.(CONSTANTS.nameOfYearVector)(1:nYearKey);
+            cyKey.(CONSTANTS.nameOfDataField) = cyKey.(CONSTANTS.nameOfDataField)(:,1:nYearKey);
+        end
+        % share of each country = its sum over the key years / total sum
+        key = nansum_primap(cyKey.(CONSTANTS.nameOfDataField),2);
+        arrRatio = repmat(key/nansum_primap(key), ...
+            1,length(cyDownscaled.(CONSTANTS.nameOfYearVector)));
+        cyDownscaled.(CONSTANTS.nameOfDataField) = arrRatio .* ...
+            repmat(cyDownscaled.(CONSTANTS.nameOfDataField),length(cyKey.(CONSTANTS.nameOfCountryVector)),1);
+        cyDownscaled = delete_only_nan_containing_countries(cyDownscaled);
+
+        % add downscaled numbers to existing data
+        thisCydata = get_cydata_from_independent_table(thisTable,theseCountries,'all');
+        %thisCydata = delete_only_nan_containing_countries(thisCydata);
+        warning off Tools:AssertContentOfCydata:FailedValidation
+        thisNewCydata = add_cydata_to_cydata(thisCydata, cyDownscaled);
+        %thisNewCydata = summation_of_cydata(thisCydata, cyDownscaled,false,true);
+        warning on Tools:AssertContentOfCydata:FailedValidation
+        if remove
+            thisTable = remove_countries_from_independent_table(thisTable,regions2Delete);
+        end
+        thisTable = add_cydata_to_independent_table(thisTable,thisNewCydata);
+    else
+        % region holds only NaN / zero data: nothing to distribute
+        if remove
+            thisTable = remove_countries_from_independent_table(thisTable,regions2Delete);
+        end
+    end
+end
+
+
+
+
+% countries in the source which are not UNFCCC
+%   'ARUBA'
+%    'ANGUILLA'
+%    'NETHERLANDS ANTILLES'
+%    'AMERICAN SAMOA'
+%    'BERMUDA'
+%    'CHANNEL ISLANDS'
+%    'CAYMAN ISLANDS'
+%    'WESTERN SAHARA'
+%    'FALKLAND ISLANDS (MALVINAS)'
+%    'FAROE ISLANDS'
+%    'GIBRALTAR'
+%    'GUADELOUPE'
+%    'GREENLAND'
+%    'FRENCH GUIANA'
+%    'GUAM'
+%    'HONG KONG'
+%    'ISLE OF MAN'
+%    'MACAO'
+%    'NORTHERN MARIANA ISLANDS'
+%    'MONTSERRAT'
+%    'MARTINIQUE'
+%    'MAYOTTE'
+%    'NEW CALEDONIA'
+%    'NORFOLK ISLAND'
+%    'PITCAIRN, HENDERSON, DUICE AN...'
+%    'PUERTO RICO'
+%    'PALESTINIAN TERRITORY, Occupied'
+%    'FRENCH POLYNESIA'
+%    'REUNION'
+%    'SAINT HELENA, ASCENSION AND T...'
+%    'SVALBARD AND JAN MAYEN ISLANDS'
+%    'SAINT PIERRE AND MIQUELON'
+%    'TURKS AND CAICOS ISLANDS'
+%    'TOKELAU'
+%    'TAIWAN'
+%    'VIRGIN ISLANDS (BRITISH)'
+%    'VIRGIN ISLANDS (U.S.)'
+%    'WALLIS AND FUTUNA ISLANDS'

+ 148 - 32
src/faostat_data_primap/helper/category_aggregation.py

@@ -382,7 +382,13 @@ agg_info_ipcc2006_primap = {
     "category (IPCC2006_PRIMAP)": {
         "3.C.1": {  # Emissions from Biomass Burning
             "sources": [
-                "3.C.1.a",  # Biomass Burning In Forest Lands
+                # "3.C.1.a",  # Biomass Burning In Forest Lands, because not there in 2023 release
+                "3.C.1.b",  # Biomass Burning In Croplands
+                "3.C.1.c",  # Biomass Burning in Grasslands
+            ],
+        },
+        "M.3.C.1.AG": {  # Emissions from Biomass Burning
+            "sources": [
                 "3.C.1.b",  # Biomass Burning In Croplands
                 "3.C.1.c",  # Biomass Burning in Grasslands
             ],
@@ -392,27 +398,31 @@ agg_info_ipcc2006_primap = {
                 "3.C.1.b",  # Biomass Burning In Croplands - looks good (CH4, N2O)
                 "3.C.1.c",  # Biomass Burning in Grasslands - looks good (CH4)
                 "3.C.4",  # Direct N2O Emissions from Managed Soils
-                "M.3.C.4.SF",  # synthetic fertilisers
-                "3.C.5",  # Indirect N2O Emissions from Managed Soils
-                "M.3.C.5.SF",  # synthetic fertilisers
-                "3.C.6",  # Indirect N2O Emissions from Manure Management
+                "M.3.C.4.SF",  # synthetic fertilisers direct
+                # "3.C.5",  # Indirect N2O Emissions from Managed Soils, empty
+                "M.3.C.5.SF",  # synthetic fertilisers indirect
+                # "3.C.6",  # Indirect N2O Emissions from Manure Management
                 "3.C.7",  # rice cultivation
+                "3.B.2",  # Drained grassland, was in LULUCF originally
+                "3.B.3",  # Drained cropland, was in LULUCF originally
             ],
+            "sel": {"variable": ["N2O"]},
         },
         "M.AG.ELV": {
-            "sources": ["M.3.C.AG"],  # "M.3.D.AG" is zero
+            "sources": ["M.3.C.AG"],
+            "sel": {"variable": ["N2O"]},  # "M.3.D.AG" is zero
         },
         "3.C": {
             "sources": [
-                "3.C.1",
-                "3.C.2",
-                "3.C.3",
-                "3.C.4",  # excluding synthetic fertilisers
-                "M.3.C.4.SF",  # synthetic fertilisers
-                "3.C.5",  # excluding synthetic fertilisers
-                "M.3.C.5.SF",  # synthetic fertilisers
-                "3.C.6",
-                "3.C.7",
+                "M.3.C.1.AG",  # maybe better 3.C.1?
+                "3.C.4",  # Direct N2O Emissions from Managed Soils
+                "M.3.C.4.SF",  # synthetic fertilisers direct
+                # "3.C.5",  # Indirect N2O Emissions from Managed Soils, empty
+                "M.3.C.5.SF",  # synthetic fertilisers indirect
+                # "3.C.6",  # Indirect N2O Emissions from Manure Management
+                "3.C.7",  # rice cultivation
+                "3.B.2",  # Drained grassland, was in LULUCF originally
+                "3.B.3",  # Drained cropland, was in LULUCF originally
             ]
         },
         "3.A.1.a": {  # enteric fermentation
@@ -440,30 +450,136 @@ agg_info_ipcc2006_primap = {
                 "3.A.2.a.ii",  # cattle (non-dairy)
             ]
         },
-        "3.A.2": {  # decomposition of manure - CH4, N2O
-            "sources": [
-                "3.A.2.a",
-                "3.A.2.b",
-                "3.A.2.c",
-                "3.A.2.d",
-                "3.A.2.e",
-                "3.A.2.f",
-                "3.A.2.g",
-                "3.A.2.h",
-                "3.A.2.i",
-                "3.A.2.j",
-            ]
-        },
+        # "3.A.2": {  # decomposition of manure - CH4, N2O
+        #     "sources": [
+        #         "3.A.2.a",
+        #         "3.A.2.b",
+        #         "3.A.2.c",
+        #         "3.A.2.d",
+        #         "3.A.2.e",
+        #         "3.A.2.f",
+        #         "3.A.2.g",
+        #         "3.A.2.h",
+        #         "3.A.2.i",
+        #         "3.A.2.j",
+        #     ]
+        # },
         "3.A": {"sources": ["3.A.1", "3.A.2"]},
-        "M.AG": {"sources": ["3.A", "M.AG.ELV"]},
+        "M.AG": {
+            "sources": [
+                "3.A",
+                "M.AG.ELV",
+            ],
+            "sel": {"variable": ["N2O"]},
+        },
+        "M.LULUCF": {
+            "sources": [
+                "3.B.1",  # Carbon stock change in forests (FAO 4, or 4.A and 4.B)
+                "M.NFC",
+                # "3.B.2",  # Drained grassland
+                # "3.B.3",  # Drained cropland
+                "3.C.1.a",  # Biomass Burning In Forests
+            ],
+            "sel": {"variable": ["N2O"]},
+        },
+        "3": {
+            "sources": ["M.AG", "M.LULUCF"],
+            "sel": {"variable": ["N2O"]},
+        },
+    }
+}
+
+# Aggregation rules for CO2 in the "category (IPCC2006_PRIMAP)" dimension.
+# Each key is a target category; "sources" lists the categories it is built
+# from and "sel" restricts the rule to the variable CO2.
+# NOTE(review): all "sources" of "M.3.C.AG" are commented out, so the list is
+# empty - confirm that the aggregation code accepts an empty source list.
+# NOTE(review): "3.C.1.b" is labelled "Fires in organic soils" under
+# "M.LULUCF" but "Biomass Burning In Croplands" elsewhere in this file -
+# confirm which label applies to the CO2 data.
+agg_info_ipcc2006_primap_CO2 = {
+    "category (IPCC2006_PRIMAP)": {
+        "M.3.C.AG": {
+            "sources": [
+                # "3.C.1.b",  # Biomass Burning In Croplands - looks good (CH4, N2O)
+                # "3.C.1.c",  # Biomass Burning in Grasslands - looks good (CH4)
+                # "3.C.4",  # Direct N2O Emissions from Managed Soils, only N2O
+                # "M.3.C.4.SF",  # synthetic fertilisers direct, only N2O
+                # "3.C.5",  # Indirect N2O Emissions from Managed Soils, only N2O
+                # "M.3.C.5.SF",  # synthetic fertilisers indirect, only N2O
+                # "3.C.6",  # Indirect N2O Emissions from Manure Management, only N2O
+                # "3.C.7",  # rice cultivation, only CH4
+                # "3.B.2",  # Drained grassland, is already in LULUCF and seems to fit
+                # "3.B.3",  # Drained cropland, is already in LULUCF and seems to fit
+                # "2.G",  # pesticides and fertilisers manufacturing, doesn't match
+            ],
+            "sel": {"variable": ["CO2"]},
+        },
+        "M.AG.ELV": {
+            "sources": [
+                "M.3.C.AG",
+            ],
+            "sel": {"variable": ["CO2"]},
+        },
+        "M.AG": {
+            "sources": [
+                "3.A",
+                "M.AG.ELV",
+            ],
+            "sel": {"variable": ["CO2"]},
+        },
+        "M.LULUCF": {
+            "sources": [
+                "3.B.1",  # Carbon stock change in forests (FAO 4, or 4.A and 4.B)
+                "3.B.2",  # Drained grassland
+                "3.B.3",  # Drained cropland
+                # "3.C.1.a",  # Biomass Burning In Forests
+                "3.C.1.b",  # Fires in organic soils
+            ],
+            "sel": {"variable": ["CO2"]},
+        },
+        "3": {
+            "sources": ["M.AG", "M.LULUCF"],
+            "sel": {"variable": ["CO2"]},
+        },
+    }
+}
+
+
+# Aggregation rules for CH4 in the "category (IPCC2006_PRIMAP)" dimension.
+# Each key is a target category; "sources" lists the categories it is built
+# from and "sel" restricts the rule to the variable CH4. Entries that are
+# commented out are not part of the CH4 aggregation.
+agg_info_ipcc2006_primap_CH4 = {
+    "category (IPCC2006_PRIMAP)": {
+        "M.3.C.AG": {
+            "sources": [
+                "3.C.1.b",  # Biomass Burning In Croplands - looks good (CH4, N2O)
+                "3.C.1.c",  # Biomass Burning in Grasslands - looks good (CH4)
+                # "3.C.4",  # Direct N2O Emissions from Managed Soils
+                # "M.3.C.4.SF",  # synthetic fertilisers direct
+                # "3.C.5",  # Indirect N2O Emissions from Managed Soils, empty
+                # "M.3.C.5.SF",  # synthetic fertilisers indirect
+                # "3.C.6",  # Indirect N2O Emissions from Manure Management
+                "3.C.7",  # rice cultivation
+                # "3.B.2",  # Drained grassland, was in LULUCF originally
+                # "3.B.3",  # Drained cropland, was in LULUCF originally
+            ],
+            "sel": {"variable": ["CH4"]},
+        },
+        "M.AG.ELV": {
+            "sources": [
+                "M.3.C.AG",
+            ],
+            "sel": {"variable": ["CH4"]},
+        },
+        "M.AG": {
+            "sources": [
+                "3.A",
+                "M.AG.ELV",
+            ],
+            "sel": {"variable": ["CH4"]},
+        },
         "M.LULUCF": {
             "sources": [
                 "3.B.1",  # Carbon stock change in forests
                 "3.B.2",  # Drained grassland
                 "3.B.3",  # Drained cropland
                 "3.C.1.a",  # Biomass Burning In Forests
-            ]
+            ],
+            "sel": {"variable": ["CH4"]},
+        },
+        "3": {
+            "sources": ["M.AG", "M.LULUCF"],
+            "sel": {"variable": ["CH4"]},
         },
-        "3": {"sources": ["M.AG", "M.LULUCF"]},
     }
 }

+ 34 - 20
src/faostat_data_primap/helper/definitions.py

@@ -294,13 +294,13 @@ read_config_all: Any = {
                 "Rye - Crops total (Emissions N2O)": "1.A.10",
                 "Sorghum - Crops total (Emissions N2O)": "1.A.11",
                 "Soya beans - Crops total (Emissions N2O)": "1.A.12",
+                "All Crops - Crop residues (Emissions N2O)": "M.1.CR",
+                "All Crops - Burning crop residues (Emissions N2O)": "M.1.BCR",
+                "All Crops - Burning crop residues (Emissions CH4)": "M.1.BCR",
             },
             "items-elements_to_remove": [
-                "All Crops - Crop residues (Emissions N2O)",
                 "All Crops - Crop residues (Direct emissions N2O)",
                 "All Crops - Crop residues (Indirect emissions N2O)",
-                "All Crops - Burning crop residues (Emissions N2O)",
-                "All Crops - Burning crop residues (Emissions CH4)",
             ],
             "columns_to_drop": [
                 "Element",
@@ -417,9 +417,11 @@ read_config_all: Any = {
                 "Rice - Crops total (Emissions CH4)": "1.A.2",
                 "Wheat - Crops total (Emissions N2O)": "1.A.1",
                 "Wheat - Crops total (Emissions CH4)": "1.A.1",
+                "All Crops - Crop residues (Emissions N2O)": "M.1.CR",
+                "All Crops - Burning crop residues (Emissions N2O)": "M.1.BCR",
+                "All Crops - Burning crop residues (Emissions CH4)": "M.1.BCR",
             },
             "items-elements_to_remove": [
-                "All Crops - Crop residues (Emissions N2O)",
                 "All Crops - Crop residues (Direct emissions N2O)",
                 "All Crops - Crop residues (Indirect emissions N2O)",
                 "All Crops - Burning crop residues (Emissions N2O)",
@@ -490,6 +492,13 @@ read_config_all: Any = {
                 "Indirect emissions (N2O that volatilises) (Manure applied)": "N2O",
                 "Manure applied to soils (Indirect emissions N2O)": "N2O",
             },
+            "category_mapping_item_element": {
+                "All Animals - Enteric fermentation (Emissions CH4)": "M.3.EF",
+                "All Animals - Manure management (Emissions CH4)": "M.3.MM",
+                "All Animals - Manure management (Emissions N2O)": "M.3.MM",
+                "All Animals - Manure left on pasture (Emissions N2O)": "M.3.MP",
+                "All Animals - Emissions (N2O) (Manure applied)": "M.3.MA",
+            },
             "category_mapping_item": {
                 "All Animals": "3",
                 "Asses": "3.A",
@@ -534,12 +543,6 @@ read_config_all: Any = {
                 "Manure applied to soils (Indirect emissions N2O)": ".3.b",
             },
             "items-elements_to_remove": [
-                # we only keep All animals total CH4 and total N2O
-                "All Animals - Enteric fermentation (Emissions CH4)",
-                "All Animals - Manure management (Emissions CH4)",
-                "All Animals - Manure management (Direct emissions N2O)",
-                "All Animals - Manure management (Indirect emissions N2O)",
-                "All Animals - Manure left on pasture (Emissions N2O)",
                 "All Animals - Manure left on pasture (Direct emissions N2O)",
                 (
                     "All Animals - Indirect emissions (N2O that leaches) "
@@ -550,7 +553,6 @@ read_config_all: Any = {
                     "(Manure on pasture)"
                 ),
                 "All Animals - Manure left on pasture (Indirect emissions N2O)",
-                "All Animals - Emissions (N2O) (Manure applied)",
                 "All Animals - Manure applied to soils (Direct emissions N2O)",
                 "All Animals - Indirect emissions (N2O that leaches) (Manure applied)",
                 (
@@ -558,7 +560,13 @@ read_config_all: Any = {
                     "(Manure applied)"
                 ),
                 "All Animals - Manure applied to soils (Indirect emissions N2O)",
-                "All Animals - Manure management (Emissions N2O)",
+                "All Animals - Manure management (Direct emissions N2O)",
+                "All Animals - Manure management (Indirect emissions N2O)",
+                # "All Animals - Enteric fermentation (Emissions CH4)",
+                # "All Animals - Manure management (Emissions CH4)",
+                # "All Animals - Manure management (Emissions N2O)",
+                # "All Animals - Manure left on pasture (Emissions N2O)",
+                # "All Animals - Emissions (N2O) (Manure applied)",
             ],
             "columns_to_drop": [
                 "Element",
@@ -623,6 +631,13 @@ read_config_all: Any = {
                 "Indirect emissions (N2O that volatilises) (Manure applied)": "N2O",
                 "Manure applied to soils (Indirect emissions N2O)": "N2O",
             },
+            "category_mapping_item_element": {
+                "All Animals - Enteric fermentation (Emissions CH4)": "M.3.EF",
+                "All Animals - Manure management (Emissions CH4)": "M.3.MM",
+                "All Animals - Manure management (Emissions N2O)": "M.3.MM",
+                "All Animals - Manure left on pasture (Emissions N2O)": "M.3.MP",
+                "All Animals - Emissions (N2O) (Manure applied)": "M.3.MA",
+            },
             "category_mapping_item": {
                 "All Animals": "3",
                 "Asses": "3.A",
@@ -667,12 +682,6 @@ read_config_all: Any = {
                 "Manure applied to soils (Indirect emissions N2O)": ".3.b",
             },
             "items-elements_to_remove": [
-                # we only keep All animals total CH4 and total N2O
-                "All Animals - Enteric fermentation (Emissions CH4)",
-                "All Animals - Manure management (Emissions CH4)",
-                "All Animals - Manure management (Direct emissions N2O)",
-                "All Animals - Manure management (Indirect emissions N2O)",
-                "All Animals - Manure left on pasture (Emissions N2O)",
                 "All Animals - Manure left on pasture (Direct emissions N2O)",
                 (
                     "All Animals - Indirect emissions (N2O that leaches) "
@@ -683,7 +692,6 @@ read_config_all: Any = {
                     "(Manure on pasture)"
                 ),
                 "All Animals - Manure left on pasture (Indirect emissions N2O)",
-                "All Animals - Emissions (N2O) (Manure applied)",
                 "All Animals - Manure applied to soils (Direct emissions N2O)",
                 "All Animals - Indirect emissions (N2O that leaches) (Manure applied)",
                 (
@@ -691,7 +699,13 @@ read_config_all: Any = {
                     "(Manure applied)"
                 ),
                 "All Animals - Manure applied to soils (Indirect emissions N2O)",
-                "All Animals - Manure management (Emissions N2O)",
+                "All Animals - Manure management (Direct emissions N2O)",
+                "All Animals - Manure management (Indirect emissions N2O)",
+                # "All Animals - Enteric fermentation (Emissions CH4)",
+                # "All Animals - Manure management (Emissions CH4)",
+                # "All Animals - Manure management (Emissions N2O)",
+                # "All Animals - Manure left on pasture (Emissions N2O)",
+                # "All Animals - Emissions (N2O) (Manure applied)",
             ],
             "columns_to_drop": [
                 "Element",

+ 19 - 7
src/faostat_data_primap/read.py

@@ -136,7 +136,7 @@ def read_data(  # noqa: PLR0915 PLR0912
         # sometimes there are too many categories per domain to write
         # everything in the config file
         # TODO we could do this for crops as well, but it's not necessary
-        elif ("category_mapping_element" in read_config.keys()) and (
+        if ("category_mapping_element" in read_config.keys()) and (
             "category_mapping_item" in read_config.keys()
         ):
             # split steps for easier debugging
@@ -146,9 +146,21 @@ def read_data(  # noqa: PLR0915 PLR0912
             df_domain["mapped_element"] = df_domain["Element"].map(
                 read_config["category_mapping_element"]
             )
-            df_domain["category"] = (
-                df_domain["mapped_item"] + df_domain["mapped_element"]
-            )
+            if "category" in df_domain.columns:
+                df_domain["category_1"] = (
+                    df_domain["mapped_item"] + df_domain["mapped_element"]
+                )
+                df_domain["category"] = df_domain["category"].fillna(
+                    df_domain["category_1"]
+                )
+                df_domain = df_domain.drop(
+                    labels=["category_1"],
+                    axis=1,
+                )
+            else:
+                df_domain["category"] = (
+                    df_domain["mapped_item"] + df_domain["mapped_element"]
+                )
             df_domain = df_domain.drop(
                 labels=[
                     "mapped_item",
@@ -156,9 +168,6 @@ def read_data(  # noqa: PLR0915 PLR0912
                 ],
                 axis=1,
             )
-        else:
-            msg = f"Could not find mapping for {domain=}."
-            raise ValueError(msg)
 
         # some rows can only be removed by Item - Element column
         if "items-elements_to_remove" in read_config.keys():
@@ -167,6 +176,9 @@ def read_data(  # noqa: PLR0915 PLR0912
                     read_config["items-elements_to_remove"]
                 )
             ]
+        # else:
+        #     msg = f"Could not find mapping for {domain=}."
+        #     raise ValueError(msg)
 
         # drop combined item - element columns
         df_domain = df_domain.drop(

+ 37 - 15
tests/unit/test_conversion.py

@@ -5,6 +5,8 @@ import xarray as xr
 from src.faostat_data_primap.helper.category_aggregation import (
     agg_info_fao,
     agg_info_ipcc2006_primap,
+    agg_info_ipcc2006_primap_CH4,
+    agg_info_ipcc2006_primap_CO2,
 )
 from src.faostat_data_primap.helper.paths import (
     downloaded_data_path,
@@ -26,6 +28,10 @@ def test_conversion_from_FAO_to_IPCC2006_PRIMAP():
     }
     # release_name = "v2024-11-14"
     release_name = "v2023-12-13"
+
+    # reproduce 2023 data set
+    reproduce23 = True
+
     ds_fao = (
         extracted_data_path
         # / "v2024-11-14/FAOSTAT_Agrifood_system_emissions_v2024-11-14_raw.nc"
@@ -44,9 +50,16 @@ def test_conversion_from_FAO_to_IPCC2006_PRIMAP():
     # That's a temporary workaround until convert function can filter for data variables (entities)
     conv = {}
     gases = ["CO2", "CH4", "N2O"]
+
+    if reproduce23:
+        reproduce23_filename = "_reproduce23"
+    else:
+        reproduce23_filename = ""
+
     for var in gases:
         conv[var] = cc.Conversion.from_csv(
-            f"../../conversion_FAO_IPPCC2006_PRIMAP_{var}.csv", cats=cats
+            f"../../conversion_FAO_IPPCC2006_PRIMAP_{var}{reproduce23_filename}.csv",
+            cats=cats,
         )
 
     # convert for each entity
@@ -65,10 +78,19 @@ def test_conversion_from_FAO_to_IPCC2006_PRIMAP():
     result_if = result.pr.to_interchange_format()
     result = pm2.pm2io.from_interchange_format(result_if)
 
+    # aggregation for each gas for better understanding
     result_proc = result.pr.add_aggregates_coordinates(
         agg_info=agg_info_ipcc2006_primap
     )
 
+    result_proc = result_proc.pr.add_aggregates_coordinates(
+        agg_info=agg_info_ipcc2006_primap_CO2
+    )
+
+    result_proc = result_proc.pr.add_aggregates_coordinates(
+        agg_info=agg_info_ipcc2006_primap_CH4
+    )
+
     result_proc_if = result_proc.pr.to_interchange_format()
 
     # save processed data
@@ -112,20 +134,20 @@ def test_read(tmp_path):
 
 def test_read_2023():
     domains_and_releases_to_read = [
-        # ("farm_gate_agriculture_energy", "2023-12-13"),
-        # ("farm_gate_emissions_crops", "2023-11-09"),
-        # ("farm_gate_livestock", "2023-11-09"),
-        # ("land_use_drained_organic_soils", "2023-11-09"),
-        # ("land_use_fires", "2023-11-09"),
-        # ("land_use_forests", "2023-11-09"),
-        # ("pre_post_agricultural_production", "2023-11-09"),
-        ("farm_gate_agriculture_energy", "2024-11-14"),
-        ("farm_gate_emissions_crops", "2024-11-14"),
-        ("farm_gate_livestock", "2024-11-14"),
-        ("land_use_drained_organic_soils", "2024-11-14"),
-        ("land_use_fires", "2024-11-14"),
-        ("land_use_forests", "2024-11-14"),
-        ("pre_post_agricultural_production", "2024-11-14"),
+        ("farm_gate_agriculture_energy", "2023-12-13"),
+        ("farm_gate_emissions_crops", "2023-11-09"),
+        ("farm_gate_livestock", "2023-11-09"),
+        ("land_use_drained_organic_soils", "2023-11-09"),
+        ("land_use_fires", "2023-11-09"),
+        ("land_use_forests", "2023-11-09"),
+        ("pre_post_agricultural_production", "2023-11-09"),
+        # ("farm_gate_agriculture_energy", "2024-11-14"),
+        # ("farm_gate_emissions_crops", "2024-11-14"),
+        # ("farm_gate_livestock", "2024-11-14"),
+        # ("land_use_drained_organic_soils", "2024-11-14"),
+        # ("land_use_fires", "2024-11-14"),
+        # ("land_use_forests", "2024-11-14"),
+        # ("pre_post_agricultural_production", "2024-11-14"),
     ]
 
     read_data(