#' Imports a DG-AGRI csv into fadnUtils
#'
#' Runs the two import steps in sequence: it first calls
#' \code{convert.to.fadn.raw.rds} on the csv file and, only if that step
#' returns TRUE, calls \code{convert.to.fadn.str.rds} with the given
#' raw_str_map.
#'
#' Before doing any work it checks that the csv file exists, that the
#' fadnUtils data.dir has been set and that a raw_str_map was supplied. Each
#' failed check prints a message and stops the import.
#'
#' @param file.path the full path of the file (the filename must be included)
#' @param raw.f the raw_str_map file to use. It must reside inside the 'raw_str_maps' folder of the data.dir
#' @param sepS the separator of the csv files (by default ",")
#' @param fadn.year the year the csv files refers to (e.g. 2001)
#' @param fadn.country the three letter country code the csv files refers to (e.g. "ELL")
#' @param keep.csv if TRUE, copy the csv files; else do not copy
#'
#' @return Invisibly FALSE if a precondition is not met or the raw conversion
#'   fails; otherwise the value returned by \code{convert.to.fadn.str.rds}
#' @export
#'
#' @examples
#' \dontrun{
#' import.fadn.csv("path/to/ELL2001.csv",
#'                 raw.f = "raw_str_map.json",
#'                 fadn.year = 2001,
#'                 fadn.country = "ELL")
#' }
import.fadn.csv <- function(file.path,
                            raw.f = NULL,
                            sepS = ",",
                            fadn.year = NA,
                            fadn.country = NA,
                            keep.csv = FALSE) {
  # The csv file must exist
  if (!file.exists(file.path)) {
    cat(paste0("File ", file.path, " does not exist. Exiting ...\n"))
    return(invisible(FALSE))
  }

  # The fadnUtils data.dir must have been set
  if (is.null(get.data.dir())) {
    cat("You have first to set the fadnUtils.data.dir using set.data.dir function. Exiting ....\n")
    return(invisible(FALSE))
  }

  # A raw_str_map is required for the second (structured) step
  if (is.null(raw.f)) {
    cat("You have to give a raw_str_map. Exiting ....\n")
    return(invisible(FALSE))
  }

  # Step 1: csv -> fadn.raw.rds. Arguments are passed by name so that they
  # cannot be shifted if the signature of the callee changes.
  raw.ok <- convert.to.fadn.raw.rds(file.path = file.path,
                                    sepS = sepS,
                                    fadn.year = fadn.year,
                                    fadn.country = fadn.country,
                                    keep.csv = keep.csv)

  # Every failure path returns FALSE (never NULL), so callers can safely
  # write `if (import.fadn.csv(...))`.
  if (!isTRUE(raw.ok)) {
    cat("Failed to import. Exiting ...\n")
    return(invisible(FALSE))
  }

  # Step 2: fadn.raw.rds -> fadn.str.rds; its result is the result of the import
  convert.to.fadn.str.rds(fadn.country, fadn.year, raw.f)
}
#' Gets a fadn.raw.csv (csv file from DG-AGRI) and transforms it accordingly to fadn.raw.rds
#'
#' It reads the csv file and saves it as
#' \code{<data.dir>/rds/fadn.raw.<fadn.year>.<fadn.country>.rds}. The saved
#' object is a data.table in wide format, i.e. in tabular format that mirrors
#' the csv data. Column names are truncated at their first ".". The attributes
#' "original.file.path", "fadn.year" and "fadn.country" are attached to it.
#'
#' A second, compressed representation (a list with $data.char and $data.num
#' data.tables in long format, 0 values removed, indexed by col.id) is
#' sketched in the commented-out code at the end of the function; it is
#' currently NOT written.
#'
#' @param file.path the full path of the csv file (the filename must be included)
#' @param sepS the separator of the csv files (by default ",")
#' @param fadn.year the year the csv files refers to (e.g. 2001)
#' @param fadn.country the three letter country code the csv files refers to (e.g. "ELL")
#' @param keep.csv if TRUE, copy the csv files to the CSV directory (overwriting a file with the same name); else do not copy
#' @param col.id the name of the id column. Currently unused: it is referenced only by the disabled compressed-format code
#'
#' @return Saves the fadn.raw.rds file and returns (invisibly) TRUE if everything goes well;
#'   returns FALSE if the csv file does not exist or the data.dir is not set
#' @import data.table
#'
#' @export
#' @examples
#' \dontrun{
#' convert.to.fadn.raw.rds("path/to/ELL2001.csv",
#'                         sepS = ",",
#'                         fadn.year = 2001,
#'                         fadn.country = "ELL")
#' }
convert.to.fadn.raw.rds <- function(file.path = "",
                                    sepS = ",",
                                    fadn.year = NA,
                                    fadn.country = NA,
                                    keep.csv = FALSE,
                                    col.id = "ID") {
  # The csv file must exist
  if (!file.exists(file.path)) {
    cat(paste0("File ", file.path, " does not exist. Exiting ...\n"))
    return(FALSE)
  }

  # The fadnUtils data.dir must have been set
  data.dir <- get.data.dir()
  if (is.null(data.dir)) {
    cat("You have first to set the fadnUtils.data.dir using set.data.dir function. Exiting ....\n")
    return(FALSE)
  }
  csv.file <- basename(file.path)

  # Copy the csv to data.dir/csv.
  # NOTE: the argument `file.path` shadows base::file.path as a variable, so
  # paths are built with paste0 to keep the code unambiguous.
  if (keep.csv) {
    csv.target <- paste0(data.dir, "/csv/", csv.file)
    target.exists <- file.exists(csv.target)

    # Never copy a file onto itself: with overwrite = TRUE that could
    # truncate the source csv.
    same.file <- target.exists &&
      identical(normalizePath(file.path), normalizePath(csv.target))

    if (!same.file) {
      if (target.exists) {
        cat("File exists. Overwriting ...\n")
      }
      print(" copying file")
      # overwrite = TRUE so that the announced overwrite actually happens
      copied <- file.copy(file.path, csv.target, overwrite = TRUE)
      if (!isTRUE(copied)) {
        cat(paste0("Could not copy the csv file to ", csv.target, ". Continuing ...\n"))
      }
    }
  }

  # Convert to uncompressed rds and save
  print(" creating fadn.raw.rds")

  # NOTE(review): kept to preserve the historical side effect of attaching
  # data.table; the @import tag already makes it available in the package.
  library(data.table)

  # Honour the caller's separator (it was previously ignored)
  data.raw <- data.table(read.csv(file.path,
                                  header = TRUE,
                                  sep = sepS,
                                  as.is = TRUE))

  # Keep only the part of each column name before the first "."
  colnames(data.raw) <- sub("\\..*$", "", colnames(data.raw))

  attr(data.raw, "original.file.path") <- file.path
  attr(data.raw, "fadn.year") <- fadn.year
  attr(data.raw, "fadn.country") <- fadn.country

  data.name <- paste0("fadn.raw.", fadn.year, ".", fadn.country, ".rds")
  saveRDS(data.raw, paste0(data.dir, "/rds/", data.name))

  # Disabled: compressed representation of the same data
  # #convert to compressed rds and save
  # data.raw.classes = data.table(col.name=names(data.raw),col.class=sapply(data.raw,class))
  # data.raw.compr = list()
  #
  # char.cols = c(col.id, data.raw.classes[col.class=="character",col.name])
  # data.raw.compr$data.char = data.raw[,..char.cols]
  #
  # num.cols = c(col.id, data.raw.classes[!col.class=="character",col.name])
  # data.raw.compr$data.num = melt(data.raw[,..num.cols],id.vars = col.id)[!value==0]
  #
  # attr(data.raw.compr,"original.file.path") <-file.path
  # attr(data.raw.compr,"fadn.year")<-fadn.year
  # attr(data.raw.compr,"fadn.country")<-fadn.country
  # attr(data.raw.compr,"col.names")<-names(data.raw)
  # attr(data.raw.compr,"col.id")<-col.id
  #
  # data.name = paste0("fadn.raw.",fadn.year,".",fadn.country,".compressed.rds")
  # saveRDS(data.raw.compr,paste0(data.dir,"/rds/",data.name))

  return(invisible(TRUE))
}
#' Converts an fadn.raw.rds file to fadn.str.rds file using a raw_str_map.json file
#'
#' The raw_str_map.json specification is as follows:
#'
#' {
#' "id": { "COLUMN in every list member in RDS": "COLUMN IN CSV", ....},
#' "info": { "COLUMN in info RDS": "COLUMN IN CSV", ....},
#' "livestock": {},
#' "crops": {
#' "CROP NAME 1": {"description": "description of crop name", "columns": {"VARIABLE NAME": "COLUMN IN CSV", ....} },
#' "CROP NAME 2": {"description": "description of crop name", "columns": {"VARIABLE NAME": "COLUMN IN CSV", ....} },
#' ....
#' }
#' }
#'
#'
#' The structure of the str.dir:
#' - A data.dir can hold more than one extraction.
#' - Each extraction has a short name (20 or less characters, whitespace is not allowed)
#' - Each extraction is stored in the data.dir/rds/<extraction_name>
#' - That folder contains the following files:
#' + raw_str_map.json: the raw_str_map
#' + fadn.str.<4-digit YEAR>.<3-letter COUNTRY>.rds: the extracted data
#'
#' Notes:
#' 1) The computed RDS file contains a list structure with the following keys: info, costs, livestock-animals and crops
#' All are data.tables. For all of them, the first columns are those that are contained in the "id" object
#' "info" and "costs" are in table format, i.e. each farm is one row and data is on columns, as defined in the
#' related raw_str_map.json file.
#' "crops" and "livestock-animals" are in long data format (https://tidyr.tidyverse.org/), where one farm lies across many rows, and each
#' row is a farm-crop-variableName-value combination
#'
#' 2) In $id, $info and $costs, "COLUMN IN CSV" can have two forms
#' i) a single column name in the fadn.raw csv file or a combination, e.g. "K120SA+K120FC+K120FU+K120CV-K120BV"
#' ii) the form of an object {"source": "the column in the csv", "description": "a description of what this column is about"}
#'
#' 3) We attach certain attributes that are useful for identifying information:
#' i) In $info and $costs, the attribute "column description" provides information on the formula and the description of each column
#' ii) In $crops and $livestock-animals, the attributes "$crops.descriptions" and "$livestock.descriptions" provide the description of each CROP contained there
#' iii) In $crops and $livestock-animals, the attribute "$column.formulas" provides the formulas used in order to derive the VALUE
#'
#'
#'
#'
#' @param fadn.country string with the country to extract the str data
#' @param fadn.year the year to extract the structured data
#' @param raw_str_map.file the full path to the raw_str_map file.
#' @param str.short_name the short name of the str data. No spaces and text up to 20 characters
#' @param DEBUG if TRUE, prints more details on the conversion process
#'
#' @return Saves the rds.str.fadn and returns TRUE if everything goes well
#'