% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/handle_rds_data.R
\name{load.fadn.raw.rds}
\alias{load.fadn.raw.rds}
\title{Load all rds.raw.FADN data for selcted years and countries (rbinds them)}
\usage{
load.fadn.raw.rds(
countries = c("all"),
years = c("all"),
col.filter = NULL,
row.filter = NULL
)
}
\arguments{
\item{countries}{a character vector with all the 3-letter codes of the selected countries, e.g. c("ELL", "ESP").
If "all" is included, all available countries are loaded}
\item{years}{a numeric vector with the years selected. If "all" is included, all available years are loaded}
\item{col.filter}{a character vector with the columns to load. If NULL, all columns are loaded. E.g columns=c('ILOTH_VET_V', 'ILVOTH_V','id')}
\item{row.filter}{a string giving an expression that will be evaluated in order to select rows. If NULL, all rows are returned. E.g. filter='TF8==1'}
}
\value{
list( "countries"=> c(<RETURNED COUNTRIES), "years"=>c(<AVAILABLE YEARS) )
}
\description{
It adds two columns: load.YEAR and load.COUNTRY in each row. This can be used to group per year,country the data
}
\examples{
}
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/manage_data_dir.R
\name{get.available.fadn.raw.rds}
\alias{get.available.fadn.raw.rds}
\title{Returns the available YEAR-COUNTRY fadn.raw.rds}
\usage{
get.available.fadn.raw.rds(data.dir = NULL)
}
\value{
a DT of the available YEAR-COUNTRY fadn.raw.rds
}
\description{
Returns the available YEAR-COUNTRY fadn.raw.rds
}
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/convert_data.R
\name{import.fadn.csv}
\alias{import.fadn.csv}
\title{Imports a DG-AGRI csv into fadnUtils}
\usage{
import.fadn.csv(
file.path,
raw.f = NULL,
sepS = ",",
fadn.year = NA,
fadn.country = NA,
keep.csv = F
)
}
\arguments{
\item{file.path}{the full path of the file (the filename must be included)}
\item{raw.f}{the raw_str_map file to use. it must reside inside 'raw_str_maps; folder of the data.dir}
\item{sepS}{the separator of the csv files (by default ",")}
\item{fadn.year}{the year the csv files refers to (e.g. 2001)}
\item{fadn.country}{the three letter country code the csv files refers to (e.g. "ELL")}
\item{keep.csv}{if TRUE, copy the csv files; else do not copy}
}
\description{
It first call the convert.to.fadn.raw.rds and then convert.to.fadn.str.rds
}
#This file contains functions related to managing raw_str_map files
#' Merges two raw_str_map files and returns either a list or a file
#'
#' All entries in the new.raw_str_map file replace those on the source.raw_str_map file
#'
#' Both files must be relative to the current data.dir/raw_str_maps
#'
#' @param source.raw_str_map.file the filename of the source raw_str_map. It must be relative the raw_str_maps of the current data.dir
#' @param new.raw_str_map.file the filename of the mask raw_str_map. It will replace any entries of the source file. It must be relative the raw_str_maps of the current data.dir
#' @param return.file If set to T, a temporary full file path that contains the merge is returned. Otherwise a list with the contents of the merge is returned
#'
#' @return FALSE in case of problem / if return.file=T, the temporary full path of a file that contains the merged result in json / A list with the contents of the merge if return.file=F
#' @export
#'
#' @examples
raw_str_map.merge = function(source.raw_str_map.file=NULL,
new.raw_str_map.file=NULL,
return.file=F){
#Check that data.dir exist and raw_str_map files exist ----
## check for fadnUtils.data.dir
if(is.null(get.data.dir())) {
warning("You have first to set the fadnUtils.data.dir using set.data.dir function. Exiting ....")
return(invisible(FALSE))
}
##if data.dir is a proper dat.dir
if(check.data.dir.structure()) {
data.dir = get.data.dir();
rds.dir = paste0(data.dir,"/rds/")
raw_str_map.file = paste0(data.dir,'/raw_str_map.json')
}
##check if raw
source.raw_str_map.path = paste0(data.dir,"/raw_str_maps/",source.raw_str_map.file)
new.raw_str_map.path = paste0(data.dir,"/raw_str_maps/",new.raw_str_map.file)
if(!file.exists(source.raw_str_map.path) | !file.exists(new.raw_str_map.path)) {
warning("One or more of the raw_str_maps provided, do not exist. Exiting ....")
return(FALSE)
}
#read json files to a list ----
library("jsonlite")
source.raw_str_map = fromJSON(paste(readLines(source.raw_str_map.path), collapse="\n"))
new.raw_str_map = fromJSON(paste(readLines(new.raw_str_map.path), collapse="\n"))
#merge both files----
merged.raw_str_map = modifyList(source.raw_str_map,new.raw_str_map)
#save or export results----
if(return.file) {
#save to temporary file as JSON
f.tmp = tempfile(pattern = "file", tmpdir = tempdir())
cat(
toJSON(merged.raw_str_map, auto_unbox = T),
"\n",
file=f.tmp
)
return(f.tmp)
}
else {
return(merged.raw_str_map)
}
}
#' Takes $id, $info, $costs objects of a raw_str_map object and create Source-Description pairs
#'
#' Used internally
#'
#' @param listcontent
#'
#' @return list(COLUMN-NAME = c(SOURCE=csv column name, DESCRIPTION=description of column), ..... )
#'
#' @examples
take.raw_str_map.columns = function( listcontent ) {
str.cols = names(listcontent)
splitcol = function(x) {
if(is.list(x)) {
return(c("SOURCE"=x$source, "DESCRIPTION"=x$description))
} else {
return(c("SOURCE"=x, "DESCRIPTION"=""))}
}
return(
lapply(listcontent,splitcol)
)
}
#' Checks if the definitions of a raw_str_map are compatible with a fadn.raw.rds for a certain year and country
#'
#' Checks if all values are actual columns of the fadn.raw.rds file
#'
#' @param fadn.country
#' @param fadn.year
#' @param raw_str_map.file The full filepath of the raw_str_map
#'
#' @return
#' @export
#'
#' @examples
check.raw_str_map = function(raw_str_map.file,
fadn.country=NA,
fadn.year=NA) {
#TODO
#check if raw_str_map exist
#TODO
#check if fadn.raw.rds exist
#read file
raw_str_map.file = paste0(data.dir,"/raw_str_maps/",raw_str_map.file)
raw_str_map = fromJSON(paste(readLines(raw_str_map.file), collapse="\n"))
#get all keys
map.keys.orig = c( unlist(raw_str_map$id),
unlist(raw_str_map$info),
unlist(raw_str_map$crops,recursive = T)
)
#split all characters
map.keys.orig.split = unlist(
strsplit(map.keys.orig,
"\\+|\\-|,|ifelse|\\(|\\)|\\>|\\/|\\<"),
recursive = T
)
#remove whitespaces and empty
map.keys.orig.split=trimws(map.keys.orig.split)
map.keys.orig.split=map.keys.orig.split[!map.keys.orig.split==""]
#get the column listing of fadn.raw.rds
data.dir = get.data.dir();
rds.dir = paste0(data.dir,"/rds/")
data.name = paste0("fadn.raw.",fadn.year,".",fadn.country,".rds")
rds.data = readRDS(paste0(rds.dir,data.name))
rds.data.columns = colnames(rds.data)
#print what is in raw_str_map but not in fadn.raw.rds
cat(paste0("What is in raw_str_map but not in ",data.name,"\n"))
map.keys.orig.split[!map.keys.orig.split%in%rds.data.columns]
}
#' Imports a DG-AGRI csv into fadnUtils
#'
#' It first call the convert.to.fadn.raw.rds and then convert.to.fadn.str.rds
#'
#' @param file.path the full path of the file (the filename must be included)
#' @param raw.f the raw_str_map file to use. it must reside inside 'raw_str_maps; folder of the data.dir
#' @param sepS the separator of the csv files (by default ",")
#' @param fadn.year the year the csv files refers to (e.g. 2001)
#' @param fadn.country the three letter country code the csv files refers to (e.g. "ELL")
#' @param keep.csv if TRUE, copy the csv files; else do not copy
#'
#' @return NULL
#' @export
#'
#' @examples
import.fadn.csv <- function (file.path,
raw.f=NULL,
sepS=",",
fadn.year= NA,
fadn.country = NA,
keep.csv=F) {
#if file exist
if(!file.exists(file.path)) {
cat(paste0("File ",file.path," does not exist. Exiting ...\n"))
return(invisible(FALSE))
}
# check for fadnUtils.data.dir
if(is.null(get.data.dir())) {
cat("You have first to set the fadnUtils.data.dir using set.data.dir function. Exiting ....\n")
return(FALSE)
} else {
data.dir = get.data.dir();
csv.file = basename(file.path)
}
if(is.null(raw.f)) {
cat("You have to give a raw_str_map. Exiting ....\n")
return(FALSE)
}
if(convert.to.fadn.raw.rds(file.path,sepS,fadn.year,fadn.country,keep.csv)) {
convert.to.fadn.str.rds(fadn.country,fadn.year,raw.f)
} else {
cat("Failed to import. Exiting ...\n")
return(invisible(NULL))
}
}
#' Gets a fadn.raw.csv (csv file from DG-AGRI) and transforms it accordingly to fadn.raw.rds
#'
#' It saves two files:
#' - One that contain a wide format of the data, i.e. in tabular format that is identical to the csv data. This is uncompressed data.
#' - One that holds the same information in compressed data. It is a list that contains $data.char and $data.num data.tables in long format. 0 values are removed and only the col.id is the index on both data.tables
#'
#' @param file.path the full path of the csv file (the filename must be included)
#' @param sepS the separator of the csv files (by default ",")
#' @param fadn.year the year the csv files refers to (e.g. 2001)
#' @param fadn.country the three letter country code the csv files refers to (e.g. "ELL")
#' @param keep.csv if TRUE, copy the csv files to the CSV directory; else do not copy
#'
#' @return Saves the fadn.raw.rds file and returns TRUE if everything goes well
#' @import data.table
#'
#' @export
#' @examples
convert.to.fadn.raw.rds <- function(file.path="",
sepS=",",
fadn.year= NA,
fadn.country = NA,
keep.csv = F,
col.id = "ID") {
library(data.table)
#if file exist
if(!file.exists(file.path)) {
cat(paste0("File ",file.path," does not exist. Exiting ...\n"))
return(FALSE)
}
# check for fadnUtils.data.dir
if(is.null(get.data.dir())) {
cat("You have first to set the fadnUtils.data.dir using set.data.dir function. Exiting ....\n")
return(FALSE)
} else {
data.dir = get.data.dir();
csv.file = basename(file.path)
}
if(file.exists(paste0(data.dir,"/csv/",csv.file))) {cat("File exists. Overwriting ...\n")}
#copy csv to data.dir/csv
if(keep.csv) {
print(" copying file")
file.copy(file.path,paste0(data.dir,"/csv/",csv.file))
}
#convert to uncompressed rds and save
print(" creating fadn.raw.rds")
data.raw = data.table(read.csv(file.path,header = T, as.is = T))
attr(data.raw,"original.file.path") <-file.path
attr(data.raw,"fadn.year")<-fadn.year
attr(data.raw,"fadn.country")<-fadn.country
data.name = paste0("fadn.raw.",fadn.year,".",fadn.country,".rds")
saveRDS(data.raw,paste0(data.dir,"/rds/",data.name))
#convert to compressed rds and save
data.raw.classes = data.table(col.name=names(data.raw),col.class=sapply(data.raw,class))
data.raw.compr = list()
char.cols = c(col.id, data.raw.classes[col.class=="character",col.name])
data.raw.compr$data.char = data.raw[,..char.cols]
num.cols = c(col.id, data.raw.classes[!col.class=="character",col.name])
data.raw.compr$data.num = melt(data.raw[,..num.cols],id.vars = col.id)[!value==0]
attr(data.raw.compr,"original.file.path") <-file.path
attr(data.raw.compr,"fadn.year")<-fadn.year
attr(data.raw.compr,"fadn.country")<-fadn.country
attr(data.raw.compr,"col.names")<-names(data.raw)
attr(data.raw.compr,"col.id")<-col.id
data.name = paste0("fadn.raw.",fadn.year,".",fadn.country,".compressed.rds")
saveRDS(data.raw.compr,paste0(data.dir,"/rds/",data.name))
return(invisible(TRUE))
}
#' Converts an fadn.raw.rds file to fadn.str.rds file using a raw_str_map.json file
#'
#' The raw_str_map.json specification is as follows:
#'
#' {
#' "id": { "COLUMN in every list member in RDS": "COLUMN IN CSV", ....},
#' "info": { "COLUMN in info RDS": "COLUMN IN CSV", ....},
#' "livestock": {}
#' "crops": {
#' "CROP NAME 1": {"description": "description of crop name", "columns": {"VARIABLE NAME": COLUMN IN CSV", ....} },
#' "CROP NAME 2": {"description": "description of crop name", "columns": {"VARIABLE NAME": COLUMN IN CSV", ....} },
#' ....
#' }
#' }
#'
#'
#' The structure of the str.dir:
#' - A data.dir can hold more than one extractions.
#' - Each extraction has a short name (20 or less characters, whitespace is not allowed)
#' - Each extraction is stored in the data.dir/rds/<extraction_name>
#' - That folder contains the following files:
#' + raw_str_map.json: the raw_str_map
#' + fadn.str.<4-digit YEAR>.<3-letter COUNTRY>.rds: the extracted data
#'
#' Notes:
#' 1) The computed RDS file contains a list structure with the following keys: info, costs, livestock-animals and crops
#' All are data.tables. For all of them, the first columns are those that are contained in the "id" object
#' "info" and "costs" are in table format, i.e. each farm is one row and data is on columns, as defined in the
#' related raw_str_map.json file.
#' "crops" and "livestock-animals" are in wide data format (https://tidyr.tidyverse.org/), where one farm lies accross many rows, and each
#' row is a farm-crop-variableName-value combination
#'
#' 2) In $id, $info and $costs, "COLUMN IN CSV" can have two forms
#' i) a single column name in the fadn.raw csv file or a combination, e.g. "K120SA+K120FC+K120FU+K120CV-K120BV"
#' ii) the form of an object {"source": "the column in the csv", "description": "a description of what this column is about"}
#'
#' 3) We attach certain attributes that are useful for identifying informations:
#' i) In $info and $costs, the attribute "column description" provide information of the formula and the description of each column
#' ii) In $crops and $livestock-animals, the attribute "$crops.descriptions" and "$livestock.descriptions", provide the description of each CROP contained there
#' iii) In $crops and $ the attribute "$column.formulas" provide the formulas used in order to derive the VALUE
#'
#'
#'
#'
#' @param fadn.country string with the country to extract the str data
#' @param fadn.year the year to extract the structured data
#' @param raw_str_map.file the full path to the raw_str_map file.
#' @param str.short_name the short name of the str data. No spaces and text up to 20 characters
#' @param DEBUG if TRUE, prints more details on the conversion process
#'
#' @return Saves the rds.str.fadn and returns TRUE if everything goes well
#'
#' @export
#'
#' @examples
convert.to.fadn.str.rds <-function(fadn.country = NA,
fadn.year= NA,
raw_str_map.file=NULL,
force_external_raw_str_map=FALSE,
str.name = NULL,
DEBUG=F
) {
#check if str.short_name abides to specification
if(nchar(str.name)>20){
cat("str.name more should be 20 character and less. Exiting ....\n")
return(invisible(FALSE))
}
if (grepl("\\s",str.name)) {
cat("str.name should not contain any kind of whitespace. Exiting ....\n")
return(invisible(FALSE))
}
# check for fadnUtils.data.dir
if(is.null(get.data.dir())) {
cat("You have first to set the fadnUtils.data.dir using set.data.dir function. Exiting ....\n")
return(invisible(FALSE))
}
#if data.dir is a proper dat.dir
if(!check.data.dir.structure()) {
cat("data.dir does not have a proper structure. Exiting ....\n")
return(invisible(FALSE))
} else {
data.dir = get.data.dir();
rds.dir = paste0(data.dir,"/rds/")
str.dir = paste0(rds.dir,"/",str.name,"/")
}
#create/get the raw_str_map.json file
if(is.null(raw_str_map.file) & !file.exists(paste0(str.dir,"raw_str_map.json"))) {
cat("either provide a raw_str_map or an existing extraction dir Exiting ....\n")
return(invisible(FALSE))
}
#create/get the raw_str_map.json file ----
##case a
if(!is.null(raw_str_map.file) & !file.exists(paste0(str.dir,"raw_str_map.json"))) {
dir.create(str.dir)
file.copy(raw_str_map.file,paste0(str.dir,"raw_str_map.json"))#copy the file
cat("\n", raw_str_map.file, " copied to ", paste0(str.dir,"raw_str_map.json\n"))
}
##case b
if(!is.null(raw_str_map.file) & file.exists(paste0(str.dir,"raw_str_map.json")) & force_external_raw_str_map==T) {
file.copy(raw_str_map.file,paste0(str.dir,"raw_str_map.json"), overwrite = TRUE)#copy the file
cat("\n", raw_str_map.file, " copied to ", paste0(str.dir,"raw_str_map.json\n"))
}
##case c
if(!is.null(raw_str_map.file) & file.exists(paste0(str.dir,"raw_str_map.json")) & force_external_raw_str_map==F) {
cat("Ignoring the provided raw_str_map.json fie. The conversion will use the existing raw_str_map from the extraction_dir");
}
raw_str_map.file = paste0(str.dir,"raw_str_map.json")
# read raw.rds file ----
#check if fadn.raw.rds exist
fadn.raw.rds.filename = paste0(rds.dir,"fadn.raw.",fadn.year,".",fadn.country,".rds")
if(!file.exists(fadn.raw.rds.filename)) {
cat("You have first to create a fadn.raw.rds file for the year and country (from a DG-AGRI csv file). Exiting ....\n")
return(FALSE)
}
fadn.raw.rds = readRDS(fadn.raw.rds.filename)
library("jsonlite")
#read raw_str_map file to a list ----
raw_str_map = fromJSON(paste(readLines(raw_str_map.file), collapse="\n"))
#create empty return list
data.return = list()
attach(fadn.raw.rds) #o that eval works more efficiently
# .......................................................
#what id variables will be availble to all DT?
#create id ----
print("Doing id ...")
id.vars.list = take.raw_str_map.columns(raw_str_map$id)
id.dt = data.table()
id.dt.descriptions = data.frame(COLUMN=character(), FORMULA=character(), DESCRIPTION = character());
#intersting links
#
# https://stackoverflow.com/questions/28327738/evaluate-expression-in-r-data-table
start.time <- Sys.time()
for(k in names(id.vars.list)) {
if(DEBUG) { print(paste0(" doing ", k, " = ", id.vars.list[[k]][["SOURCE"]])); }
#approach 1: 19 sec
# f <- function(e, .SD) eval(parse(text=e[1]), envir=.SD)
# id.dt.cur = fadn.raw.rds[,list(
# k=f(id.vars.list[[k]][["SOURCE"]],
# .SD)
# )]
# id.dt.cur = fadn.raw.rds[,list(k=f(id.vars.list[[k]][["SOURCE"]],.SD))]
#approach 0: 10 sec
#id.dt.cur = fadn.raw.rds[,list(k=eval(parse(text=id.vars.list[[k]][["SOURCE"]])))]
#approach 2: 0.3 sec
expr = eval(parse(text=id.vars.list[[k]][["SOURCE"]]))
id.dt.cur = fadn.raw.rds[,list(k=expr)]
setnames(id.dt.cur,k)
id.dt=cbind(id.dt,id.dt.cur)
id.dt.descriptions = rbind(id.dt.descriptions,
data.frame(COLUMN=k,
FORMULA=id.vars.list[[k]][["SOURCE"]],
DESCRIPTION = id.vars.list[[k]][["DESCRIPTION"]])
)
}
attr(id.dt,"column.descriptions") <- id.dt.descriptions;
end.time <- Sys.time()
time.taken <- end.time - start.time
time.taken
# .......................................................
#create info ----
print("Doing info ...")
info = copy(id.dt)
info.descriptions = data.frame(COLUMN=character(), FORMULA=character(), DESCRIPTION = character());
##now for each info key, add the column
info.map = take.raw_str_map.columns(raw_str_map$info)
for(k in names(info.map)) {
if(DEBUG) { print(paste0(" doing ", k, " = ", info.map[[k]][["SOURCE"]])); }
#info.cur = fadn.raw.rds[,list(k=eval(parse(text=info.map[[k]][["SOURCE"]])))]
expr = eval(parse(text=info.map[[k]][["SOURCE"]]))
info.cur = fadn.raw.rds[,list(k=expr)]
#fadn.raw.rds[,list(k=eval(parse(text=info.map[[k]])))]
setnames(info.cur,names(info.cur),k)
info=cbind(info,info.cur)
info.descriptions = rbind(info.descriptions,
data.frame(COLUMN=k,
FORMULA=info.map[[k]][["SOURCE"]],
DESCRIPTION = info.map[[k]][["DESCRIPTION"]])
)
}
attr(info,"column.descriptions") <- rbind( attr(info,"column.descriptions"), info.descriptions )
data.return$info = info;
# # .......................................................
# #create livestock-animals ----
#
# if(DEBUG){cat("\n")}
# print("Doing livestock-animals ...")
#
# ##now load the map
# lvst.animals.map = raw_str_map$livestock$animals
#
# ##if not empty
# if(length(names(lvst.animals.map))>0) {
# lvst.animals.id = copy(id.dt)
# lvst.animals.descriptions = data.frame( LIVESTOCK=character(), DESCRIPTION=character() );
# lvst.animals.column.formulas = data.frame( LIVESTOCK=character(), COLUMN=character(), FORMULA=character());
#
# lvst.animals = NULL #this is the DT to save the data
#
#
# for(lvst.animal.key in names(lvst.animals.map)) { #loop each lvst.animals name (e.g. DCOW, SCOW, etc.)
#
# if(DEBUG) { print(paste0(" ", lvst.animal.key)); } else { cat(".")}
#
# lvst.animal.key.map = lvst.animals.map[[lvst.animal.key]]
#
# lvst.animals.descriptions = rbind(lvst.animals.descriptions,
# data.frame( LIVESTOCK=lvst.animal.key,
# DESCRIPTION=lvst.animal.key.map[["description"]] ) )
#
# for(k in names(lvst.animal.key.map[["columns"]])) { #loop each key within the animal name
#
# cmd = parse(text=(lvst.animal.key.map[["columns"]][[k]]))
# if(DEBUG) { print(paste0(" running ",k," = ", cmd)); }
#
# #lvst.animals.cur = fadn.raw.rds[,list(k=eval(parse(text=(lvst.animal.key.map[["columns"]][[k]]) )))]
# expr = eval(parse(text=(lvst.animal.key.map[["columns"]][[k]]) ))
# lvst.animals.cur = fadn.raw.rds[,list(k=expr)]
#
#
# setnames(lvst.animals.cur,names(lvst.animals.cur),"VALUE")
#
# lvst.animals.column.formulas = rbind(lvst.animals.column.formulas,
# data.frame(LIVESTOCK= lvst.animal.key, COLUMN=k , FORMULA=lvst.animal.key.map[["columns"]][[k]])
# );
#
# if(is.null(lvst.animals)) {
# lvst.animals = cbind(lvst.animals.id,LIVESTOCK=lvst.animal.key,VARIABLE=k,lvst.animals.cur)
# }
# else{
# lvst.animals=rbind(lvst.animals[VALUE!=0],
# cbind(lvst.animals.id,LIVESTOCK=lvst.animal.key,VARIABLE=k,lvst.animals.cur)
# )
# }
#
# }
#
# }
#
# lvst.animals$VARIABLE = factor(lvst.animals$VARIABLE)
# attr(lvst.animals,"column.formulas") <- lvst.animals.column.formulas
# attr(lvst.animals,"livestock.descriptions") <- lvst.animals.descriptions
# data.return$lvst$animals = lvst.animals[VALUE!=0];
#
# if(!DEBUG){cat("\n")}
#
# }
#
#
#
# # .......................................................
# #create livestock-products ----
#
# if(DEBUG){cat("\n")}
# print("Doing livestock-products ...")
#
# ##now load the map
# lvst.animals.map = raw_str_map$livestock$products
#
# ##if not empty
# if(length(names(lvst.animals.map))>0) {
# lvst.animals.id = copy(id.dt)
# lvst.animals.descriptions = data.frame( LIVESTOCK=character(), DESCRIPTION=character() );
# lvst.animals.column.formulas = data.frame( LIVESTOCK=character(), COLUMN=character(), FORMULA=character());
#
# lvst.animals = NULL #this is the DT to save the data
#
#
# for(lvst.animal.key in names(lvst.animals.map)) { #loop each lvst.animals name (e.g. DCOW, SCOW, etc.)
#
# if(DEBUG) { print(paste0(" ", lvst.animal.key)); } else { cat(".")}
#
# lvst.animal.key.map = lvst.animals.map[[lvst.animal.key]]
#
# lvst.animals.descriptions = rbind(lvst.animals.descriptions,
# data.frame( LIVESTOCK=lvst.animal.key,
# DESCRIPTION=lvst.animal.key.map[["description"]] ) )
#
# for(k in names(lvst.animal.key.map[["columns"]])) { #loop each key within the animal name
#
# cmd = parse(text=(lvst.animal.key.map[["columns"]][[k]]))
# if(DEBUG) { print(paste0(" running ",k," = ", cmd)); }
#
# #lvst.animals.cur = fadn.raw.rds[,list(k=eval(parse(text=(lvst.animal.key.map[["columns"]][[k]]) )))]
# expr = eval(parse(text=(lvst.animal.key.map[["columns"]][[k]]) ))
# lvst.animals.cur = fadn.raw.rds[,list(k=expr)]
#
#
# setnames(lvst.animals.cur,names(lvst.animals.cur),"VALUE")
#
# lvst.animals.column.formulas = rbind(lvst.animals.column.formulas,
# data.frame(LIVESTOCK= lvst.animal.key, COLUMN=k , FORMULA=lvst.animal.key.map[["columns"]][[k]])
# );
#
# if(is.null(lvst.animals)) {
# lvst.animals = cbind(lvst.animals.id,LIVESTOCK=lvst.animal.key,VARIABLE=k,lvst.animals.cur)
# }
# else{
# lvst.animals=rbind(lvst.animals[VALUE!=0],
# cbind(lvst.animals.id,LIVESTOCK=lvst.animal.key,VARIABLE=k,lvst.animals.cur)
# )
# }
#
# }
#
# }
#
# lvst.animals$VARIABLE = factor(lvst.animals$VARIABLE)
# attr(lvst.animals,"column.formulas") <- lvst.animals.column.formulas
# attr(lvst.animals,"livestock.descriptions") <- lvst.animals.descriptions
# data.return$lvst$products = lvst.animals[VALUE!=0];
#
# if(!DEBUG){cat("\n")}
#
# }
# .......................................................
#create crops ----
print("Doing crops ...")
##now load the map
crops.map = raw_str_map$crops
##if not empty
if(length(names(crops.map))>0) {
crops.id = copy(id.dt)
crops.descriptions = data.frame( CROP=character(), DESCRIPTION=character() );
crops.column.formulas = data.frame( CROP=character(), COLUMN=character(), FORMULA=character());
crops = NULL #this is the DT to save the data
for(crop.key in names(crops.map)) { #loop each crop name (e.g. DWHE, BARL, etc.)
if(DEBUG) { print(paste0(" ", crop.key)); } else { cat(".")}
crop.key.map = crops.map[[crop.key]]
crops.descriptions = rbind(crops.descriptions, data.frame( CROP=crop.key, DESCRIPTION=crop.key.map[["description"]] ) )
for(k in names(crop.key.map[["columns"]])) { #loop each key within the crop name
cmd = parse(text=(crop.key.map[["columns"]][[k]]))
if(DEBUG) { print(paste0(" running ",k," = ", cmd)); }
#crops.cur = fadn.raw.rds[,list(k=eval(parse(text=(crop.key.map[["columns"]][[k]]) )))]
expr = eval(parse(text=(crop.key.map[["columns"]][[k]]) ))
crops.cur = fadn.raw.rds[,list(k=expr)]
setnames(crops.cur,names(crops.cur),"VALUE")
crops.column.formulas = rbindlist(
list(crops.column.formulas,
data.frame(CROP= crop.key, COLUMN=k , FORMULA=crop.key.map[["columns"]][[k]])
)
);
if(is.null(crops)) {
crops = cbind(crops.id,CROP=crop.key,VARIABLE=k,crops.cur)
}
else{
crops=rbind(crops[VALUE!=0],
cbind(crops.id,CROP=crop.key,VARIABLE=k,crops.cur)
)
}
}
}
crops$VARIABLE = factor(crops$VARIABLE)
attr(crops,"column.formulas") <- crops.column.formulas
attr(crops,"crops.descriptions") <- crops.descriptions
data.return$crops = crops[VALUE!=0];
}
cat("\n")
detach(fadn.raw.rds)
# .......................................................
#save and return ----
data.name = paste0("fadn.str.",fadn.year,".",fadn.country,".rds")
if(file.exists(data.name)) {cat("Data exists. Overwriting ...\n")}
saveRDS(data.return,paste0(rds.dir,str.name,"/",data.name))
#copy the rds
return(invisible(TRUE))
}
#.................................................................................................#
#
# Load all csv files from DG-AGRI
#
#.................................................................................................#
#import also other files, fadn.raw.rds to fadn.str.rds
import.fadn.csv(paste0(dg.agri.dir,"ELL2009.csv"),
fadn.year = 2009,
fadn.country = "ELL")
import.fadn.csv(paste0(dg.agri.dir,"ELL2010.csv"),
fadn.year = 2010,
fadn.country = "ELL")
import.fadn.csv(paste0(dg.agri.dir,"ELL2011.csv"),
fadn.year = 2011,
fadn.country = "ELL")
import.fadn.csv(paste0(dg.agri.dir,"ELL2012.csv"),
fadn.year = 2012,
fadn.country = "ELL")
# ESP
import.fadn.csv(paste0(dg.agri.dir,"ESP2007.csv"),
fadn.year = 2007,
fadn.country = "ESP")
import.fadn.csv(paste0(dg.agri.dir,"ESP2008.csv"),
fadn.year = 2008,
fadn.country = "ESP")
import.fadn.csv(paste0(dg.agri.dir,"ESP2009.csv"),
fadn.year = 2009,
fadn.country = "ESP")
import.fadn.csv(paste0(dg.agri.dir,"ESP2010.csv"),
fadn.year = 2010,
fadn.country = "ESP")
import.fadn.csv(paste0(dg.agri.dir,"ESP2011.csv"),
fadn.year = 2011,
fadn.country = "ESP")
import.fadn.csv(paste0(dg.agri.dir,"ESP2012.csv"),
fadn.year = 2012,
fadn.country = "ESP")
#check what is loaded
show.data.dir.contents()
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/raw_str_map.R
\name{take.raw_str_map.columns}
\alias{take.raw_str_map.columns}
\title{Takes $id, $info, $costs objects of a raw_str_map object and create Source-Description pairs}
\usage{
take.raw_str_map.columns(listcontent)
}
\arguments{
\item{listcontent}{}
}
\value{
list(COLUMN-NAME = c(SOURCE=csv column name, DESCRIPTION=description of column), ..... )
}
\description{
Used internally
}
#....................................................................
#
#This file includes functions related to handling (loading, saving)
# data from raw.rds files or str.rds,
# saved in the RDS subdirectory of the data.dir
#
#....................................................................
#' Load all rds.raw.FADN data for selcted years and countries (rbinds them)
#'
#' It adds two columns: load.YEAR and load.COUNTRY in each row. This can be used to group per year,country the data
#'
#' @param countries a character vector with all the 3-letter codes of the selected countries, e.g. c("ELL", "ESP").
#' If "all" is included, all available countries are loaded
#' @param years a numeric vector with the years selected. If "all" is included, all available years are loaded
#' @param col.filter a character vector with the columns to load. If NULL, all columns are loaded. E.g columns=c('ILOTH_VET_V', 'ILVOTH_V','id')
#' @param row.filter a string giving an expression that will be evaluated in order to select rows. If NULL, all rows are returned. E.g. filter='TF8==1'
#'
#' @return list( "countries"=> c(<RETURNED COUNTRIES), "years"=>c(<AVAILABLE YEARS) )
#' @export
#'
#' @examples
#'
load.fadn.raw.rds = function(countries=c("all"),
years=c("all"),
col.filter = NULL,
row.filter=NULL) {
# check for fadnUtils.data.dir
if(is.null(get.data.dir())) {
warning("You have first to set the fadnUtils.data.dir using set.data.dir function. Exiting ....")
return(FALSE)
} else {
data.dir = get.data.dir();
rds.dir = paste0(data.dir,"/rds/")
}
#select the COUNTRY-YEAR to return
rds.avail = get.available.fadn.raw.rds();
if("all"%in%countries) {countries= unique(rds.avail$COUNTRY)}
if("all"%in%years) {years= unique(rds.avail$YEAR)}
fadn.raw.rds.avail = rds.avail[YEAR%in%years & COUNTRY%in%countries]
fadn.raw.rds.avail[,FILE:=paste0("fadn.raw.",YEAR,".",COUNTRY,".rds")]
fadn.raw.rds.avail.files = fadn.raw.rds.avail[,FILE]
#Rbind the
data.return = NULL;
for(f in fadn.raw.rds.avail.files) {
data.cur = readRDS(paste0(rds.dir, f))
if(! is.null(row.filter)) {
data.cur=data.cur[eval(parse(text=row.filter))]
}
if(! is.null(col.filter)) { #if columns is specified, load only those
col.filter.effective = col.filter[col.filter%in%names(data.cur)]
if(length(col.filter.effective)==0) { #check that at least one data column is contained in the filter, otherwise abort
warning('Column filter does not contain any column of the fadn.raw.rds data. Operation aborted')
return(NULL)
}
if(length(col.filter)>length(col.filter.effective)) {
warning(paste0("Not all columns were found: ", paste0(col.filter[!names(col.filter)%in%col.filter.effective],collapse=",")))
}
data.cur=data.cur[,..col.filter.effective]
}
data.cur[,load.YEAR:=fadn.raw.rds.avail[FILE==f,YEAR] ]
data.cur[,load.COUNTRY:=fadn.raw.rds.avail[FILE==f,COUNTRY] ]
print(paste0("Loading from ",rds.dir, f))
if(is.null(data.return)) {
data.return=data.cur
} else {
#take care in case some columns do not exist between the rbind-ed datasets
data.return=rbindlist(list(data.return,data.cur),fill = T)
}
}
return(data.return)
}
#' Load all rds.str.FADN data for seelcted years and countries
#'
#' @param str.name The extractionname to load data from
#' @param countries a character vector with all the 3-letter codes of the selected countries, e.g. c("ELL", "ESP").
#' If "all" is included, all available countries are loaded
#' @param years
#'
#' @return list( "countries"=> c(<RETURNED COUNTRIES), "years"=>c(<AVAILABLE YEARS) )
#' @export
#'
#' @examples
#'
load.fadn.str.rds = function(extraction_dir,
countries=c("all"),
years=c("all")) {
# check for fadnUtils.data.dir
if(is.null(get.data.dir())) {
warning("You have first to set the fadnUtils.data.dir using set.data.dir function. Exiting ....")
return(FALSE)
} else {
data.dir = get.data.dir();
rds.dir = paste0(data.dir,"/rds/",extraction_dir,"/")
}
#select the COUNTRY-YEAR to return
rds.avail = get.available.fadn.str.rds(extract_dir = extraction_dir);
if("all"%in%countries) {countries= unique(rds.avail$COUNTRY)}
if("all"%in%years) {years= unique(rds.avail$YEAR)}
fadn.str.rds.avail = rds.avail[YEAR%in%years & COUNTRY%in%countries]
fadn.str.rds.avail[,FILE:=paste0("fadn.str.",YEAR,".",COUNTRY,".rds")]
fadn.str.rds.avail = fadn.str.rds.avail[,FILE]
#Rbind the
data.return = NULL;
for(f in fadn.str.rds.avail) {
data.cur = readRDS(paste0(rds.dir, f))
cat(paste0("Loading from ",rds.dir, f, "\n"))
if(is.null(data.return)) {
data.return=data.cur
} else {
data.return[["info"]] = rbindlist(list(data.return[["info"]],data.cur[["info"]]),fill = T)
data.return[["lvst"]][["animals"]] = rbindlist(list(data.return[["lvst"]][["animals"]],data.cur[["lvst"]][["animals"]]),fill = T)
data.return[["lvst"]][["products"]] = rbindlist(list(data.return[["lvst"]][["products"]],data.cur[["lvst"]][["products"]]),fill = T)
data.return[["crops"]] = rbindlist(list(data.return[["crops"]],data.cur[["crops"]]),fill = T)
}
}
return(data.return)
}
#' Title
#'
#' @param countries
#' @param years
#'
#' @return
#' @export
#'
#' @examples
delete.fadn.raw = function (countries=NULL, years=NULL) {
data.dir = get.data.dir();
rds.dir = paste0(data.dir,"/rds/")
rds.avail = get.available.fadn.raw.rds();
if("all"%in%countries) {countries= unique(rds.avail$COUNTRY)}
if("all"%in%years) {years= unique(rds.avail$YEAR)}
fadn.raw.rds.avail = rds.avail[YEAR%in%years & COUNTRY%in%countries]
fadn.raw.rds.avail[,FILE:=paste0("fadn.raw.",YEAR,".",COUNTRY,".rds")]
files.to.delete = fadn.raw.rds.avail[,paste0(rds.dir, FILE)]
file.remove(files.to.delete,showWarnings = TRUE)
}
#' Title
#'
#' @param countries
#' @param years
#'
#' @return
#' @export
#'
#' @examples
delete.fadn.str = function (countries=c(), years=c()) {
data.dir = get.data.dir();
rds.dir = paste0(data.dir,"/rds/")
rds.avail = get.available.fadn.str.rds();
if("all"%in%countries) {countries= unique(rds.avail$COUNTRY)}
if("all"%in%years) {years= unique(rds.avail$YEAR)}
fadn.str.rds.avail = rds.avail[YEAR%in%years & COUNTRY%in%countries]
fadn.str.rds.avail[,FILE:=paste0("fadn.str.",YEAR,".",COUNTRY,".rds")]
files.to.delete = fadn.str.rds.avail[,paste0(rds.dir, FILE)]
res = file.remove(files.to.delete,showWarnings = TRUE)
}
#' Grep a pattern into a raw.rds column names
#'
#' Useful for the case where one want to look if there are certain columns present or missing
#'
#' @param countries a character vector with all the 3-letter codes of the selected countries, e.g. c("ELL", "ESP").
#' If "all" is included, all available countries are loaded
#' @param years a numeric vector with the years selected. If "all" is included, all available years are loa
#' @param pattern a grep-like character pattern. This parameter is passed as is to the grep function
#' @param show if TRUE, the columsn are printed
#'
#' @return Prints the columns and returns them invisibly
#' @export
#'
#' @examples
grep.columns.in.raw.rds = function(pattern,countries=c("all"),years=c("all")) {
# check for fadnUtils.data.dir
if(is.null(get.data.dir())) {
warning("You have first to set the fadnUtils.data.dir using set.data.dir function. Exiting ....")
return(FALSE)
} else {
data.dir = get.data.dir();
rds.dir = paste0(data.dir,"/rds/")
}
#find what countries, years to do the grep
rds.avail = get.available.fadn.raw.rds();
if("all"%in%countries) {countries= unique(rds.avail$COUNTRY)}
if("all"%in%years) {years= unique(rds.avail$YEAR)}
fadn.raw.rds.avail = rds.avail[YEAR%in%years & COUNTRY%in%countries]
fadn.raw.rds.avail[,FILE:=paste0("fadn.raw.",YEAR,".",COUNTRY,".rds")]
fadn.raw.rds.avail = fadn.raw.rds.avail[,FILE]
if(length(fadn.raw.rds.avail)==0) {
cat("No raw.rds for the selected countries-years")
}
#for each selected, do the grep
res = list();
for(f in fadn.raw.rds.avail) {
cat(paste0("\nGrep into ",f,"\n") )
data.cur = readRDS(paste0(rds.dir, f))
names.cur = names(data.cur)
print(names.cur[grep(pattern,names.cur)])
res[[f]] = names.cur[grep(pattern,names.cur)]
}
return(invisible(res))
}
#....................................................................
#
#The following lines includes functions related to stored rds.data,
#
# All function requires that the data.dir is already set
#....................................................................
Package: fadnUtils
Title: An R package to easily load and manipulate FADN data
Type: Package
Version: 1.0.2
Author: Dimitris Kremmydas <Dimitrios.KREMMYDAS@ec.europa.eu>
Maintainer: Dimitris Kremmydas <Dimitrios.KREMMYDAS@ec.europa.eu>
Description: Manipulate and perform data analysis with FADN data
License: Proprietary software (JRC D.4)
Encoding: UTF-8
LazyData: TRUE
Depends: R (>= 3.4.0)
Imports:
data.table,
jsonlite
RoxygenNote: 7.1.1
.svn/wc.db 0 → 100644
File added
DESCRIPTION 0 → 100644
Package: fadnUtils
Title: An R package to easily load and manipulate FADN data
Type: Package
Version: 1.0.2
Authors@R: c(person("Dimitris", "Kremmydas", role = c("aut", "cre"),
email = "Dimitrios.KREMMYDAS@ec.europa.eu"),
person("Xinxin", "Yang", role = "aut",
email = "xinxin.yang@thuenen.de"))
Author: Dimitris Kremmydas, Xinxin Yang
Maintainer: Dimitris Kremmydas <Dimitrios.KREMMYDAS@ec.europa.eu>
Description: Manipulate and perform data analysis with FADN data
License: Proprietary software (JRC D.4)
Encoding: UTF-8
LazyData: TRUE
Depends: R (>= 3.4.0)
Imports:
data.table,
jsonlite
RoxygenNote: 7.1.1
Suggests:
knitr,
rmarkdown
VignetteBuilder: knitr
NAMESPACE 0 → 100644
# Generated by roxygen2: do not edit by hand
export(analyzeFormula)
export(check.column)
export(check.data.dir.structure)
export(check.raw_str_map)
export(collect.common.id)
export(convert.to.fadn.raw.rds)
export(convert.to.fadn.str.rds)
export(create.data.dir)
export(delete.fadn.raw)
export(delete.fadn.str)
export(get.available.fadn.raw.rds)
export(get.available.fadn.str.rds)
export(get.data.dir)
export(getFormulaResult)
export(grep.columns.in.raw.rds)
export(import.fadn.csv)
export(load.fadn.raw.rds)
export(load.fadn.str.rds)
export(raw_str_map.merge)
export(set.data.dir)
export(show.data.dir.contents)
export(update_elements.DT)
export(write.excel)
import(data.table)
# Calculate Standard Results SEs --------------------------------------
fadn.calculateSE=function(data) {
seData=data.table(FID=data$tableAI$FID, SYS02=data$tableAI$WEIGHT)
#Total labour input of holding SE010----
#expressed in annual work units = full-time person equivalents.
#For casual unpaid labour:
#AVEHRS1 = { [C13(4)..17(4)] } / { [C13(3)..17(3)] }
#For casual paid labour:
#AVEHRS2 = { C19(4) + C20(4) } / { C19(3) + C20(3) }
#IF AVEHRS1 > 0 then A = #77 / AVEHRS1 for casual unpaid labour
#IF AVEHRS2 > 0 then C = #83 / AVEHRS2 for casual paid labour
#//todo
# seData$SE010=getFormulaResult(data,
# seData,
# "%3+#57+#61+#65+#69+#72+#75+#79+#81+"
# );
#SE011 Unpaid labour input ----
seData$SE011=getFormulaResult(data,seData,"#54+#58+#62+#66+#70+#73+#76+#77+#80+#82+#83");
#SE021 Paid labour Input -----
#Time worked in hours by paid labour input on holding.
seData$SE021=getFormulaResult(data,seData,"#80+#82+#83");
#SE025 Total Utilised Agricultural Area ----
seData$SE025=getFormulaResult(data,
seData,
"#48+#49+#50");
#SE030 Rented U.A.A. -----
seData$SE030=getFormulaResult(data,seData,"#49");
#SE035 Cereals -----
seData$SE030=getFormulaResult(data,seData,"K120..148(4)");
#SE041 Other field crops -----
seData$SE041=getFormulaResult(data,seData,"K129..135(4)+K142(4)+K143(4)");
#SE042 Energy crops -----
seData$SE042=getFormulaResult(data,seData,"K129..133(4:2=10)+K135(4:2=10)+K144..145(4:2=10)+
K147..148(4:2=10)+K150(4:2=10)+
K158(4:2=10)+K160..161(4:2=10)+
K284(4:2=10)+K304(4:2=10)+K330..334(4:2=10)+
K345..348(4:2=10)+K360..364(4:2=10)");
#SE046 Vegetables and flowers -----
seData$SE046=getFormulaResult(data,seData,"K136..138(4)+K140(4)+K141(4)");
#SE050 Vineyards -----
seData$SE050=getFormulaResult(data,seData,"K155(4)");
#SE054 Permanent crops -----
seData$SE054=getFormulaResult(data,seData,"K152..154(4)+K156..158(4)");
#SE055 Orchards -----
seData$SE055=getFormulaResult(data,seData,"K152..153(4)");
#SE060 Olive groves -----
seData$SE060=getFormulaResult(data,seData,"K154(4)");
#SE065 Other permanent crops -----
seData$SE065=getFormulaResult(data,seData,"K156..158(4)");
#SE071 Forage crops -----
seData$SE071=getFormulaResult(data,seData,"K144..145(4)+K147(4)+K150..151(4)");
#SE072 Agricultural fallows -----
seData$SE072=getFormulaResult(data,seData,"K146(4:2=1&3=(0,1,2,3,4,9,10))");
#SE073 Set aside -----
seData$SE073=getFormulaResult(data,seData,"K146(4:2=1&3=(5,6,7,8))");
#SE074 Total agricultural area out of production -----
seData$SE074=getFormulaResult(data,seData,"SE072+SE073+K314(4)");
#SE075 Woodland area -----
seData$SE075=getFormulaResult(data,seData,"K173(4)");
#SE080 to SE105 TODO Livestock ----
#SE110 to SE125 TODO YIELDS ----
#SE135 Total crop output ----
seData$SE135=getFormulaResult(data,
seData,
"K120..148(7..10)-K120..148(6)+
K150..161(7..10)-K150..161(6)");
#SE110 to SE125 TODO YIELDS ----
seData$SE075=getFormulaResult(data,seData,"K173(4)");
#SE206 Total livestock output ----
seData$SE206=getFormulaResult(data,
seData,
"#231+#232+#234+#235+#237+#238+#240+#241+#243+#244+#246+#247
+#249+#250+#252+#253
-#233-#236-#239-#242-#245-#248-#251-#254
+K162..171(7..10)-K162..171(6)+K313(7..10)-K313(6)");
#SE256 Other output ----
seData$SE256=getFormulaResult(data,
seData,
"K149(7..10)+K172..181(7..10)");
#SE131 Total output ----
seData$SE131=getFormulaResult(data,seData,"SE135+SE206+S256");
#SE275 Total Intermediate consumption
seData$SE275=getFormulaResult(data,seData,"#260..282+#284+#287");
#SE611 Compensatory Payments ----
seData$SE611=getFormulaResult(data,seData,"M602..614(5)+M618(5)+M622..629(5)+M632..634(5)+M638(5)+M655(5)");
#SE612 Set aside premiums ----
seData$SE612=getFormulaResult(data,seData,"M650(5)");
#SE613 Other Crop subsidies ----
seData$SE613=getFormulaResult(data,seData,"J120..145(2)+J147..161(2)+J185(2)+J281..284(2)+J296..301(2)+J326..357(2)+J360..374(2)+J952(2)");
#SE610 Total subsidies on crops ----
seData$SE610=getFormulaResult(data,seData,"SE611+SE612+SE613");
#SE616 Subsidies on dayring ----
seData$SE616=getFormulaResult(data,seData,"J30(2)+J162(2)+J163(2)+M770(5)-L401(10)");
#SE617 Subsidies other cattle ----
seData$SE617=getFormulaResult(data,seData,"J23..29(2)+J31..32(2)+J52(2)+J307(2)+M700(5)");
#SE618 Subsidies sheeps & goats ----
seData$SE618=getFormulaResult(data,seData,"K38..41(2)+J54..55(2)+J164..168(2)+J308(2)");
#SE619 Other livestock subsidies ----
seData$SE619=getFormulaResult(data,seData,"J22(2)+J33..34(2)+j43..51(2)+J56..58(2)+J169..171(2)+J309..311(2)+J313(2)+J951(2)");
#SE615 Total subsidies on livestock ----
seData$SE615=getFormulaResult(data,seData,"SE616+SE617+SE618+SE619");
#SE621 Environmental subsidies ----
seData$SE621=getFormulaResult(data,seData,"J800(2)+J810(2)");
#SE622 LFA subsidies ----
seData$SE622=getFormulaResult(data,seData,"J820(2)");
#SE623 Other Rural Development subsidies ----
seData$SE623=getFormulaResult(data,seData,"J830(2)+J835(2)+J840(2)+J900(2)+J910(2)+J953(2)");
#SE624 Total support on rural development
seData$SE624=getFormulaResult(data,seData,"SE621+SE622+SE623+J173..176(2)+J179(2)");
#SE625 Subsidies on intermediate consumption
seData$SE625=getFormulaResult(data,seData,"J59(2)+J85(2)+J89(2)");
#SE626 Subsidies on external factors ----
seData$SE626=getFormulaResult(data,seData,"J59(2)+J85(2)+J89(2)");
#SE631 Single Farm Payment ----
seData$SE631=getFormulaResult(data,seData,"J670(2)");
#SE632 Single Area Payment ----
seData$SE632=getFormulaResult(data,seData,"J680(2)");
#SE640 Additional aid
seData$SE640=getFormulaResult(data,seData,"J955(2)");
#SE630 Decoupled payments
seData$SE630=getFormulaResult(data,seData,"SE631+SE632+SE640");
#SE650 Aid for article 68
seData$SE650=getFormulaResult(data,seData,"J956(2)");
#Other subsidies
seData$SE699=getFormulaResult(data,seData,"J172(2)+J177(2)+J178(2)+J180(2)+J181(2)+J182(2)+J950(2)+J998(2)+J999(2)");
#SE605 Total subsidies excluding on investments
seData$SE605=getFormulaResult(data,seData,"SE610+SE615+SE624+SE625+SE626+SE630+SE650+SE699");
#SE395 VAT balance excluding on investments
seData$SE395=getFormulaResult(data,seData,"#402+#405-#403");
#SE390 Taxes
seData$SE390=getFormulaResult(data,seData,"#283+#288-J83(2)-J88(2)");
#SE600 Balance current subsidies & taxes
seData$SE600=getFormulaResult(data,seData,"SE605+SE395-SE390");
#SE410 Gross Farm Income
seData$SE410=getFormulaResult(data,seData,"SE131-SE275+SE600");
#SE360 Depreciation
seData$SE360=getFormulaResult(data,seData,"#300+#348+#356");
#SE415 Farm Net Value Added
seData$SE415=getFormulaResult(data,seData,"SE410-SE360");
#SE370 Wages Paid
seData$SE370=getFormulaResult(data,seData,"#259");
#SE375 Rent Paid
seData$SE375=getFormulaResult(data,seData,"#285");
#SE380 Interest Paid
seData$SE380=getFormulaResult(data,seData,"#289");
#SE365 Total External factors
seData$SE365=getFormulaResult(data,seData,"SE370+SE375+SE380");
#SE407 Payments to dairy outgoers
seData$SE407=getFormulaResult(data,seData,"J1052(2)+J2052(2)");
#SE405 Balance subsidies & taxes on investments
seData$SE405=getFormulaResult(data,seData,"#370+SE407-#404");
#SE420 Family Farm Income
seData$SE420=getFormulaResult(data,seData,"SE415-SE365+SE405");
#SE140 Cereals Output----
seData$SE140=getFormulaResult(data,seData,"K120..128(7..10)-K120..128(6)");
#SE145 TODO Protein crops Output----
#SE146 TODO energy crops Output----
#SE150 TODO Potatoes Output----
#SE155 TODO Sugar beet Output----
#SE160 TODO Oil-seed crops Output----
#SE165 TODO Industrial crops Output----
#SE170 TODO Vegetables & flowers Output----
#SE175 TODO Fruits Output----
#SE180 TODO Citrus fruit Output----
#SE185 TODO Wine and grapes Output----
#SE190 TODO Olives & olive oil Output----
#SE195 TODO Forage crops Output----
#SE200 TODO Other crop output----
return(seData)
}
#....................................................................
#
# Functions to check the variables which are used for calculating the aggregate variables in a json file and they are
# also in the csv or rds file.
# To create a txt-file containing the information of R output, and the results of unmatched variables.
#....................................................................
#' Check the variables/column names for calculating the aggregate variables
#' @details
#' If variables exist in a json-file and not in the fadn.raw.rds file or fadn csv file, then returning all unmatched variables.
#' Json file has 6 objects/categries: "id", "info", "costs", "crops", "subsides", "livstock".
#'
#' @description
#' The check.column function checks the variables if they exist in a json-file matching the variables
#' in the fadn.raw.rds or fadn.raw.csv (csv-file from FADN-AGRI),
#' returning a list of variables which are not in the raw data file. Then a new json file without unmatched variables can be saved in the extraction_dir.
#' A txt-file (my_logfile.txt) is created in a specific directory (spool.dir) where stores the output messages.
#'
#' @param importfilepath A fadn.raw.rds or fadn.raw.csv file address.
#' @param jsonfile A json file address.
#' @param rewrite_json Logical, if TRUE (default), a new json file without unmatched variables will be saved. The string "rewrite" will be added in front of the original file name, and they are separated through "_". For example, the name of original json file is A.json, the new json file will be saved as rewrite_A.json.
#' Otherwise, do not rewrite json file.
#' @param extraction_dir Extraction_dir is the folder for extracting the data.
#'
#'
#'
#' @return A list of multiple objects. The objects are in the json-file, which have the unmatched variables.
#' @export
#'
#' @author Xinxin Yang <xinxin.yang@thuenen.de>
#'
#' @examples
#' check.column("./fadn.raw.2009.BEL.rds", "./2014_after.json", TRUE, "./OV")
#' check.column("BEL2009.csv", "2013_before.json", TRUE, "./OV")
#'
#'
check.column <- function(importfilepath,
jsonfile,
rewrite_json = TRUE,
extraction_dir)
{
# check for fadnUtils.data.dir
if(is.null(get.data.dir())) {
cat("You have first to set the fadnUtils.data.dir using set.data.dir function. Exiting ....\n")
return(FALSE)
} else {
data.dir = get.data.dir()
rds.dir = paste0(data.dir, "/rds/")
spool.dir = paste0(data.dir, "/spool/")
}
print(extraction_dir)
# check the file type and read the file
fadn.raw = tryCatch(check_file_type(importfilepath))
# read json
raw_str_map = fromJSON(paste(readLines(jsonfile), collapse = "\n"))
cat("Loading a json file...\n")
json.name = basename(jsonfile)
# get the categorie of the json file
categories.json = names(raw_str_map)
# create a empty list for saving the unmatching variables
res_final = list()
# create a list list for saving the json file
json_list = list()
for (i in categories.json)
{
message("Doing the category: ", i)
key = raw_str_map[[i]]
res_key = nested_var(key, fadn.raw)
res_final[[i]] <- res_key[[1]]
json_list[[i]] <- res_key[[2]]
}
jsonc = toJSON(json_list, auto_unbox = TRUE, pretty = TRUE)
if (rewrite_json){
json.name = paste('rewrite', json.name, sep='_')
write(jsonc, paste0(extraction_dir, "/", json.name))
cat("The new json file has been successfully written, please check the file in the current dir:", extraction_dir, ".\n")
if(length(res_final)==0)
{cat("The new json file has been no changed!")}
}
else {
cat("No json file is rewritten, all variables exist in the import file.\n")
if (length(res_final)!=0)
{
warning("Unmatched variables exist, please rewrite a new json file!!!")
}
}
# save the list of unmatched variables in a txt file
my_logfile = file(paste0(spool.dir,"my_logfile.txt" ), open = "wt") # wt open for writing or appending???
#system time output
cat("==================================================================\n",
as.character(Sys.time()),
"\n==================================================================\n",
file= my_logfile)
if (rewrite_json)
{
cat("A new jsonfile has been successfully written in the current dir:", extraction_dir, file = my_logfile, sep="\n" )
if(length(res_final)==0)
{cat("The new json file has been no changed!", file = my_logfile, sep="\n" )}
}
else{
cat("No json file is rewritten.", file = my_logfile, sep="\n" )
if (length(res_final)!=0)
{
cat("Unmatched variables exist, please rewrite a new json file!!!", file = my_logfile, sep="\n" )
}
}
cat("\nUnmatched variables: \n",file=my_logfile,sep="\n" )
cat(capture.output(res_final),file=my_logfile, sep="\n" )
return(res_final)
}
#' Check a objest in the json file
#' @description
#'
#' This function checks the node of chosen object/category for the json file and find out the variables
#' which are in json file but not in fadn.raw data file.
#' Returning two lists: unmatched variables/column names and modified json.
#' If unmatched variable exists, this variable will be deleted from the json list.
#'
#' @param var A object or category of raw json.
#' @param rds All variables/column names in fadn.raw.rds file.
#'
#' @details A json file has 6 parent objects/categories: "id", "info", "costs", "crops", "subsides", "livstock". This function checks all objects inside the parent object.
#'
#'
#'
#' @author Xinxin Yang
#'
#' @return A list of multiple objects. This list combines no machted variables and the modified json for the chosen object/category.
nested_var <- function(var, rds)
{
res= NULL
newjson = NULL
col_names = names(var)
cat("Number of the totoal objects: ", length(col_names), "\n")
for (var.key in col_names ){
var.key.map = var[[var.key]]
# id, info, costs, subsidies
if (!is.null(var.key.map[["source"]]))
{
#print("no nested, doing*****************************")
cmd = parse(text = (var.key.map[["source"]]))
extracted_element = all.vars(cmd)
for (i in extracted_element){
d = i %in% names(rds)
if (!isTRUE(d)){
cat(i, " is not in rds\n")
res = c(extracted_element, res)
#print(var.key)
# delete the unmatched vars
var[[var.key]][["source"]] = "NA"
}
}
}
# livestock, crops
else{
#print("nested! taking it down a level*****************************")
for (element in names(var.key.map[["columns"]])){
cmd = parse(text = (var.key.map[["columns"]][[element]]))
extracted_element = all.vars(cmd)
for (i in extracted_element){
d = i %in% names(rds)
if (!isTRUE(d)){
#message(i, " is not in rds")
res = c(extracted_element, res)
#delete the unmatched var
var[[var.key]][["columns"]][[element]] = "NA"
}
}
}
}
}
# remove the duplicated variables
res = res[!duplicated(res)]
newList = list("variables" = res, "json"= var)
return (newList)
}
#' Check the type of load file
#'
#' @description
#' This function checks the type of the load file and read this file.
#' If the file is not a csv or rds file,
#' the execution of the currently running R code will be stopped.
#'
#' @param filepath A rds or csv file address.
#'
#' @return A data frame with cases corresponding to lines and variables to fields in the file.
#'
check_file_type <- function(filepath)
{
if (grepl('rds$', filepath))# ends with rds?
{
cat("Loading a rds file...\n")
fadn.raw = readRDS(filepath)
}
else if( grepl('csv$', filepath))
{
cat("Loading a csv file...\n")
fadn.raw = read.csv(filepath)
}
else{
stop("ERROR: Please check the file format. It must be a file of type: rds or csv!")
}
return(fadn.raw)
}
R/common_id.R 0 → 100644
#' Collect Common id
#'
#' Load the Fadn.raw.rds data (Data Table) or Fadn.str.rds data (List),
#' then collection the common id from different years on this data.
#'@param my.r.data A data object(either a data.table or a list).
#'
#'
#' @return A data.table, it includes just one column that named "common_id".
#' @export
#' @author Xinxin Yang
#'
#' @examples
#' collect.common.id(fadn.raw.rds)
#' ## collection the common "id" from the raw rds data
#' ## for 2009-2012 years and country "BEL".
#' ## Return a DT with one column named "common_id".
#'
collect.common.id <- function(my.r.data) {
if (data.class(my.r.data)=="data.table")
{
my.data = my.r.data
}
else{
cat("Tranforming ", data.class(my.r.data), " to data table....\n")
library(tidyverse)
my.data = bind_rows(my.r.data$info)
if ( names(my.data)[1] != "ID"){
setnames(my.data, "id", "ID")
}
}
#print(my.data)
#check how many years in the DT
years = unique(my.data$YEAR)
print(years)
count_year = length(years)
cat(count_year, " year(s) is/are selected.\n")
# create a empty DT
df <- data.table(YEAR = factor(), ID = factor())
# subset the DT
for (year in years){
sel = subset(my.data,YEAR==year,select = ID)
new = data.table(YEAR = year, sel)
df = rbind(df, new)
}
# count the id
count_df = df[, .(count = .N), by = ID]
#print(count_df)
# collect the id, which count = count_year
final = count_df[count_df$count==count_year]
#rename the colname
names(final)[1] = "common_id"
return(final[,1])
}
##################
# check common id
#
# step1:
# read Fadn data
# raw.rds? str.rds?
# FADN raw data: DT
# FADN str data: List id -> ID
#
# step2:
# group by year
#
# step3:
# collect the common id for different years
###################
R/convert_data.R 0 → 100644
#' Imports a DG-AGRI csv into fadnUtils
#'
#' It first call the convert.to.fadn.raw.rds and then convert.to.fadn.str.rds
#'
#' @param file.path the full path of the file (the filename must be included)
#' @param raw.f the raw_str_map file to use. it must reside inside 'raw_str_maps; folder of the data.dir
#' @param sepS the separator of the csv files (by default ",")
#' @param fadn.year the year the csv files refers to (e.g. 2001)
#' @param fadn.country the three letter country code the csv files refers to (e.g. "ELL")
#' @param keep.csv if TRUE, copy the csv files; else do not copy
#'
#' @return NULL
#' @export
#'
#' @examples
import.fadn.csv <- function (file.path,
raw.f=NULL,
sepS=",",
fadn.year= NA,
fadn.country = NA,
keep.csv=F) {
#if file exist
if(!file.exists(file.path)) {
cat(paste0("File ",file.path," does not exist. Exiting ...\n"))
return(invisible(FALSE))
}
# check for fadnUtils.data.dir
if(is.null(get.data.dir())) {
cat("You have first to set the fadnUtils.data.dir using set.data.dir function. Exiting ....\n")
return(FALSE)
} else {
data.dir = get.data.dir();
csv.file = basename(file.path)
}
if(is.null(raw.f)) {
cat("You have to give a raw_str_map. Exiting ....\n")
return(FALSE)
}
if(convert.to.fadn.raw.rds(file.path,sepS,fadn.year,fadn.country,keep.csv)) {
convert.to.fadn.str.rds(fadn.country,fadn.year,raw.f)
} else {
cat("Failed to import. Exiting ...\n")
return(invisible(NULL))
}
}
#' Gets a fadn.raw.csv (csv file from DG-AGRI) and transforms it accordingly to fadn.raw.rds
#'
#' It saves two files:
#' - One that contain a wide format of the data, i.e. in tabular format that is identical to the csv data. This is uncompressed data.
#' - One that holds the same information in compressed data. It is a list that contains $data.char and $data.num data.tables in long format. 0 values are removed and only the col.id is the index on both data.tables
#'
#' @param file.path the full path of the csv file (the filename must be included)
#' @param sepS the separator of the csv files (by default ",")
#' @param fadn.year the year the csv files refers to (e.g. 2001)
#' @param fadn.country the three letter country code the csv files refers to (e.g. "ELL")
#' @param keep.csv if TRUE, copy the csv files to the CSV directory; else do not copy
#'
#' @return Saves the fadn.raw.rds file and returns TRUE if everything goes well
#' @import data.table
#'
#' @export
#' @examples
convert.to.fadn.raw.rds <- function(file.path="",
sepS=",",
fadn.year= NA,
fadn.country = NA,
keep.csv = F,
col.id = "ID") {
library(data.table)
#if file exist
if(!file.exists(file.path)) {
cat(paste0("File ",file.path," does not exist. Exiting ...\n"))
return(FALSE)
}
# check for fadnUtils.data.dir
if(is.null(get.data.dir())) {
cat("You have first to set the fadnUtils.data.dir using set.data.dir function. Exiting ....\n")
return(FALSE)
} else {
data.dir = get.data.dir();
csv.file = basename(file.path)
}
if(file.exists(paste0(data.dir,"/csv/",csv.file))) {cat("File exists. Overwriting ...\n")}
#copy csv to data.dir/csv
if(keep.csv) {
print(" copying file")
file.copy(file.path,paste0(data.dir,"/csv/",csv.file))
}
#convert to uncompressed rds and save
print(" creating fadn.raw.rds")
data.raw = data.table(read.csv(file.path,header = T, as.is = T))
attr(data.raw,"original.file.path") <-file.path
attr(data.raw,"fadn.year")<-fadn.year
attr(data.raw,"fadn.country")<-fadn.country
data.name = paste0("fadn.raw.",fadn.year,".",fadn.country,".rds")
saveRDS(data.raw,paste0(data.dir,"/rds/",data.name))
#convert to compressed rds and save
data.raw.classes = data.table(col.name=names(data.raw),col.class=sapply(data.raw,class))
data.raw.compr = list()
char.cols = c(col.id, data.raw.classes[col.class=="character",col.name])
data.raw.compr$data.char = data.raw[,..char.cols]
num.cols = c(col.id, data.raw.classes[!col.class=="character",col.name])
data.raw.compr$data.num = melt(data.raw[,..num.cols],id.vars = col.id)[!value==0]
attr(data.raw.compr,"original.file.path") <-file.path
attr(data.raw.compr,"fadn.year")<-fadn.year
attr(data.raw.compr,"fadn.country")<-fadn.country
attr(data.raw.compr,"col.names")<-names(data.raw)
attr(data.raw.compr,"col.id")<-col.id
data.name = paste0("fadn.raw.",fadn.year,".",fadn.country,".compressed.rds")
saveRDS(data.raw.compr,paste0(data.dir,"/rds/",data.name))
return(invisible(TRUE))
}
#' Converts an fadn.raw.rds file to fadn.str.rds file using a raw_str_map.json file
#'
#' The raw_str_map.json specification is as follows:
#'
#' {
#' "id": { "COLUMN in every list member in RDS": "COLUMN IN CSV", ....},
#' "info": { "COLUMN in info RDS": "COLUMN IN CSV", ....},
#' "livestock": {}
#' "crops": {
#' "CROP NAME 1": {"description": "description of crop name", "columns": {"VARIABLE NAME": COLUMN IN CSV", ....} },
#' "CROP NAME 2": {"description": "description of crop name", "columns": {"VARIABLE NAME": COLUMN IN CSV", ....} },
#' ....
#' }
#' }
#'
#'
#' The structure of the str.dir:
#' - A data.dir can hold more than one extractions.
#' - Each extraction has a short name (20 or less characters, whitespace is not allowed)
#' - Each extraction is stored in the data.dir/rds/<extraction_name>
#' - That folder contains the following files:
#' + raw_str_map.json: the raw_str_map
#' + fadn.str.<4-digit YEAR>.<3-letter COUNTRY>.rds: the extracted data
#'
#' Notes:
#' 1) The computed RDS file contains a list structure with the following keys: info, costs, livestock-animals and crops
#' All are data.tables. For all of them, the first columns are those that are contained in the "id" object
#' "info" and "costs" are in table format, i.e. each farm is one row and data is on columns, as defined in the
#' related raw_str_map.json file.
#' "crops" and "livestock-animals" are in wide data format (https://tidyr.tidyverse.org/), where one farm lies accross many rows, and each
#' row is a farm-crop-variableName-value combination
#'
#' 2) In $id, $info and $costs, "COLUMN IN CSV" can have two forms
#' i) a single column name in the fadn.raw csv file or a combination, e.g. "K120SA+K120FC+K120FU+K120CV-K120BV"
#' ii) the form of an object {"source": "the column in the csv", "description": "a description of what this column is about"}
#'
#' 3) We attach certain attributes that are useful for identifying informations:
#' i) In $info and $costs, the attribute "column description" provide information of the formula and the description of each column
#' ii) In $crops and $livestock-animals, the attribute "$crops.descriptions" and "$livestock.descriptions", provide the description of each CROP contained there
#' iii) In $crops and $ the attribute "$column.formulas" provide the formulas used in order to derive the VALUE
#'
#'
#'
#'
#' @param fadn.country string with the country to extract the str data
#' @param fadn.year the year to extract the structured data
#' @param raw_str_map.file the full path to the raw_str_map file.
#' @param str.short_name the short name of the str data. No spaces and text up to 20 characters
#' @param DEBUG if TRUE, prints more details on the conversion process
#'
#' @return Saves the rds.str.fadn and returns TRUE if everything goes well
#'
#' @export
#'
#' @examples
convert.to.fadn.str.rds <-function(fadn.country = NA,
fadn.year= NA,
raw_str_map.file=NULL,
force_external_raw_str_map=FALSE,
str.name = NULL,
DEBUG=F
) {
#check if str.short_name abides to specification
if(nchar(str.name)>20){
cat("str.name more should be 20 character and less. Exiting ....\n")
return(invisible(FALSE))
}
if (grepl("\\s",str.name)) {
cat("str.name should not contain any kind of whitespace. Exiting ....\n")
return(invisible(FALSE))
}
# check for fadnUtils.data.dir
if(is.null(get.data.dir())) {
cat("You have first to set the fadnUtils.data.dir using set.data.dir function. Exiting ....\n")
return(invisible(FALSE))
}
#if data.dir is a proper dat.dir
if(!check.data.dir.structure()) {
cat("data.dir does not have a proper structure. Exiting ....\n")
return(invisible(FALSE))
} else {
data.dir = get.data.dir();
rds.dir = paste0(data.dir,"/rds/")
str.dir = paste0(rds.dir,"/",str.name,"/")
}
#create/get the raw_str_map.json file
if(is.null(raw_str_map.file) & !file.exists(paste0(str.dir,"raw_str_map.json"))) {
cat("either provide a raw_str_map or an existing extraction dir Exiting ....\n")
return(invisible(FALSE))
}
#create/get the raw_str_map.json file ----
##case a
if(!is.null(raw_str_map.file) & !file.exists(paste0(str.dir,"raw_str_map.json"))) {
dir.create(str.dir)
file.copy(raw_str_map.file,paste0(str.dir,"raw_str_map.json"))#copy the file
cat("\n", raw_str_map.file, " copied to ", paste0(str.dir,"raw_str_map.json\n"))
}
##case b
if(!is.null(raw_str_map.file) & file.exists(paste0(str.dir,"raw_str_map.json")) & force_external_raw_str_map==T) {
file.copy(raw_str_map.file,paste0(str.dir,"raw_str_map.json"), overwrite = TRUE)#copy the file
cat("\n", raw_str_map.file, " copied to ", paste0(str.dir,"raw_str_map.json\n"))
}
##case c
if(!is.null(raw_str_map.file) & file.exists(paste0(str.dir,"raw_str_map.json")) & force_external_raw_str_map==F) {
cat("Ignoring the provided raw_str_map.json fie. The conversion will use the existing raw_str_map from the extraction_dir");
}
raw_str_map.file = paste0(str.dir,"raw_str_map.json")
# read raw.rds file ----
#check if fadn.raw.rds exist
fadn.raw.rds.filename = paste0(rds.dir,"fadn.raw.",fadn.year,".",fadn.country,".rds")
if(!file.exists(fadn.raw.rds.filename)) {
cat("You have first to create a fadn.raw.rds file for the year and country (from a DG-AGRI csv file). Exiting ....\n")
return(FALSE)
}
fadn.raw.rds = readRDS(fadn.raw.rds.filename)
library("jsonlite")
#read raw_str_map file to a list ----
raw_str_map = fromJSON(paste(readLines(raw_str_map.file), collapse="\n"))
#create empty return list
data.return = list()
attach(fadn.raw.rds) #o that eval works more efficiently
# .......................................................
#what id variables will be availble to all DT?
#create id ----
print("Doing id ...")
id.vars.list = take.raw_str_map.columns(raw_str_map$id)
id.dt = data.table()
id.dt.descriptions = data.frame(COLUMN=character(), FORMULA=character(), DESCRIPTION = character());
#intersting links
#
# https://stackoverflow.com/questions/28327738/evaluate-expression-in-r-data-table
start.time <- Sys.time()
for(k in names(id.vars.list)) {
if(DEBUG) { print(paste0(" doing ", k, " = ", id.vars.list[[k]][["SOURCE"]])); }
#approach 1: 19 sec
# f <- function(e, .SD) eval(parse(text=e[1]), envir=.SD)
# id.dt.cur = fadn.raw.rds[,list(
# k=f(id.vars.list[[k]][["SOURCE"]],
# .SD)
# )]
# id.dt.cur = fadn.raw.rds[,list(k=f(id.vars.list[[k]][["SOURCE"]],.SD))]
#approach 0: 10 sec
#id.dt.cur = fadn.raw.rds[,list(k=eval(parse(text=id.vars.list[[k]][["SOURCE"]])))]
#approach 2: 0.3 sec
expr = eval(parse(text=id.vars.list[[k]][["SOURCE"]]))
id.dt.cur = fadn.raw.rds[,list(k=expr)]
setnames(id.dt.cur,k)
id.dt=cbind(id.dt,id.dt.cur)
id.dt.descriptions = rbind(id.dt.descriptions,
data.frame(COLUMN=k,
FORMULA=id.vars.list[[k]][["SOURCE"]],
DESCRIPTION = id.vars.list[[k]][["DESCRIPTION"]])
)
}
attr(id.dt,"column.descriptions") <- id.dt.descriptions;
end.time <- Sys.time()
time.taken <- end.time - start.time
time.taken
# .......................................................
#create info ----
print("Doing info ...")
info = copy(id.dt)
info.descriptions = data.frame(COLUMN=character(), FORMULA=character(), DESCRIPTION = character());
##now for each info key, add the column
info.map = take.raw_str_map.columns(raw_str_map$info)
for(k in names(info.map)) {
if(DEBUG) { print(paste0(" doing ", k, " = ", info.map[[k]][["SOURCE"]])); }
#info.cur = fadn.raw.rds[,list(k=eval(parse(text=info.map[[k]][["SOURCE"]])))]
expr = eval(parse(text=info.map[[k]][["SOURCE"]]))
info.cur = fadn.raw.rds[,list(k=expr)]
#fadn.raw.rds[,list(k=eval(parse(text=info.map[[k]])))]
setnames(info.cur,names(info.cur),k)
info=cbind(info,info.cur)
info.descriptions = rbind(info.descriptions,
data.frame(COLUMN=k,
FORMULA=info.map[[k]][["SOURCE"]],
DESCRIPTION = info.map[[k]][["DESCRIPTION"]])
)
}
attr(info,"column.descriptions") <- rbind( attr(info,"column.descriptions"), info.descriptions )
data.return$info = info;
# # .......................................................
# #create livestock-animals ----
#
# if(DEBUG){cat("\n")}
# print("Doing livestock-animals ...")
#
# ##now load the map
# lvst.animals.map = raw_str_map$livestock$animals
#
# ##if not empty
# if(length(names(lvst.animals.map))>0) {
# lvst.animals.id = copy(id.dt)
# lvst.animals.descriptions = data.frame( LIVESTOCK=character(), DESCRIPTION=character() );
# lvst.animals.column.formulas = data.frame( LIVESTOCK=character(), COLUMN=character(), FORMULA=character());
#
# lvst.animals = NULL #this is the DT to save the data
#
#
# for(lvst.animal.key in names(lvst.animals.map)) { #loop each lvst.animals name (e.g. DCOW, SCOW, etc.)
#
# if(DEBUG) { print(paste0(" ", lvst.animal.key)); } else { cat(".")}
#
# lvst.animal.key.map = lvst.animals.map[[lvst.animal.key]]
#
# lvst.animals.descriptions = rbind(lvst.animals.descriptions,
# data.frame( LIVESTOCK=lvst.animal.key,
# DESCRIPTION=lvst.animal.key.map[["description"]] ) )
#
# for(k in names(lvst.animal.key.map[["columns"]])) { #loop each key within the animal name
#
# cmd = parse(text=(lvst.animal.key.map[["columns"]][[k]]))
# if(DEBUG) { print(paste0(" running ",k," = ", cmd)); }
#
# #lvst.animals.cur = fadn.raw.rds[,list(k=eval(parse(text=(lvst.animal.key.map[["columns"]][[k]]) )))]
# expr = eval(parse(text=(lvst.animal.key.map[["columns"]][[k]]) ))
# lvst.animals.cur = fadn.raw.rds[,list(k=expr)]
#
#
# setnames(lvst.animals.cur,names(lvst.animals.cur),"VALUE")
#
# lvst.animals.column.formulas = rbind(lvst.animals.column.formulas,
# data.frame(LIVESTOCK= lvst.animal.key, COLUMN=k , FORMULA=lvst.animal.key.map[["columns"]][[k]])
# );
#
# if(is.null(lvst.animals)) {
# lvst.animals = cbind(lvst.animals.id,LIVESTOCK=lvst.animal.key,VARIABLE=k,lvst.animals.cur)
# }
# else{
# lvst.animals=rbind(lvst.animals[VALUE!=0],
# cbind(lvst.animals.id,LIVESTOCK=lvst.animal.key,VARIABLE=k,lvst.animals.cur)
# )
# }
#
# }
#
# }
#
# lvst.animals$VARIABLE = factor(lvst.animals$VARIABLE)
# attr(lvst.animals,"column.formulas") <- lvst.animals.column.formulas
# attr(lvst.animals,"livestock.descriptions") <- lvst.animals.descriptions
# data.return$lvst$animals = lvst.animals[VALUE!=0];
#
# if(!DEBUG){cat("\n")}
#
# }
#
#
#
# # .......................................................
# #create livestock-products ----
#
# if(DEBUG){cat("\n")}
# print("Doing livestock-products ...")
#
# ##now load the map
# lvst.animals.map = raw_str_map$livestock$products
#
# ##if not empty
# if(length(names(lvst.animals.map))>0) {
# lvst.animals.id = copy(id.dt)
# lvst.animals.descriptions = data.frame( LIVESTOCK=character(), DESCRIPTION=character() );
# lvst.animals.column.formulas = data.frame( LIVESTOCK=character(), COLUMN=character(), FORMULA=character());
#
# lvst.animals = NULL #this is the DT to save the data
#
#
# for(lvst.animal.key in names(lvst.animals.map)) { #loop each lvst.animals name (e.g. DCOW, SCOW, etc.)
#
# if(DEBUG) { print(paste0(" ", lvst.animal.key)); } else { cat(".")}
#
# lvst.animal.key.map = lvst.animals.map[[lvst.animal.key]]
#
# lvst.animals.descriptions = rbind(lvst.animals.descriptions,
# data.frame( LIVESTOCK=lvst.animal.key,
# DESCRIPTION=lvst.animal.key.map[["description"]] ) )
#
# for(k in names(lvst.animal.key.map[["columns"]])) { #loop each key within the animal name
#
# cmd = parse(text=(lvst.animal.key.map[["columns"]][[k]]))
# if(DEBUG) { print(paste0(" running ",k," = ", cmd)); }
#
# #lvst.animals.cur = fadn.raw.rds[,list(k=eval(parse(text=(lvst.animal.key.map[["columns"]][[k]]) )))]
# expr = eval(parse(text=(lvst.animal.key.map[["columns"]][[k]]) ))
# lvst.animals.cur = fadn.raw.rds[,list(k=expr)]
#
#
# setnames(lvst.animals.cur,names(lvst.animals.cur),"VALUE")
#
# lvst.animals.column.formulas = rbind(lvst.animals.column.formulas,
# data.frame(LIVESTOCK= lvst.animal.key, COLUMN=k , FORMULA=lvst.animal.key.map[["columns"]][[k]])
# );
#
# if(is.null(lvst.animals)) {
# lvst.animals = cbind(lvst.animals.id,LIVESTOCK=lvst.animal.key,VARIABLE=k,lvst.animals.cur)
# }
# else{
# lvst.animals=rbind(lvst.animals[VALUE!=0],
# cbind(lvst.animals.id,LIVESTOCK=lvst.animal.key,VARIABLE=k,lvst.animals.cur)
# )
# }
#
# }
#
# }
#
# lvst.animals$VARIABLE = factor(lvst.animals$VARIABLE)
# attr(lvst.animals,"column.formulas") <- lvst.animals.column.formulas
# attr(lvst.animals,"livestock.descriptions") <- lvst.animals.descriptions
# data.return$lvst$products = lvst.animals[VALUE!=0];
#
# if(!DEBUG){cat("\n")}
#
# }
# .......................................................
#create crops ----
print("Doing crops ...")
##now load the map
crops.map = raw_str_map$crops
##if not empty
if(length(names(crops.map))>0) {
crops.id = copy(id.dt)
crops.descriptions = data.frame( CROP=character(), DESCRIPTION=character() );
crops.column.formulas = data.frame( CROP=character(), COLUMN=character(), FORMULA=character());
crops = NULL #this is the DT to save the data
for(crop.key in names(crops.map)) { #loop each crop name (e.g. DWHE, BARL, etc.)
if(DEBUG) { print(paste0(" ", crop.key)); } else { cat(".")}
crop.key.map = crops.map[[crop.key]]
crops.descriptions = rbind(crops.descriptions, data.frame( CROP=crop.key, DESCRIPTION=crop.key.map[["description"]] ) )
for(k in names(crop.key.map[["columns"]])) { #loop each key within the crop name
cmd = parse(text=(crop.key.map[["columns"]][[k]]))
if(DEBUG) { print(paste0(" running ",k," = ", cmd)); }
#crops.cur = fadn.raw.rds[,list(k=eval(parse(text=(crop.key.map[["columns"]][[k]]) )))]
expr = eval(parse(text=(crop.key.map[["columns"]][[k]]) ))
crops.cur = fadn.raw.rds[,list(k=expr)]
setnames(crops.cur,names(crops.cur),"VALUE")
crops.column.formulas = rbindlist(
list(crops.column.formulas,
data.frame(CROP= crop.key, COLUMN=k , FORMULA=crop.key.map[["columns"]][[k]])
)
);
if(is.null(crops)) {
crops = cbind(crops.id,CROP=crop.key,VARIABLE=k,crops.cur)
}
else{
crops=rbind(crops[VALUE!=0],
cbind(crops.id,CROP=crop.key,VARIABLE=k,crops.cur)
)
}
}
}
crops$VARIABLE = factor(crops$VARIABLE)
attr(crops,"column.formulas") <- crops.column.formulas
attr(crops,"crops.descriptions") <- crops.descriptions
data.return$crops = crops[VALUE!=0];
}
cat("\n")
detach(fadn.raw.rds)
# .......................................................
#save and return ----
data.name = paste0("fadn.str.",fadn.year,".",fadn.country,".rds")
if(file.exists(data.name)) {cat("Data exists. Overwriting ...\n")}
saveRDS(data.return,paste0(rds.dir,str.name,"/",data.name))
#copy the rds
return(invisible(TRUE))
}
#....................................................................
#
#This file includes functions related to handling (loading, saving)
# data from raw.rds files or str.rds,
# saved in the RDS subdirectory of the data.dir
#
#....................................................................
#' Load all rds.raw.FADN data for selcted years and countries (rbinds them)
#'
#' It adds two columns: load.YEAR and load.COUNTRY in each row. This can be used to group per year,country the data
#'
#' @param countries a character vector with all the 3-letter codes of the selected countries, e.g. c("ELL", "ESP").
#' If "all" is included, all available countries are loaded
#' @param years a numeric vector with the years selected. If "all" is included, all available years are loaded
#' @param col.filter a character vector with the columns to load. If NULL, all columns are loaded. E.g columns=c('ILOTH_VET_V', 'ILVOTH_V','id')
#' @param row.filter a string giving an expression that will be evaluated in order to select rows. If NULL, all rows are returned. E.g. filter='TF8==1'
#'
#' @return list( "countries"=> c(<RETURNED COUNTRIES), "years"=>c(<AVAILABLE YEARS) )
#' @export
#'
#' @examples
#'
load.fadn.raw.rds = function(countries=c("all"),
years=c("all"),
col.filter = NULL,
row.filter=NULL) {
# check for fadnUtils.data.dir
if(is.null(get.data.dir())) {
warning("You have first to set the fadnUtils.data.dir using set.data.dir function. Exiting ....")
return(FALSE)
} else {
data.dir = get.data.dir();
rds.dir = paste0(data.dir,"/rds/")
}
#select the COUNTRY-YEAR to return
rds.avail = get.available.fadn.raw.rds();
if("all"%in%countries) {countries= unique(rds.avail$COUNTRY)}
if("all"%in%years) {years= unique(rds.avail$YEAR)}
fadn.raw.rds.avail = rds.avail[YEAR%in%years & COUNTRY%in%countries]
fadn.raw.rds.avail[,FILE:=paste0("fadn.raw.",YEAR,".",COUNTRY,".rds")]
fadn.raw.rds.avail.files = fadn.raw.rds.avail[,FILE]
#Rbind the
data.return = NULL;
for(f in fadn.raw.rds.avail.files) {
data.cur = readRDS(paste0(rds.dir, f))
if(! is.null(row.filter)) {
data.cur=data.cur[eval(parse(text=row.filter))]
}
if(! is.null(col.filter)) { #if columns is specified, load only those
col.filter.effective = col.filter[col.filter%in%names(data.cur)]
if(length(col.filter.effective)==0) { #check that at least one data column is contained in the filter, otherwise abort
warning('Column filter does not contain any column of the fadn.raw.rds data. Operation aborted')
return(NULL)
}
if(length(col.filter)>length(col.filter.effective)) {
warning(paste0("Not all columns were found: ", paste0(col.filter[!names(col.filter)%in%col.filter.effective],collapse=",")))
}
data.cur=data.cur[,..col.filter.effective]
}
data.cur[,load.YEAR:=fadn.raw.rds.avail[FILE==f,YEAR] ]
data.cur[,load.COUNTRY:=fadn.raw.rds.avail[FILE==f,COUNTRY] ]
print(paste0("Loading from ",rds.dir, f))
if(is.null(data.return)) {
data.return=data.cur
} else {
#take care in case some columns do not exist between the rbind-ed datasets
data.return=rbindlist(list(data.return,data.cur),fill = T)
}
}
return(data.return)
}
#' Load all rds.str.FADN data for seelcted years and countries
#'
#' @param str.name The extractionname to load data from
#' @param countries a character vector with all the 3-letter codes of the selected countries, e.g. c("ELL", "ESP").
#' If "all" is included, all available countries are loaded
#' @param years
#'
#' @return list( "countries"=> c(<RETURNED COUNTRIES), "years"=>c(<AVAILABLE YEARS) )
#' @export
#'
#' @examples
#'
load.fadn.str.rds = function(extraction_dir,
countries=c("all"),
years=c("all")) {
# check for fadnUtils.data.dir
if(is.null(get.data.dir())) {
warning("You have first to set the fadnUtils.data.dir using set.data.dir function. Exiting ....")
return(FALSE)
} else {
data.dir = get.data.dir();
rds.dir = paste0(data.dir,"/rds/",extraction_dir,"/")
}
#select the COUNTRY-YEAR to return
rds.avail = get.available.fadn.str.rds(extract_dir = extraction_dir);
if("all"%in%countries) {countries= unique(rds.avail$COUNTRY)}
if("all"%in%years) {years= unique(rds.avail$YEAR)}
fadn.str.rds.avail = rds.avail[YEAR%in%years & COUNTRY%in%countries]
fadn.str.rds.avail[,FILE:=paste0("fadn.str.",YEAR,".",COUNTRY,".rds")]
fadn.str.rds.avail = fadn.str.rds.avail[,FILE]
#Rbind the
data.return = NULL;
for(f in fadn.str.rds.avail) {
data.cur = readRDS(paste0(rds.dir, f))
cat(paste0("Loading from ",rds.dir, f, "\n"))
if(is.null(data.return)) {
data.return=data.cur
} else {
data.return[["info"]] = rbindlist(list(data.return[["info"]],data.cur[["info"]]),fill = T)
data.return[["lvst"]][["animals"]] = rbindlist(list(data.return[["lvst"]][["animals"]],data.cur[["lvst"]][["animals"]]),fill = T)
data.return[["lvst"]][["products"]] = rbindlist(list(data.return[["lvst"]][["products"]],data.cur[["lvst"]][["products"]]),fill = T)
data.return[["crops"]] = rbindlist(list(data.return[["crops"]],data.cur[["crops"]]),fill = T)
}
}
return(data.return)
}
#' Title
#'
#' @param countries
#' @param years
#'
#' @return
#' @export
#'
#' @examples
delete.fadn.raw = function (countries=NULL, years=NULL) {
data.dir = get.data.dir();
rds.dir = paste0(data.dir,"/rds/")
rds.avail = get.available.fadn.raw.rds();
if("all"%in%countries) {countries= unique(rds.avail$COUNTRY)}
if("all"%in%years) {years= unique(rds.avail$YEAR)}
fadn.raw.rds.avail = rds.avail[YEAR%in%years & COUNTRY%in%countries]
fadn.raw.rds.avail[,FILE:=paste0("fadn.raw.",YEAR,".",COUNTRY,".rds")]
files.to.delete = fadn.raw.rds.avail[,paste0(rds.dir, FILE)]
file.remove(files.to.delete,showWarnings = TRUE)
}
#' Title
#'
#' @param countries
#' @param years
#'
#' @return
#' @export
#'
#' @examples
delete.fadn.str = function (countries=c(), years=c()) {
data.dir = get.data.dir();
rds.dir = paste0(data.dir,"/rds/")
rds.avail = get.available.fadn.str.rds();
if("all"%in%countries) {countries= unique(rds.avail$COUNTRY)}
if("all"%in%years) {years= unique(rds.avail$YEAR)}
fadn.str.rds.avail = rds.avail[YEAR%in%years & COUNTRY%in%countries]
fadn.str.rds.avail[,FILE:=paste0("fadn.str.",YEAR,".",COUNTRY,".rds")]
files.to.delete = fadn.str.rds.avail[,paste0(rds.dir, FILE)]
res = file.remove(files.to.delete,showWarnings = TRUE)
}
#' Grep a pattern into a raw.rds column names
#'
#' Useful for the case where one want to look if there are certain columns present or missing
#'
#' @param countries a character vector with all the 3-letter codes of the selected countries, e.g. c("ELL", "ESP").
#' If "all" is included, all available countries are loaded
#' @param years a numeric vector with the years selected. If "all" is included, all available years are loa
#' @param pattern a grep-like character pattern. This parameter is passed as is to the grep function
#' @param show if TRUE, the columsn are printed
#'
#' @return Prints the columns and returns them invisibly
#' @export
#'
#' @examples
grep.columns.in.raw.rds = function(pattern,countries=c("all"),years=c("all")) {
# check for fadnUtils.data.dir
if(is.null(get.data.dir())) {
warning("You have first to set the fadnUtils.data.dir using set.data.dir function. Exiting ....")
return(FALSE)
} else {
data.dir = get.data.dir();
rds.dir = paste0(data.dir,"/rds/")
}
#find what countries, years to do the grep
rds.avail = get.available.fadn.raw.rds();
if("all"%in%countries) {countries= unique(rds.avail$COUNTRY)}
if("all"%in%years) {years= unique(rds.avail$YEAR)}
fadn.raw.rds.avail = rds.avail[YEAR%in%years & COUNTRY%in%countries]
fadn.raw.rds.avail[,FILE:=paste0("fadn.raw.",YEAR,".",COUNTRY,".rds")]
fadn.raw.rds.avail = fadn.raw.rds.avail[,FILE]
if(length(fadn.raw.rds.avail)==0) {
cat("No raw.rds for the selected countries-years")
}
#for each selected, do the grep
res = list();
for(f in fadn.raw.rds.avail) {
cat(paste0("\nGrep into ",f,"\n") )
data.cur = readRDS(paste0(rds.dir, f))
names.cur = names(data.cur)
print(names.cur[grep(pattern,names.cur)])
res[[f]] = names.cur[grep(pattern,names.cur)]
}
return(invisible(res))
}
#....................................................................
#
#The following lines includes functions related to stored rds.data,
#
# All function requires that the data.dir is already set
#....................................................................
#This file contains functions related to managing data.dir (create, set, read contents, etc.)
#' Creates a data.dir
#'
#' @param folder.path
#' @param raw_str_map.file
#' @param metadata
#'
#' @return TRUE if created succesfully; FALSE otherwise. It return in invisible mode.
#' @export
#'
#' @examples
create.data.dir = function(folder.path,
metadata = "{\n'description': 'No Description Provided',\n'created-by':'',\n'created-at':''\n}") {
if(file.exists(folder.path)) {
#if it is already a data.dir, exit
if(check.data.dir.structure(folder.path)) {
cat("This is already a data.dir structure. Doing nothing.\n")
return(invisible(FALSE));
}
}
else { #if folder does not exist, create it
if(!dir.create(folder.path)) {print("Could not create folder."); return(invisible(FALSE));}
}
#create fadnUtils.metadata ----
cat(metadata, file=paste0(folder.path,"/fadnUtils.metadata.json"))
#create DIR>csv ----
dir.create(paste0(folder.path,"/csv"))
#create DIR>rds ----
dir.create(paste0(folder.path,"/rds"))
#create DIR>spool ----
dir.create(paste0(folder.path,"/spool"))
cat(
"In this folder you can save project related files",
file = paste0(folder.path,"/spool/readme.txt")
)
return(invisible(TRUE));
}
#' Sets the data.dir
#'
#' @param new.data.dir the full path to the folder where the data.dir will be. Ending slash "/" shall not be present
#'
#' @return TRUE if succesfully set the data.dir; FALSE otherwise. Returns in invisible mode.
#' @export
#'
#' @examples
set.data.dir = function(new.data.dir) {
#check that it is a valid data.dir ----
if(!check.data.dir.structure(new.data.dir, silent = F)) {
cat("Not a valid data.dir. cannot set the folder provided.\n");
return(invisible(FALSE));
}
#set option for fadnUtils.data.dir ----
options("fadnUtils.data.dir" = new.data.dir)
#load stored.rds.env ----
stored.rds.env.path = paste0(new.data.dir,"/stored.rds.data.RData")
if(file.exists(stored.rds.env.path)) {
load(stored.rds.env.path,envir = env.stored.rds)
}
return(invisible(TRUE));
}
#' Gets the data.dir
#'
#' data.dir is the folder where data is stored
#' r package will create two subfolders:
#' csv = location to store the csv files of th DG-AGRI (fadn.raw.csv)
#' rds = location to store rds files (fadn.raw.rds, fadn.str.rds, etc.)
#'
#' @return the value of option("fadnUtils.data.dir")
#' @export
#'
#' @examples
get.data.dir = function() {
ret = getOption("fadnUtils.data.dir")
if(is.null(ret)) {
return(NULL)
} else {
if(ret=="") {
return(NULL)
} else {
return(ret)
}
}
}
#' Show the contents of data.dir
#'
#' @param data.dir a specific directory to show contents, otherwise it will read the fadnUtils.data.dir
#' @param return.list if T, returns a list, otherwise print the results
#'
#' @return returns a list containing: {description: "the description of the data dir",
#' DT of fadn.raw.rds (Country-Year) and of fadn.str.rds (country-Year)
#' @export
#'
#' @examples
show.data.dir.contents = function(data.dir=NULL, return.list=F) {
if(is.null(data.dir)) {
data.dir=get.data.dir()
}
if(!check.data.dir.structure(data.dir)) {
warning("Not a valid data.dir. Exiting ....")
return(NULL)
}
#store the results in a list
ret=list()
#get descriptio
ret[['description']]=paste(readLines(paste0(data.dir,'/fadnUtils.metadata.json'),warn = F),collapse = " ")
#get raw data
ret[["raw"]] = get.available.fadn.raw.rds()
#get extracted data
ret[["extractions"]]=list();
extr.dirs = list.dirs(path = paste0(data.dir,"/rds"), full.names = F, recursive = F)
for(d in extr.dirs) {
ret[["extractions"]][[d]]=list()
ret[["extractions"]][[d]][["contents"]] = get.available.fadn.str.rds(extract_dir = d)
}
if(return.list==T) {
return(invisible(ret));
} else {
cat("\n","Description: \n", ret[['description']])
cat("\n\nRaw data: \n")
print(dcast(ret[["raw"]] ,COUNTRY~YEAR,value.var = "COUNTRY",fun.aggregate = length))
cat("\n\nExtracted data : \n")
for(d in extr.dirs) {
cat("\n---- Extracted dir: ", d, "\n")
if(nrow(ret[["extractions"]][[d]][["contents"]])>0 ) {
print(dcast(ret[["extractions"]][[d]][["contents"]] ,COUNTRY~YEAR,value.var = "COUNTRY",fun.aggregate = length))
} else {
cat("No data present")
}
}
cat("\n\n")
}
}
#' Checks if the structure of the fadnUtils.data.dir is ok
#'
#' @param data.dir a specific directory to show contents, otherwise it will read the fadnUtils.data.dir
#' @param silent if TRUE, do not print any message
#'
#' @return TRUe if everything is ok; FALSE otherwise
#' @export
#'
#' @examples
check.data.dir.structure = function(data.dir=NULL, silent=T) {
messages = c()
if(is.null(data.dir)) {data.dir = get.data.dir()}
if(!file.exists(data.dir)) {
messages=c(messages,"Folder provided as data.dir does not exit.");
}
if(!file.exists(paste0(data.dir,"/fadnUtils.metadata.json"))) {
messages=c(messages,"Problem with data.dir: fadnUtils.metadata.json does not exist.")
}
if(!file.exists(paste0(data.dir,"/csv"))) {
messages=c(messages,"Problem with data.dir: 'csv' directory does not exist.")
}
if(!file.exists(paste0(data.dir,"/rds"))) {
messages=c(messages,"Problem with data.dir: 'rds' directory does not exist.")
}
if(length(messages)==0) {return(invisible(TRUE))}
if(!silent) {
cat(messages,sep = "\n")
}
return(invisible(FALSE))
}
#' Returns the available YEAR-COUNTRY fadn.raw.rds
#'
#' @return a DT of the available YEAR-COUNTRY fadn.raw.rds
#'
#' @export
#'
#' @examples
get.available.fadn.raw.rds = function(data.dir=NULL) {
if(is.null(data.dir)) {data.dir=get.data.dir()}
if(is.null(data.dir)) {
warning("Either provide explicitly a fadnUtils.data.dir to the function orfirst to set the fadnUtils.data.dir using set.data.dir function. Exiting ....")
return(FALSE)
}
rds.dir = paste0(data.dir,"/rds/")
rds.avail.files = list.files(rds.dir,pattern = "fadn.raw.*.rds")
pattern = "fadn[.]raw[.](\\d*)[.](\\S*)[.]rds"
fadn.raw.rds.avail = data.table(
YEAR = gsub(pattern,"\\1",rds.avail.files),
COUNTRY = sub(pattern,"\\2",rds.avail.files)
)
return(fadn.raw.rds.avail)
}
#' Returns the available YEAR-COUNTRY fadn.str.rds, for each str.folder
#'
#' @return DT of the available YEAR-COUNTRY fadn.str.rds
#' @param extract_dir The name of the extraction dir
#'
#' @export
#'
#' @examples
get.available.fadn.str.rds = function(data.dir=NULL,extract_dir) {
if(is.null(data.dir)) {data.dir=get.data.dir()}
if(is.null(data.dir)) {
warning("Either provide explicitly a fadnUtils.data.dir or set the fadnUtils.data.dir using set.data.dir function. Exiting ....")
return(FALSE)
}
rds.dir = paste0(data.dir,"/rds/",extract_dir)
rds.avail.files = list.files(rds.dir,pattern = "fadn.str.*.rds")
pattern = "fadn[.]str[.](\\d+)[.](\\S+)[.]rds"
fadn.str.rds.avail = data.table(
YEAR = gsub(pattern,"\\1",rds.avail.files),
COUNTRY = gsub(pattern,"\\2",rds.avail.files)
)
return(fadn.str.rds.avail)
}
R/raw_str_map.R 0 → 100644
#This file contains functions related to managing raw_str_map files
#' Merges two raw_str_map files and returns either a list or a file
#'
#' All entries in the new.raw_str_map file replace those on the source.raw_str_map file
#'
#' Both files must be relative to the current data.dir/raw_str_maps
#'
#' @param source.raw_str_map.file the filename of the source raw_str_map. It must be relative the raw_str_maps of the current data.dir
#' @param new.raw_str_map.file the filename of the mask raw_str_map. It will replace any entries of the source file. It must be relative the raw_str_maps of the current data.dir
#' @param return.file If set to T, a temporary full file path that contains the merge is returned. Otherwise a list with the contents of the merge is returned
#'
#' @return FALSE in case of problem / if return.file=T, the temporary full path of a file that contains the merged result in json / A list with the contents of the merge if return.file=F
#' @export
#'
#' @examples
raw_str_map.merge = function(source.raw_str_map.file=NULL,
new.raw_str_map.file=NULL,
return.file=F){
#Check that data.dir exist and raw_str_map files exist ----
## check for fadnUtils.data.dir
if(is.null(get.data.dir())) {
warning("You have first to set the fadnUtils.data.dir using set.data.dir function. Exiting ....")
return(invisible(FALSE))
}
##if data.dir is a proper dat.dir
if(check.data.dir.structure()) {
data.dir = get.data.dir();
rds.dir = paste0(data.dir,"/rds/")
raw_str_map.file = paste0(data.dir,'/raw_str_map.json')
}
##check if raw
source.raw_str_map.path = paste0(data.dir,"/raw_str_maps/",source.raw_str_map.file)
new.raw_str_map.path = paste0(data.dir,"/raw_str_maps/",new.raw_str_map.file)
if(!file.exists(source.raw_str_map.path) | !file.exists(new.raw_str_map.path)) {
warning("One or more of the raw_str_maps provided, do not exist. Exiting ....")
return(FALSE)
}
#read json files to a list ----
library("jsonlite")
source.raw_str_map = fromJSON(paste(readLines(source.raw_str_map.path), collapse="\n"))
new.raw_str_map = fromJSON(paste(readLines(new.raw_str_map.path), collapse="\n"))
#merge both files----
merged.raw_str_map = modifyList(source.raw_str_map,new.raw_str_map)
#save or export results----
if(return.file) {
#save to temporary file as JSON
f.tmp = tempfile(pattern = "file", tmpdir = tempdir())
cat(
toJSON(merged.raw_str_map, auto_unbox = T),
"\n",
file=f.tmp
)
return(f.tmp)
}
else {
return(merged.raw_str_map)
}
}
#' Takes $id, $info, $costs objects of a raw_str_map object and create Source-Description pairs
#'
#' Used internally
#'
#' @param listcontent
#'
#' @return list(COLUMN-NAME = c(SOURCE=csv column name, DESCRIPTION=description of column), ..... )
#'
#' @examples
take.raw_str_map.columns = function( listcontent ) {
str.cols = names(listcontent)
splitcol = function(x) {
if(is.list(x)) {
return(c("SOURCE"=x$source, "DESCRIPTION"=x$description))
} else {
return(c("SOURCE"=x, "DESCRIPTION"=""))}
}
return(
lapply(listcontent,splitcol)
)
}
#' Checks if the definitions of a raw_str_map are compatible with a fadn.raw.rds for a certain year and country
#'
#' Checks if all values are actual columns of the fadn.raw.rds file
#'
#' @param fadn.country
#' @param fadn.year
#' @param raw_str_map.file The full filepath of the raw_str_map
#'
#' @return
#' @export
#'
#' @examples
check.raw_str_map = function(raw_str_map.file,
fadn.country=NA,
fadn.year=NA) {
#TODO
#check if raw_str_map exist
#TODO
#check if fadn.raw.rds exist
#read file
raw_str_map.file = paste0(data.dir,"/raw_str_maps/",raw_str_map.file)
raw_str_map = fromJSON(paste(readLines(raw_str_map.file), collapse="\n"))
#get all keys
map.keys.orig = c( unlist(raw_str_map$id),
unlist(raw_str_map$info),
unlist(raw_str_map$crops,recursive = T)
)
#split all characters
map.keys.orig.split = unlist(
strsplit(map.keys.orig,
"\\+|\\-|,|ifelse|\\(|\\)|\\>|\\/|\\<"),
recursive = T
)
#remove whitespaces and empty
map.keys.orig.split=trimws(map.keys.orig.split)
map.keys.orig.split=map.keys.orig.split[!map.keys.orig.split==""]
#get the column listing of fadn.raw.rds
data.dir = get.data.dir();
rds.dir = paste0(data.dir,"/rds/")
data.name = paste0("fadn.raw.",fadn.year,".",fadn.country,".rds")
rds.data = readRDS(paste0(rds.dir,data.name))
rds.data.columns = colnames(rds.data)
#print what is in raw_str_map but not in fadn.raw.rds
cat(paste0("What is in raw_str_map but not in ",data.name,"\n"))
map.keys.orig.split[!map.keys.orig.split%in%rds.data.columns]
}