#This file contains functions related to managing raw_str_map files
#' Merges two raw_str_map files and returns either a list or a file
#'
#' All entries in the new.raw_str_map file replace those on the source.raw_str_map file
#'
#' Both files must be relative to the current data.dir/raw_str_maps
#'
#' @param source.raw_str_map.file the filename of the source raw_str_map. It must be relative the raw_str_maps of the current data.dir
#' @param new.raw_str_map.file the filename of the mask raw_str_map. It will replace any entries of the source file. It must be relative the raw_str_maps of the current data.dir
#' @param return.file If set to T, a temporary full file path that contains the merge is returned. Otherwise a list with the contents of the merge is returned
#'
#' @return FALSE in case of problem / if return.file=T, the temporary full path of a file that contains the merged result in json / A list with the contents of the merge if return.file=F
#' @export
#'
#' @examples
raw_str_map.merge = function(source.raw_str_map.file=NULL,
new.raw_str_map.file=NULL,
return.file=F){
#Check that data.dir exist and raw_str_map files exist ----
## check for fadnUtils.data.dir
if(is.null(get.data.dir())) {
warning("You have first to set the fadnUtils.data.dir using set.data.dir function. Exiting ....")
return(invisible(FALSE))
}
##if data.dir is a proper dat.dir
if(check.data.dir.structure()) {
data.dir = get.data.dir();
rds.dir = paste0(data.dir,"/rds/")
raw_str_map.file = paste0(data.dir,'/raw_str_map.json')
}
##check if raw
source.raw_str_map.path = paste0(data.dir,"/raw_str_maps/",source.raw_str_map.file)
new.raw_str_map.path = paste0(data.dir,"/raw_str_maps/",new.raw_str_map.file)
if(!file.exists(source.raw_str_map.path) | !file.exists(new.raw_str_map.path)) {
warning("One or more of the raw_str_maps provided, do not exist. Exiting ....")
return(FALSE)
}
#read json files to a list ----
library("jsonlite")
source.raw_str_map = fromJSON(paste(readLines(source.raw_str_map.path), collapse="\n"))
new.raw_str_map = fromJSON(paste(readLines(new.raw_str_map.path), collapse="\n"))
#merge both files----
merged.raw_str_map = modifyList(source.raw_str_map,new.raw_str_map)
#save or export results----
if(return.file) {
#save to temporary file as JSON
f.tmp = tempfile(pattern = "file", tmpdir = tempdir())
cat(
toJSON(merged.raw_str_map, auto_unbox = T),
"\n",
file=f.tmp
)
return(f.tmp)
}
else {
return(merged.raw_str_map)
}
}
#' Takes $id, $info, $costs objects of a raw_str_map object and create Source-Description pairs
#'
#' Used internally
#'
#' @param listcontent
#'
#' @return list(COLUMN-NAME = c(SOURCE=csv column name, DESCRIPTION=description of column), ..... )
#'
#' @examples
take.raw_str_map.columns = function( listcontent ) {
str.cols = names(listcontent)
splitcol = function(x) {
if(is.list(x)) {
return(c("SOURCE"=x$source, "DESCRIPTION"=x$description))
} else {
return(c("SOURCE"=x, "DESCRIPTION"=""))}
}
return(
lapply(listcontent,splitcol)
)
}
#' Checks if the definitions of a raw_str_map are compatible with a fadn.raw.rds for a certain year and country
#'
#' Checks if all values are actual columns of the fadn.raw.rds file
#'
#' @param fadn.country
#' @param fadn.year
#' @param raw_str_map.file The full filepath of the raw_str_map
#'
#' @return
#' @export
#'
#' @examples
check.raw_str_map = function(raw_str_map.file,
fadn.country=NA,
fadn.year=NA) {
#TODO
#check if raw_str_map exist
#TODO
#check if fadn.raw.rds exist
#read file
raw_str_map.file = paste0(data.dir,"/raw_str_maps/",raw_str_map.file)
raw_str_map = fromJSON(paste(readLines(raw_str_map.file), collapse="\n"))
#get all keys
map.keys.orig = c( unlist(raw_str_map$id),
unlist(raw_str_map$info),
unlist(raw_str_map$crops,recursive = T)
)
#split all characters
map.keys.orig.split = unlist(
strsplit(map.keys.orig,
"\\+|\\-|,|ifelse|\\(|\\)|\\>|\\/|\\<"),
recursive = T
)
#remove whitespaces and empty
map.keys.orig.split=trimws(map.keys.orig.split)
map.keys.orig.split=map.keys.orig.split[!map.keys.orig.split==""]
#get the column listing of fadn.raw.rds
data.dir = get.data.dir();
rds.dir = paste0(data.dir,"/rds/")
data.name = paste0("fadn.raw.",fadn.year,".",fadn.country,".rds")
rds.data = readRDS(paste0(rds.dir,data.name))
rds.data.columns = colnames(rds.data)
#print what is in raw_str_map but not in fadn.raw.rds
cat(paste0("What is in raw_str_map but not in ",data.name,"\n"))
map.keys.orig.split[!map.keys.orig.split%in%rds.data.columns]
}
#' Imports a DG-AGRI csv into fadnUtils
#'
#' It first call the convert.to.fadn.raw.rds and then convert.to.fadn.str.rds
#'
#' @param file.path the full path of the file (the filename must be included)
#' @param raw.f the raw_str_map file to use. it must reside inside 'raw_str_maps; folder of the data.dir
#' @param sepS the separator of the csv files (by default ",")
#' @param fadn.year the year the csv files refers to (e.g. 2001)
#' @param fadn.country the three letter country code the csv files refers to (e.g. "ELL")
#' @param keep.csv if TRUE, copy the csv files; else do not copy
#'
#' @return NULL
#' @export
#'
#' @examples
import.fadn.csv <- function (file.path,
raw.f=NULL,
sepS=",",
fadn.year= NA,
fadn.country = NA,
keep.csv=F) {
#if file exist
if(!file.exists(file.path)) {
cat(paste0("File ",file.path," does not exist. Exiting ...\n"))
return(invisible(FALSE))
}
# check for fadnUtils.data.dir
if(is.null(get.data.dir())) {
cat("You have first to set the fadnUtils.data.dir using set.data.dir function. Exiting ....\n")
return(FALSE)
} else {
data.dir = get.data.dir();
csv.file = basename(file.path)
}
if(is.null(raw.f)) {
cat("You have to give a raw_str_map. Exiting ....\n")
return(FALSE)
}
if(convert.to.fadn.raw.rds(file.path,sepS,fadn.year,fadn.country,keep.csv)) {
convert.to.fadn.str.rds(fadn.country,fadn.year,raw.f)
} else {
cat("Failed to import. Exiting ...\n")
return(invisible(NULL))
}
}
#' Gets a fadn.raw.csv (csv file from DG-AGRI) and transforms it accordingly to fadn.raw.rds
#'
#' It saves two files:
#' - One that contain a wide format of the data, i.e. in tabular format that is identical to the csv data. This is uncompressed data.
#' - One that holds the same information in compressed data. It is a list that contains $data.char and $data.num data.tables in long format. 0 values are removed and only the col.id is the index on both data.tables
#'
#' @param file.path the full path of the csv file (the filename must be included)
#' @param sepS the separator of the csv files (by default ",")
#' @param fadn.year the year the csv files refers to (e.g. 2001)
#' @param fadn.country the three letter country code the csv files refers to (e.g. "ELL")
#' @param keep.csv if TRUE, copy the csv files to the CSV directory; else do not copy
#'
#' @return Saves the fadn.raw.rds file and returns TRUE if everything goes well
#' @import data.table
#'
#' @export
#' @examples
convert.to.fadn.raw.rds <- function(file.path="",
sepS=",",
fadn.year= NA,
fadn.country = NA,
keep.csv = F,
col.id = "ID") {
library(data.table)
#if file exist
if(!file.exists(file.path)) {
cat(paste0("File ",file.path," does not exist. Exiting ...\n"))
return(FALSE)
}
# check for fadnUtils.data.dir
if(is.null(get.data.dir())) {
cat("You have first to set the fadnUtils.data.dir using set.data.dir function. Exiting ....\n")
return(FALSE)
} else {
data.dir = get.data.dir();
csv.file = basename(file.path)
}
if(file.exists(paste0(data.dir,"/csv/",csv.file))) {cat("File exists. Overwriting ...\n")}
#copy csv to data.dir/csv
if(keep.csv) {
print(" copying file")
file.copy(file.path,paste0(data.dir,"/csv/",csv.file))
}
#convert to uncompressed rds and save
print(" creating fadn.raw.rds")
data.raw = data.table(read.csv(file.path,header = T, as.is = T))
attr(data.raw,"original.file.path") <-file.path
attr(data.raw,"fadn.year")<-fadn.year
attr(data.raw,"fadn.country")<-fadn.country
data.name = paste0("fadn.raw.",fadn.year,".",fadn.country,".rds")
saveRDS(data.raw,paste0(data.dir,"/rds/",data.name))
#convert to compressed rds and save
data.raw.classes = data.table(col.name=names(data.raw),col.class=sapply(data.raw,class))
data.raw.compr = list()
char.cols = c(col.id, data.raw.classes[col.class=="character",col.name])
data.raw.compr$data.char = data.raw[,..char.cols]
num.cols = c(col.id, data.raw.classes[!col.class=="character",col.name])
data.raw.compr$data.num = melt(data.raw[,..num.cols],id.vars = col.id)[!value==0]
attr(data.raw.compr,"original.file.path") <-file.path
attr(data.raw.compr,"fadn.year")<-fadn.year
attr(data.raw.compr,"fadn.country")<-fadn.country
attr(data.raw.compr,"col.names")<-names(data.raw)
attr(data.raw.compr,"col.id")<-col.id
data.name = paste0("fadn.raw.",fadn.year,".",fadn.country,".compressed.rds")
saveRDS(data.raw.compr,paste0(data.dir,"/rds/",data.name))
return(invisible(TRUE))
}
#' Converts an fadn.raw.rds file to fadn.str.rds file using a raw_str_map.json file
#'
#' The raw_str_map.json specification is as follows:
#'
#' {
#' "id": { "COLUMN in every list member in RDS": "COLUMN IN CSV", ....},
#' "info": { "COLUMN in info RDS": "COLUMN IN CSV", ....},
#' "livestock": {}
#' "crops": {
#' "CROP NAME 1": {"description": "description of crop name", "columns": {"VARIABLE NAME": COLUMN IN CSV", ....} },
#' "CROP NAME 2": {"description": "description of crop name", "columns": {"VARIABLE NAME": COLUMN IN CSV", ....} },
#' ....
#' }
#' }
#'
#'
#' The structure of the str.dir:
#' - A data.dir can hold more than one extractions.
#' - Each extraction has a short name (20 or less characters, whitespace is not allowed)
#' - Each extraction is stored in the data.dir/rds/<extraction_name>
#' - That folder contains the following files:
#' + raw_str_map.json: the raw_str_map
#' + fadn.str.<4-digit YEAR>.<3-letter COUNTRY>.rds: the extracted data
#'
#' Notes:
#' 1) The computed RDS file contains a list structure with the following keys: info, costs, livestock-animals and crops
#' All are data.tables. For all of them, the first columns are those that are contained in the "id" object
#' "info" and "costs" are in table format, i.e. each farm is one row and data is on columns, as defined in the
#' related raw_str_map.json file.
#' "crops" and "livestock-animals" are in wide data format (https://tidyr.tidyverse.org/), where one farm lies accross many rows, and each
#' row is a farm-crop-variableName-value combination
#'
#' 2) In $id, $info and $costs, "COLUMN IN CSV" can have two forms
#' i) a single column name in the fadn.raw csv file or a combination, e.g. "K120SA+K120FC+K120FU+K120CV-K120BV"
#' ii) the form of an object {"source": "the column in the csv", "description": "a description of what this column is about"}
#'
#' 3) We attach certain attributes that are useful for identifying informations:
#' i) In $info and $costs, the attribute "column description" provide information of the formula and the description of each column
#' ii) In $crops and $livestock-animals, the attribute "$crops.descriptions" and "$livestock.descriptions", provide the description of each CROP contained there
#' iii) In $crops and $ the attribute "$column.formulas" provide the formulas used in order to derive the VALUE
#'
#'
#'
#'
#' @param fadn.country string with the country to extract the str data
#' @param fadn.year the year to extract the structured data
#' @param raw_str_map.file the full path to the raw_str_map file.
#' @param str.short_name the short name of the str data. No spaces and text up to 20 characters
#' @param DEBUG if TRUE, prints more details on the conversion process
#'
#' @return Saves the rds.str.fadn and returns TRUE if everything goes well
#'
#' @export
#'
#' @examples
convert.to.fadn.str.rds <-function(fadn.country = NA,
fadn.year= NA,
raw_str_map.file=NULL,
force_external_raw_str_map=FALSE,
str.name = NULL,
DEBUG=F
) {
#check if str.short_name abides to specification
if(nchar(str.name)>20){
cat("str.name more should be 20 character and less. Exiting ....\n")
return(invisible(FALSE))
}
if (grepl("\\s",str.name)) {
cat("str.name should not contain any kind of whitespace. Exiting ....\n")
return(invisible(FALSE))
}
# check for fadnUtils.data.dir
if(is.null(get.data.dir())) {
cat("You have first to set the fadnUtils.data.dir using set.data.dir function. Exiting ....\n")
return(invisible(FALSE))
}
#if data.dir is a proper dat.dir
if(!check.data.dir.structure()) {
cat("data.dir does not have a proper structure. Exiting ....\n")
return(invisible(FALSE))
} else {
data.dir = get.data.dir();
rds.dir = paste0(data.dir,"/rds/")
str.dir = paste0(rds.dir,"/",str.name,"/")
}
#create/get the raw_str_map.json file
if(is.null(raw_str_map.file) & !file.exists(paste0(str.dir,"raw_str_map.json"))) {
cat("either provide a raw_str_map or an existing extraction dir Exiting ....\n")
return(invisible(FALSE))
}
#create/get the raw_str_map.json file ----
##case a
if(!is.null(raw_str_map.file) & !file.exists(paste0(str.dir,"raw_str_map.json"))) {
dir.create(str.dir)
file.copy(raw_str_map.file,paste0(str.dir,"raw_str_map.json"))#copy the file
cat("\n", raw_str_map.file, " copied to ", paste0(str.dir,"raw_str_map.json\n"))
}
##case b
if(!is.null(raw_str_map.file) & file.exists(paste0(str.dir,"raw_str_map.json")) & force_external_raw_str_map==T) {
file.copy(raw_str_map.file,paste0(str.dir,"raw_str_map.json"), overwrite = TRUE)#copy the file
cat("\n", raw_str_map.file, " copied to ", paste0(str.dir,"raw_str_map.json\n"))
}
##case c
if(!is.null(raw_str_map.file) & file.exists(paste0(str.dir,"raw_str_map.json")) & force_external_raw_str_map==F) {
cat("Ignoring the provided raw_str_map.json fie. The conversion will use the existing raw_str_map from the extraction_dir");
}
raw_str_map.file = paste0(str.dir,"raw_str_map.json")
# read raw.rds file ----
#check if fadn.raw.rds exist
fadn.raw.rds.filename = paste0(rds.dir,"fadn.raw.",fadn.year,".",fadn.country,".rds")
if(!file.exists(fadn.raw.rds.filename)) {
cat("You have first to create a fadn.raw.rds file for the year and country (from a DG-AGRI csv file). Exiting ....\n")
return(FALSE)
}
fadn.raw.rds = readRDS(fadn.raw.rds.filename)
library("jsonlite")
#read raw_str_map file to a list ----
raw_str_map = fromJSON(paste(readLines(raw_str_map.file), collapse="\n"))
#create empty return list
data.return = list()
attach(fadn.raw.rds) #o that eval works more efficiently
# .......................................................
#what id variables will be availble to all DT?
#create id ----
print("Doing id ...")
id.vars.list = take.raw_str_map.columns(raw_str_map$id)
id.dt = data.table()
id.dt.descriptions = data.frame(COLUMN=character(), FORMULA=character(), DESCRIPTION = character());
#intersting links
#
# https://stackoverflow.com/questions/28327738/evaluate-expression-in-r-data-table
start.time <- Sys.time()
for(k in names(id.vars.list)) {
if(DEBUG) { print(paste0(" doing ", k, " = ", id.vars.list[[k]][["SOURCE"]])); }
#approach 1: 19 sec
# f <- function(e, .SD) eval(parse(text=e[1]), envir=.SD)
# id.dt.cur = fadn.raw.rds[,list(
# k=f(id.vars.list[[k]][["SOURCE"]],
# .SD)
# )]
# id.dt.cur = fadn.raw.rds[,list(k=f(id.vars.list[[k]][["SOURCE"]],.SD))]
#approach 0: 10 sec
#id.dt.cur = fadn.raw.rds[,list(k=eval(parse(text=id.vars.list[[k]][["SOURCE"]])))]
#approach 2: 0.3 sec
expr = eval(parse(text=id.vars.list[[k]][["SOURCE"]]))
id.dt.cur = fadn.raw.rds[,list(k=expr)]
setnames(id.dt.cur,k)
id.dt=cbind(id.dt,id.dt.cur)
id.dt.descriptions = rbind(id.dt.descriptions,
data.frame(COLUMN=k,
FORMULA=id.vars.list[[k]][["SOURCE"]],
DESCRIPTION = id.vars.list[[k]][["DESCRIPTION"]])
)
}
attr(id.dt,"column.descriptions") <- id.dt.descriptions;
end.time <- Sys.time()
time.taken <- end.time - start.time
time.taken
# .......................................................
#create info ----
print("Doing info ...")
info = copy(id.dt)
info.descriptions = data.frame(COLUMN=character(), FORMULA=character(), DESCRIPTION = character());
##now for each info key, add the column
info.map = take.raw_str_map.columns(raw_str_map$info)
for(k in names(info.map)) {
if(DEBUG) { print(paste0(" doing ", k, " = ", info.map[[k]][["SOURCE"]])); }
#info.cur = fadn.raw.rds[,list(k=eval(parse(text=info.map[[k]][["SOURCE"]])))]
expr = eval(parse(text=info.map[[k]][["SOURCE"]]))
info.cur = fadn.raw.rds[,list(k=expr)]
#fadn.raw.rds[,list(k=eval(parse(text=info.map[[k]])))]
setnames(info.cur,names(info.cur),k)
info=cbind(info,info.cur)
info.descriptions = rbind(info.descriptions,
data.frame(COLUMN=k,
FORMULA=info.map[[k]][["SOURCE"]],
DESCRIPTION = info.map[[k]][["DESCRIPTION"]])
)
}
attr(info,"column.descriptions") <- rbind( attr(info,"column.descriptions"), info.descriptions )
data.return$info = info;
# # .......................................................
# #create livestock-animals ----
#
# if(DEBUG){cat("\n")}
# print("Doing livestock-animals ...")
#
# ##now load the map
# lvst.animals.map = raw_str_map$livestock$animals
#
# ##if not empty
# if(length(names(lvst.animals.map))>0) {
# lvst.animals.id = copy(id.dt)
# lvst.animals.descriptions = data.frame( LIVESTOCK=character(), DESCRIPTION=character() );
# lvst.animals.column.formulas = data.frame( LIVESTOCK=character(), COLUMN=character(), FORMULA=character());
#
# lvst.animals = NULL #this is the DT to save the data
#
#
# for(lvst.animal.key in names(lvst.animals.map)) { #loop each lvst.animals name (e.g. DCOW, SCOW, etc.)
#
# if(DEBUG) { print(paste0(" ", lvst.animal.key)); } else { cat(".")}
#
# lvst.animal.key.map = lvst.animals.map[[lvst.animal.key]]
#
# lvst.animals.descriptions = rbind(lvst.animals.descriptions,
# data.frame( LIVESTOCK=lvst.animal.key,
# DESCRIPTION=lvst.animal.key.map[["description"]] ) )
#
# for(k in names(lvst.animal.key.map[["columns"]])) { #loop each key within the animal name
#
# cmd = parse(text=(lvst.animal.key.map[["columns"]][[k]]))
# if(DEBUG) { print(paste0(" running ",k," = ", cmd)); }
#
# #lvst.animals.cur = fadn.raw.rds[,list(k=eval(parse(text=(lvst.animal.key.map[["columns"]][[k]]) )))]
# expr = eval(parse(text=(lvst.animal.key.map[["columns"]][[k]]) ))
# lvst.animals.cur = fadn.raw.rds[,list(k=expr)]
#
#
# setnames(lvst.animals.cur,names(lvst.animals.cur),"VALUE")
#
# lvst.animals.column.formulas = rbind(lvst.animals.column.formulas,
# data.frame(LIVESTOCK= lvst.animal.key, COLUMN=k , FORMULA=lvst.animal.key.map[["columns"]][[k]])
# );
#
# if(is.null(lvst.animals)) {
# lvst.animals = cbind(lvst.animals.id,LIVESTOCK=lvst.animal.key,VARIABLE=k,lvst.animals.cur)
# }
# else{
# lvst.animals=rbind(lvst.animals[VALUE!=0],
# cbind(lvst.animals.id,LIVESTOCK=lvst.animal.key,VARIABLE=k,lvst.animals.cur)
# )
# }
#
# }
#
# }
#
# lvst.animals$VARIABLE = factor(lvst.animals$VARIABLE)
# attr(lvst.animals,"column.formulas") <- lvst.animals.column.formulas
# attr(lvst.animals,"livestock.descriptions") <- lvst.animals.descriptions
# data.return$lvst$animals = lvst.animals[VALUE!=0];
#
# if(!DEBUG){cat("\n")}
#
# }
#
#
#
# # .......................................................
# #create livestock-products ----
#
# if(DEBUG){cat("\n")}
# print("Doing livestock-products ...")
#
# ##now load the map
# lvst.animals.map = raw_str_map$livestock$products
#
# ##if not empty
# if(length(names(lvst.animals.map))>0) {
# lvst.animals.id = copy(id.dt)
# lvst.animals.descriptions = data.frame( LIVESTOCK=character(), DESCRIPTION=character() );
# lvst.animals.column.formulas = data.frame( LIVESTOCK=character(), COLUMN=character(), FORMULA=character());
#
# lvst.animals = NULL #this is the DT to save the data
#
#
# for(lvst.animal.key in names(lvst.animals.map)) { #loop each lvst.animals name (e.g. DCOW, SCOW, etc.)
#
# if(DEBUG) { print(paste0(" ", lvst.animal.key)); } else { cat(".")}
#
# lvst.animal.key.map = lvst.animals.map[[lvst.animal.key]]
#
# lvst.animals.descriptions = rbind(lvst.animals.descriptions,
# data.frame( LIVESTOCK=lvst.animal.key,
# DESCRIPTION=lvst.animal.key.map[["description"]] ) )
#
# for(k in names(lvst.animal.key.map[["columns"]])) { #loop each key within the animal name
#
# cmd = parse(text=(lvst.animal.key.map[["columns"]][[k]]))
# if(DEBUG) { print(paste0(" running ",k," = ", cmd)); }
#
# #lvst.animals.cur = fadn.raw.rds[,list(k=eval(parse(text=(lvst.animal.key.map[["columns"]][[k]]) )))]
# expr = eval(parse(text=(lvst.animal.key.map[["columns"]][[k]]) ))
# lvst.animals.cur = fadn.raw.rds[,list(k=expr)]
#
#
# setnames(lvst.animals.cur,names(lvst.animals.cur),"VALUE")
#
# lvst.animals.column.formulas = rbind(lvst.animals.column.formulas,
# data.frame(LIVESTOCK= lvst.animal.key, COLUMN=k , FORMULA=lvst.animal.key.map[["columns"]][[k]])
# );
#
# if(is.null(lvst.animals)) {
# lvst.animals = cbind(lvst.animals.id,LIVESTOCK=lvst.animal.key,VARIABLE=k,lvst.animals.cur)
# }
# else{
# lvst.animals=rbind(lvst.animals[VALUE!=0],
# cbind(lvst.animals.id,LIVESTOCK=lvst.animal.key,VARIABLE=k,lvst.animals.cur)
# )
# }
#
# }
#
# }
#
# lvst.animals$VARIABLE = factor(lvst.animals$VARIABLE)
# attr(lvst.animals,"column.formulas") <- lvst.animals.column.formulas
# attr(lvst.animals,"livestock.descriptions") <- lvst.animals.descriptions
# data.return$lvst$products = lvst.animals[VALUE!=0];
#
# if(!DEBUG){cat("\n")}
#
# }
# .......................................................
#create crops ----
print("Doing crops ...")
##now load the map
crops.map = raw_str_map$crops
##if not empty
if(length(names(crops.map))>0) {
crops.id = copy(id.dt)
crops.descriptions = data.frame( CROP=character(), DESCRIPTION=character() );
crops.column.formulas = data.frame( CROP=character(), COLUMN=character(), FORMULA=character());
crops = NULL #this is the DT to save the data
for(crop.key in names(crops.map)) { #loop each crop name (e.g. DWHE, BARL, etc.)
if(DEBUG) { print(paste0(" ", crop.key)); } else { cat(".")}
crop.key.map = crops.map[[crop.key]]
crops.descriptions = rbind(crops.descriptions, data.frame( CROP=crop.key, DESCRIPTION=crop.key.map[["description"]] ) )
for(k in names(crop.key.map[["columns"]])) { #loop each key within the crop name
cmd = parse(text=(crop.key.map[["columns"]][[k]]))
if(DEBUG) { print(paste0(" running ",k," = ", cmd)); }
#crops.cur = fadn.raw.rds[,list(k=eval(parse(text=(crop.key.map[["columns"]][[k]]) )))]
expr = eval(parse(text=(crop.key.map[["columns"]][[k]]) ))
crops.cur = fadn.raw.rds[,list(k=expr)]
setnames(crops.cur,names(crops.cur),"VALUE")
crops.column.formulas = rbindlist(
list(crops.column.formulas,
data.frame(CROP= crop.key, COLUMN=k , FORMULA=crop.key.map[["columns"]][[k]])
)
);
if(is.null(crops)) {
crops = cbind(crops.id,CROP=crop.key,VARIABLE=k,crops.cur)
}
else{
crops=rbind(crops[VALUE!=0],
cbind(crops.id,CROP=crop.key,VARIABLE=k,crops.cur)
)
}
}
}
crops$VARIABLE = factor(crops$VARIABLE)
attr(crops,"column.formulas") <- crops.column.formulas
attr(crops,"crops.descriptions") <- crops.descriptions
data.return$crops = crops[VALUE!=0];
}
cat("\n")
detach(fadn.raw.rds)
# .......................................................
#save and return ----
data.name = paste0("fadn.str.",fadn.year,".",fadn.country,".rds")
if(file.exists(data.name)) {cat("Data exists. Overwriting ...\n")}
saveRDS(data.return,paste0(rds.dir,str.name,"/",data.name))
#copy the rds
return(invisible(TRUE))
}
#.................................................................................................#
#
# Load all csv files from DG-AGRI
#
#.................................................................................................#
#import also other files, fadn.raw.rds to fadn.str.rds
import.fadn.csv(paste0(dg.agri.dir,"ELL2009.csv"),
fadn.year = 2009,
fadn.country = "ELL")
import.fadn.csv(paste0(dg.agri.dir,"ELL2010.csv"),
fadn.year = 2010,
fadn.country = "ELL")
import.fadn.csv(paste0(dg.agri.dir,"ELL2011.csv"),
fadn.year = 2011,
fadn.country = "ELL")
import.fadn.csv(paste0(dg.agri.dir,"ELL2012.csv"),
fadn.year = 2012,
fadn.country = "ELL")
# ESP
import.fadn.csv(paste0(dg.agri.dir,"ESP2007.csv"),
fadn.year = 2007,
fadn.country = "ESP")
import.fadn.csv(paste0(dg.agri.dir,"ESP2008.csv"),
fadn.year = 2008,
fadn.country = "ESP")
import.fadn.csv(paste0(dg.agri.dir,"ESP2009.csv"),
fadn.year = 2009,
fadn.country = "ESP")
import.fadn.csv(paste0(dg.agri.dir,"ESP2010.csv"),
fadn.year = 2010,
fadn.country = "ESP")
import.fadn.csv(paste0(dg.agri.dir,"ESP2011.csv"),
fadn.year = 2011,
fadn.country = "ESP")
import.fadn.csv(paste0(dg.agri.dir,"ESP2012.csv"),
fadn.year = 2012,
fadn.country = "ESP")
#check what is loaded
show.data.dir.contents()
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/raw_str_map.R
\name{take.raw_str_map.columns}
\alias{take.raw_str_map.columns}
\title{Takes $id, $info, $costs objects of a raw_str_map object and create Source-Description pairs}
\usage{
take.raw_str_map.columns(listcontent)
}
\arguments{
\item{listcontent}{}
}
\value{
list(COLUMN-NAME = c(SOURCE=csv column name, DESCRIPTION=description of column), ..... )
}
\description{
Used internally
}
#....................................................................
#
#This file includes functions related to handling (loading, saving)
# data from raw.rds files or str.rds,
# saved in the RDS subdirectory of the data.dir
#
#....................................................................
#' Load all rds.raw.FADN data for selcted years and countries (rbinds them)
#'
#' It adds two columns: load.YEAR and load.COUNTRY in each row. This can be used to group per year,country the data
#'
#' @param countries a character vector with all the 3-letter codes of the selected countries, e.g. c("ELL", "ESP").
#' If "all" is included, all available countries are loaded
#' @param years a numeric vector with the years selected. If "all" is included, all available years are loaded
#' @param col.filter a character vector with the columns to load. If NULL, all columns are loaded. E.g columns=c('ILOTH_VET_V', 'ILVOTH_V','id')
#' @param row.filter a string giving an expression that will be evaluated in order to select rows. If NULL, all rows are returned. E.g. filter='TF8==1'
#'
#' @return list( "countries"=> c(<RETURNED COUNTRIES), "years"=>c(<AVAILABLE YEARS) )
#' @export
#'
#' @examples
#'
load.fadn.raw.rds = function(countries=c("all"),
years=c("all"),
col.filter = NULL,
row.filter=NULL) {
# check for fadnUtils.data.dir
if(is.null(get.data.dir())) {
warning("You have first to set the fadnUtils.data.dir using set.data.dir function. Exiting ....")
return(FALSE)
} else {
data.dir = get.data.dir();
rds.dir = paste0(data.dir,"/rds/")
}
#select the COUNTRY-YEAR to return
rds.avail = get.available.fadn.raw.rds();
if("all"%in%countries) {countries= unique(rds.avail$COUNTRY)}
if("all"%in%years) {years= unique(rds.avail$YEAR)}
fadn.raw.rds.avail = rds.avail[YEAR%in%years & COUNTRY%in%countries]
fadn.raw.rds.avail[,FILE:=paste0("fadn.raw.",YEAR,".",COUNTRY,".rds")]
fadn.raw.rds.avail.files = fadn.raw.rds.avail[,FILE]
#Rbind the
data.return = NULL;
for(f in fadn.raw.rds.avail.files) {
data.cur = readRDS(paste0(rds.dir, f))
if(! is.null(row.filter)) {
data.cur=data.cur[eval(parse(text=row.filter))]
}
if(! is.null(col.filter)) { #if columns is specified, load only those
col.filter.effective = col.filter[col.filter%in%names(data.cur)]
if(length(col.filter.effective)==0) { #check that at least one data column is contained in the filter, otherwise abort
warning('Column filter does not contain any column of the fadn.raw.rds data. Operation aborted')
return(NULL)
}
if(length(col.filter)>length(col.filter.effective)) {
warning(paste0("Not all columns were found: ", paste0(col.filter[!names(col.filter)%in%col.filter.effective],collapse=",")))
}
data.cur=data.cur[,..col.filter.effective]
}
data.cur[,load.YEAR:=fadn.raw.rds.avail[FILE==f,YEAR] ]
data.cur[,load.COUNTRY:=fadn.raw.rds.avail[FILE==f,COUNTRY] ]
print(paste0("Loading from ",rds.dir, f))
if(is.null(data.return)) {
data.return=data.cur
} else {
#take care in case some columns do not exist between the rbind-ed datasets
data.return=rbindlist(list(data.return,data.cur),fill = T)
}
}
return(data.return)
}
#' Load all rds.str.FADN data for seelcted years and countries
#'
#' @param str.name The extractionname to load data from
#' @param countries a character vector with all the 3-letter codes of the selected countries, e.g. c("ELL", "ESP").
#' If "all" is included, all available countries are loaded
#' @param years
#'
#' @return list( "countries"=> c(<RETURNED COUNTRIES), "years"=>c(<AVAILABLE YEARS) )
#' @export
#'
#' @examples
#'
load.fadn.str.rds = function(extraction_dir,
countries=c("all"),
years=c("all")) {
# check for fadnUtils.data.dir
if(is.null(get.data.dir())) {
warning("You have first to set the fadnUtils.data.dir using set.data.dir function. Exiting ....")
return(FALSE)
} else {
data.dir = get.data.dir();
rds.dir = paste0(data.dir,"/rds/",extraction_dir,"/")
}
#select the COUNTRY-YEAR to return
rds.avail = get.available.fadn.str.rds(extract_dir = extraction_dir);
if("all"%in%countries) {countries= unique(rds.avail$COUNTRY)}
if("all"%in%years) {years= unique(rds.avail$YEAR)}
fadn.str.rds.avail = rds.avail[YEAR%in%years & COUNTRY%in%countries]
fadn.str.rds.avail[,FILE:=paste0("fadn.str.",YEAR,".",COUNTRY,".rds")]
fadn.str.rds.avail = fadn.str.rds.avail[,FILE]
#Rbind the
data.return = NULL;
for(f in fadn.str.rds.avail) {
data.cur = readRDS(paste0(rds.dir, f))
cat(paste0("Loading from ",rds.dir, f, "\n"))
if(is.null(data.return)) {
data.return=data.cur
} else {
data.return[["info"]] = rbindlist(list(data.return[["info"]],data.cur[["info"]]),fill = T)
data.return[["lvst"]][["animals"]] = rbindlist(list(data.return[["lvst"]][["animals"]],data.cur[["lvst"]][["animals"]]),fill = T)
data.return[["lvst"]][["products"]] = rbindlist(list(data.return[["lvst"]][["products"]],data.cur[["lvst"]][["products"]]),fill = T)
data.return[["crops"]] = rbindlist(list(data.return[["crops"]],data.cur[["crops"]]),fill = T)
}
}
return(data.return)
}
#' Title
#'
#' @param countries
#' @param years
#'
#' @return
#' @export
#'
#' @examples
delete.fadn.raw = function (countries=NULL, years=NULL) {
data.dir = get.data.dir();
rds.dir = paste0(data.dir,"/rds/")
rds.avail = get.available.fadn.raw.rds();
if("all"%in%countries) {countries= unique(rds.avail$COUNTRY)}
if("all"%in%years) {years= unique(rds.avail$YEAR)}
fadn.raw.rds.avail = rds.avail[YEAR%in%years & COUNTRY%in%countries]
fadn.raw.rds.avail[,FILE:=paste0("fadn.raw.",YEAR,".",COUNTRY,".rds")]
files.to.delete = fadn.raw.rds.avail[,paste0(rds.dir, FILE)]
file.remove(files.to.delete,showWarnings = TRUE)
}
#' Title
#'
#' @param countries
#' @param years
#'
#' @return
#' @export
#'
#' @examples
delete.fadn.str = function (countries=c(), years=c()) {
data.dir = get.data.dir();
rds.dir = paste0(data.dir,"/rds/")
rds.avail = get.available.fadn.str.rds();
if("all"%in%countries) {countries= unique(rds.avail$COUNTRY)}
if("all"%in%years) {years= unique(rds.avail$YEAR)}
fadn.str.rds.avail = rds.avail[YEAR%in%years & COUNTRY%in%countries]
fadn.str.rds.avail[,FILE:=paste0("fadn.str.",YEAR,".",COUNTRY,".rds")]
files.to.delete = fadn.str.rds.avail[,paste0(rds.dir, FILE)]
res = file.remove(files.to.delete,showWarnings = TRUE)
}
#' Grep a pattern into a raw.rds column names
#'
#' Useful for the case where one want to look if there are certain columns present or missing
#'
#' @param countries a character vector with all the 3-letter codes of the selected countries, e.g. c("ELL", "ESP").
#' If "all" is included, all available countries are loaded
#' @param years a numeric vector with the years selected. If "all" is included, all available years are loa
#' @param pattern a grep-like character pattern. This parameter is passed as is to the grep function
#' @param show if TRUE, the columsn are printed
#'
#' @return Prints the columns and returns them invisibly
#' @export
#'
#' @examples
grep.columns.in.raw.rds = function(pattern,countries=c("all"),years=c("all")) {
# check for fadnUtils.data.dir
if(is.null(get.data.dir())) {
warning("You have first to set the fadnUtils.data.dir using set.data.dir function. Exiting ....")
return(FALSE)
} else {
data.dir = get.data.dir();
rds.dir = paste0(data.dir,"/rds/")
}
#find what countries, years to do the grep
rds.avail = get.available.fadn.raw.rds();
if("all"%in%countries) {countries= unique(rds.avail$COUNTRY)}
if("all"%in%years) {years= unique(rds.avail$YEAR)}
fadn.raw.rds.avail = rds.avail[YEAR%in%years & COUNTRY%in%countries]
fadn.raw.rds.avail[,FILE:=paste0("fadn.raw.",YEAR,".",COUNTRY,".rds")]
fadn.raw.rds.avail = fadn.raw.rds.avail[,FILE]
if(length(fadn.raw.rds.avail)==0) {
cat("No raw.rds for the selected countries-years")
}
#for each selected, do the grep
res = list();
for(f in fadn.raw.rds.avail) {
cat(paste0("\nGrep into ",f,"\n") )
data.cur = readRDS(paste0(rds.dir, f))
names.cur = names(data.cur)
print(names.cur[grep(pattern,names.cur)])
res[[f]] = names.cur[grep(pattern,names.cur)]
}
return(invisible(res))
}
#....................................................................
#
#The following lines includes functions related to stored rds.data,
#
# All function requires that the data.dir is already set
#....................................................................
Package: fadnUtils
Title: An R package to easily load and manipulate FADN data
Type: Package
Version: 1.0.2
Author: Dimitris Kremmydas <Dimitrios.KREMMYDAS@ec.europa.eu>
Maintainer: Dimitris Kremmydas <Dimitrios.KREMMYDAS@ec.europa.eu>
Description: Manipulate and perform data analysis with FADN data
License: Proprietary software (JRC D.4)
Encoding: UTF-8
LazyData: TRUE
Depends: R (>= 3.4.0)
Imports:
data.table,
jsonlite
RoxygenNote: 7.1.1
File deleted
...@@ -18,6 +18,7 @@ export(grep.columns.in.raw.rds) ...@@ -18,6 +18,7 @@ export(grep.columns.in.raw.rds)
export(import.fadn.csv) export(import.fadn.csv)
export(load.fadn.raw.rds) export(load.fadn.raw.rds)
export(load.fadn.str.rds) export(load.fadn.str.rds)
export(nuts.heatmap.group)
export(raw_str_map.merge) export(raw_str_map.merge)
export(set.data.dir) export(set.data.dir)
export(show.data.dir.contents) export(show.data.dir.contents)
......
# Nuts Transformation
#'
#'
#'
#'
#'nuts heatmap output
#' @param group.by regional levels (fadn region, NUTS1, NUTS2 and NUTS3)
#' @param fadn.data.info
#' @author Yang
#' @describeIn
#' @export
#' @examples
nuts.heatmap.group <- function(group.by, fadn.data.info){
fadn.data.info = str_data$info
#todo: check folder
#create DIR>plots ----
dir.create(paste0(CurrentProjectDirectory,"/plot"))
# create group folder ---
dir.create(paste0(CurrentProjectDirectory,"/plot/", "fadn_",deparse(substitute(group.by)),"_plots"))
countries <- unique(fadn.data.info$COUNTRY)
for (country in countries){
heatmap_data <- fadn.data.info %>%
filter(COUNTRY == country) %>%
count({{group.by}},YEAR) %>%
arrange(YEAR) %>%
# pivot_wider(names_from = YEAR,values_from=n) %>%
mutate(across(3:last_col(),function(x)ifelse(is.na(x),0,1)))
# pivot_longer(c(`2004`:`2018` ), names_to = "YEAR", values_to = "n")
heatmap_data <- data.frame(lapply(heatmap_data,as.character))
if (NROW(heatmap_data$NUTS3 %>% unique()) >100 ) {
text.size = 3
} else{text.size = 11}
p <- heatmap_data %>% ggplot(aes(YEAR, {{group.by}}, fill= n)) + geom_tile() +
theme(legend.position="none") +
ggtitle(country)+ theme(axis.text.y = element_text(size = text.size))
#
# p_name <- country
#
# assign(p_name, heatmap_data %>% ggplot ( aes(YEAR,NUTS2, fill= n)) + geom_tile() +
# theme(legend.position="none") +
# ggtitle(country))
ggsave(plot = p ,
filename = paste0(CurrentProjectDirectory,"/plot/","fadn_",deparse(substitute(group.by)),"_plots/",country,".png"),
width = 18, height = 8)
}
}
# fadnUtils
Develop by Dimitrios Kremmydas (JRC) and Xinxin Yang (THÜNEN)
The fadnUtils package facilitates the efficient handling of FADN data within the R language framework. Furthermore, the package is targeted for use within the JRC D.4 context. This means that there is a specific temporal pattern of how a user interacts with the package (see Figure plot).
![plot](inst/examples/pic/workflow.png)
More specifically, after a request for FADN data from DG-AGRI, this data is delivered to JRC D.4 in csv format.
# Installation
You can install the development version from Gitlab with:
```{r}
devtools::install_git("https://git-dmz.thuenen.de/mindstep/fadnutilspackages")
```
Then the Related R packages can be installed.
```{r}
requiredPackages = c('fadnUtils','data.table', 'devtools','jsonlite', 'ggplot2')
for(p in requiredPackages){
if(!require(p,character.only = TRUE)) install.packages(p)
library(p,character.only = TRUE)
}
```
# Usage in Brief
After loaded the packages, you will have a functinal R package on your computer. Then, we will talk about using your package online.
1. Create a working directory
- a user-defined data directory
1. Import CSV FADN data
- convert the csv data into raw r-data
- convert raw r-data into str r-data
1. Load r-data and structured r-data
1. Perform analysis
## 1. Create a working directory
Frist, User sets a working directory. Make sure the relative path stays within `CurrentProjectDirectory`.
```{r}
# using a local directory
CurrentProjectDirectory = "D:/public/yang/MIND_STEP/New_test_fadnUtils"
create.data.dir(folder.path = CurrentProjectDirectory)
set.data.dir(CurrentProjectDirectory)
get.data.dir()
```
### Required files
We request FADN data from DG-AGRI, which is delivered to us in csv format. In order to work efficiently with R, we should convert the csv-data to an r friendly format, this step is done with help of a human-readable file, called `raw_str_map.file`. Both files are necessary. `inst/examples` is the folder for use cases that contain fadnUtils package examples and json files.
1. FADN data in csv format: the data for loading
2. A json file for extracting the variables
### Folder Structure
A working directory is specified arbitrarily by the user. This structure helps data management and maintenance. The directory looks like this:
```base
CurrentProjectDirectory/
+-- csv
+-- fadnUtils.metadata.json
+-- rds
\-- spool
\-- readme.txt
```
* csv: CSV files are stored here
* fadnUtils.metadata.json: containing the mapping from the fadn.raw.rds to the fadn.str.rds data
* rds: placing r-data in the "rds" directory
* spool: keeping related files
## 2. Import CSV FADN data
First, we will import the data into an R-friendly format using the fadnUtils package.
### Convert the csv data into raw r-data
The raw data will be added to a `rds` directory. We use a convenient function from this package to convert the csv file into raw r-data.
```{r}
fadn.data.dir <- "D:/public/data/fadn/lieferung_20210414/csv/"
# load data for country BEL and year 2009
convert.to.fadn.raw.rds(
file.path = paste0(fadn.data.dir, "BEL2009.csv"),
sepS = ",",
fadn.country = "BEL",
fadn.year = 2009
#keep.csv = T # copy csv file in csv.dir
)
```
At any time, we can check for the current data dir, what csv files (countries, year) are loaded.
```{r}
show.data.dir.contents()
```
### Convert raw r-data into structured r-data
Then, We convert raw data into structured data. Broadly, there are 3 steps to including data in an R package:
1. setting a structured data in the `structured` directory,
2. checking the `raw_str_map.file` that all variables can be converted.
3. converting the structured data successfully into `structured` directory.
#### Set a `structured` directory for saving the structured data
We set a `test` folder to placing the structured data.
```{r}
rds.dir = paste0(get.data.dir(),"/rds/")
# set a structured name for for saving the structured r-data in rds.dir
new.str.name = "test"
# set a extraction_dir
dir.create(paste0(rds.dir, new.str.name))
new.extraction.dir = paste0(rds.dir, new.str.name)
```
#### Check the variables in the `raw_str_map.file`
Before conversion it is recommended to use `check.column()` method, ensuring that all variables in the`raw_str_map.file` can be converted.
```{r results='hide', message=FALSE, warning=FALSE}
list_vars = check.column(
# a rds file or a csv file
importfilepath = paste0(rds.dir, "fadn.raw.2009.BEL.rds"),
# a json file
jsonfile = "D:/public/yang/MIND_STEP/2014_after_copy.json",
# write a new json file without unmatched variables
rewrite_json = T,
# save the new json in extraction_dir
extraction_dir = new.extraction.dir)
```
#### Convert the raw data into structured r-data using the checked json file
Finally, We can convert a raw r-data to str r-data using a external json file. For more details on converting in fadnUtils packages, `see USE_CASE.R`.
```{r}
convert.to.fadn.str.rds(fadn.country = "BEL",
fadn.year = 2009,
str.name = new.str.name # extraction_dir
)
convert.to.fadn.str.rds(fadn.country = "BEL",
fadn.year = 2009,
raw_str_map.file = "D:/public/yang/MIND_STEP/new_sample/test01/raw_str_map.json", # a external json file
str.name = new.str.name, # extraction_dir
force_external_raw_str_map = T,
DEBUG = F
)
```
#### Files Structure in `rds` folder
After conversion, we can see the `rds` folder:
* `fadn.raw.2009.BEL.rds`: raw r-data for country "BEL" and year "2009"
* `test`: extraction_dir for saving the structured r-data and extracting json file
* `fadn.str.2009.BEL.rds`: structured s-data for for country of "BEL" and year of "2009"
* `raw_str_map.json`: default json file
* `rewrite_2014_after_copy.json`: modified json file after checking the variables
```base
rds
+-- fadn.raw.2009.BEL.compressed.rds
+-- fadn.raw.2009.BEL.rds
+-- fadn.raw.2010.BEL.compressed.rds
+-- fadn.raw.2010.BEL.rds
+-- fadn.raw.2011.BEL.compressed.rds
+-- fadn.raw.2011.BEL.rds
+-- fadn.raw.2012.BEL.compressed.rds
+-- fadn.raw.2012.BEL.rds
\-- test
+-- fadn.str.2009.BEL.rds
+-- raw_str_map.json
\-- rewrite_2014_after_copy.json
```
## 3. Load raw r-data and structured r-data
In order to initiate any analysis with `fadnUtils`, we first need to load r-data. We can only load data for countries and years that that has already been imported into a data.dir folder.
### Load raw r-data for the country `BEL` and year `2009`
```{r results='hide', message=FALSE, warning=FALSE}
my.data.2009.raw = load.fadn.raw.rds(
countries = "BEL",
years = 2009
)
```
### Load structured data for the country `BEL` and year `2009`
We can load structured from country `BEL` and year `2009`.
```{r results='hide', message=FALSE, warning=FALSE}
my.data.2009.str = load.fadn.str.rds(
countries = "BEL",
years = 2009,
extraction_dir = "test" # Location of the str r-data
)
```
### Load structured data from all available countries and years.
The following is an example of loading structured data all available countries and years.
```{r results='hide', message=FALSE, warning=FALSE}
my.str.data = load.fadn.str.rds( extraction_dir = "a")
```
## 4. Perform analysis
Here are some examples to perform data.
### Collection the common id
We can collect the common id from the loaded r-data using `collect.common.id()` function on `fadnUtils`.
```{r, message=FALSE}
# Collection the common id from loaded structured r-data
collected.common.id_str = collect.common.id(my.str.data)
```
### Plotting
To build a basic plot, we will use the `ggplot` function using the plotting package
`ggplot2`.
```{r results='hide', message=FALSE, warning=FALSE}
crops.data = my.str.data$crops #catering for easier access at next steps
#this contains the number of crops for each farm-country-year/
# Be carefule, we hav to filter to count only the LEVL variable
crops.data.Ncrops = crops.data[VARIABLE=="LEVL",.N,by=list(COUNTRY,YEAR,ID)]
# This displays the quantiles of the number of crops
crops.data.Ncrops[,as.list(quantile(N)),by=list(YEAR,COUNTRY)][order(COUNTRY)]
ggplot(crops.data.Ncrops,aes(y=N,x=1)) +
geom_boxplot() +
facet_grid(YEAR~COUNTRY) +
theme(axis.title.x=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank()
)+
ylab("Number of Crops")
```
### Some other examples
```{r}
# sample and representend number of farms
my.str.data$info[,list(Nobs_sample=.N,Nobs_represented=sum(WEIGHT)),
by=.(COUNTRY,YEAR)]
# only for full sample (common id over years in selected data)
my.str.data$info[ID %in% collected.common.id_str[[1]],
list(Nobs_sample=.N,
Nobs_represented=sum(WEIGHT)),
by=.(COUNTRY,YEAR)]
```
**Notices:** Please read `inst/examples/FADN_USE_CASE.R` and `use_case.docx` for more details on using fadnUtils.
rm(list=ls())
library(fadnUtils)
requiredPackages = c('fadnUtils','data.table', 'devtools','jsonlite', 'ggplot2', 'tidyverse','ggplot2')
for(p in requiredPackages){
if(!require(p,character.only = TRUE)) install.packages(p)
library(p,character.only = TRUE)
}
CurrentProjectDirectory = "D:/data/fadn/lieferung_20210414/yang/fadn_work_space"
set.data.dir(CurrentProjectDirectory)
# load str data
str.dir <- "str_dir"
str_data <- readRDS(paste0(get.data.dir(),"/rds/",str.dir,"/fadn.str.all.rds"))
nuts.heatmap.group <- function(group.by, fadn.data.info){
fadn.data.info = str_data$info
#create DIR>plots ----
dir.create(paste0(CurrentProjectDirectory,"/plot"))
# create group folder ---
dir.create(paste0(CurrentProjectDirectory,"/plot/", "fadn_",deparse(substitute(group.by)),"_plots"))
countries <- unique(fadn.data.info$COUNTRY)
for (country in countries){
heatmap_data <- fadn.data.info %>%
filter(COUNTRY == country) %>%
count({{group.by}},YEAR) %>%
arrange(YEAR) %>%
# pivot_wider(names_from = YEAR,values_from=n) %>%
mutate(across(3:last_col(),function(x)ifelse(is.na(x),0,1)))
# pivot_longer(c(`2004`:`2018` ), names_to = "YEAR", values_to = "n")
heatmap_data <- data.frame(lapply(heatmap_data,as.character))
if (NROW(heatmap_data$NUTS3 %>% unique()) >100 ) {
text.size = 3
} else{text.size = 11}
p <- heatmap_data %>% ggplot(aes(YEAR, {{group.by}}, fill= n)) + geom_tile() +
theme(legend.position="none") +
ggtitle(country)+ theme(axis.text.y = element_text(size = text.size))
#
# p_name <- country
#
# assign(p_name, heatmap_data %>% ggplot ( aes(YEAR,NUTS2, fill= n)) + geom_tile() +
# theme(legend.position="none") +
# ggtitle(country))
ggsave(plot = p ,
filename = paste0(CurrentProjectDirectory,"/plot/","fadn_",deparse(substitute(group.by)),"_plots/",country,".png"),
width = 18, height = 8)
}
}
fadnUtils::nuts.heatmap.group(NUTS1,str_data$info)
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/nuts_converter.R
\name{nuts.heatmap.group}
\alias{nuts.heatmap.group}
\title{nuts heatmap output}
\usage{
nuts.heatmap.group(group.by, fadn.data.info)
}
\arguments{
\item{group.by}{regional levels (fadn region, NUTS1, NUTS2 and NUTS3)}
}
\description{
nuts heatmap output
}
\author{
Yang
}