# Committed by Xinxin Yang

#....................................................................
#
#This file includes functions related to handling (loading, saving)
# data from raw.rds files or str.rds,
# saved in the RDS subdirectory of the data.dir
#
#....................................................................


#' Load all rds.raw.FADN data for selected years and countries (rbinds them)
#'
#' It adds two columns, load.YEAR and load.COUNTRY, to each row. These can be
#' used to group the data per year and country.
#'
#' For every selected file the row filter is applied first and the column
#' filter second, so \code{row.filter} may refer to columns that are not kept
#' by \code{col.filter}.
#'
#' @param countries a character vector with all the 3-letter codes of the selected countries, e.g. c("ELL", "ESP").
#' If "all" is included, all available countries whose code has exactly 3 characters are loaded
#' @param years a numeric vector with the years selected. If "all" is included, all available years are loaded
#' @param col.filter a character vector with the columns to load. If NULL, all columns are loaded. E.g. col.filter=c('ILOTH_VET_V', 'ILVOTH_V','id')
#' @param row.filter a string giving an expression that will be evaluated in order to select rows. If NULL, all rows are returned. E.g. row.filter='TF8==1'.
#' The string is parsed and evaluated as R code, so only pass trusted input.
#'
#' @return The data of all selected files bound by rows (columns missing in some
#' files are filled with NA), with the extra columns load.YEAR and load.COUNTRY.
#' Returns NULL if no file matches the selection or if, for some file, none of the
#' columns in \code{col.filter} exists (a warning is issued). Returns FALSE, with a
#' warning, if the data.dir has not been set.
#' @export
load.fadn.raw.rds = function(countries=c("all"),
                             years=c("all"),
                             col.filter = NULL,
                             row.filter=NULL) {

  # check for fadnUtils.data.dir
  data.dir = get.data.dir()
  if(is.null(data.dir)) {
    warning("You have first to set the fadnUtils.data.dir using set.data.dir function. Exiting ....")
    return(FALSE)
  }
  rds.dir = paste0(data.dir,"/rds/")

  # select the COUNTRY-YEAR combinations to return
  rds.avail = get.available.fadn.raw.rds()

  if("all"%in%countries) {
    # keep only the available codes that are exactly 3 characters long
    countries.avail = unique(rds.avail$COUNTRY)
    countries = countries.avail[nchar(countries.avail) == 3]
  }

  if("all"%in%years) {
    years = unique(rds.avail$YEAR)
  }

  fadn.raw.rds.avail = rds.avail[YEAR%in%years & COUNTRY%in%countries]
  fadn.raw.rds.avail[,FILE:=paste0("fadn.raw.",YEAR,".",COUNTRY,".rds")]

  # Load every file into its own slot and bind once at the end, instead of
  # re-binding the accumulated result on every iteration.
  n.files = nrow(fadn.raw.rds.avail)
  loaded = vector("list", n.files)

  for(idx in seq_len(n.files)) {

    f = fadn.raw.rds.avail$FILE[idx]
    year.cur = fadn.raw.rds.avail$YEAR[idx]
    country.cur = fadn.raw.rds.avail$COUNTRY[idx]

    data.cur = readRDS(paste0(rds.dir, f))

    if(! is.null(row.filter)) {
      # row.filter is caller-supplied code evaluated against the columns of data.cur
      data.cur = data.cur[eval(parse(text=row.filter))]
    }

    if(! is.null(col.filter)) { # if columns are specified, keep only those

      col.filter.effective = col.filter[col.filter%in%names(data.cur)]

      # at least one requested column must exist in the data, otherwise abort
      if(length(col.filter.effective)==0) {
        warning('Column filter does not contain any column of the fadn.raw.rds data. Operation aborted')
        return(NULL)
      }

      # report the requested columns that this file does not contain
      col.filter.missing = setdiff(col.filter, col.filter.effective)
      if(length(col.filter.missing)>0) {
        warning(paste0("Not all columns were found: ", paste0(col.filter.missing,collapse=",")))
      }

      data.cur = data.cur[,..col.filter.effective]
    }

    # tag every row with the year/country of the file it came from
    data.cur[,load.YEAR:=year.cur]
    data.cur[,load.COUNTRY:=country.cur]

    print(paste0("Loading from ",rds.dir, f))

    loaded[[idx]] = data.cur

  }

  if(n.files==0) {
    return(NULL)
  }

  if(n.files==1) {
    # a single file is returned as loaded, without going through rbindlist
    return(loaded[[1]])
  }

  # fill = TRUE takes care of columns that do not exist in all the datasets
  rbindlist(loaded, fill = TRUE)

}



#' Load all rds.str.FADN data for selected years and countries
#'
#' Reads every selected fadn.str.<YEAR>.<COUNTRY>.rds file of an extraction and
#' combines them: the "info", "lvst"/"animals", "lvst"/"products" and "crops"
#' elements of the loaded objects are bound by rows.
#'
#' @param extraction_dir The name of the extraction to load data from. The files are read
#' from the subdirectory of that name inside the rds directory of the data.dir
#' @param countries a character vector with all the 3-letter codes of the selected countries, e.g. c("ELL", "ESP").
#' If "all" is included, all available countries are loaded
#' @param years a numeric vector with the years selected. If "all" is included, all available years are loaded
#'
#' @return The object read from the first selected file, in which the elements
#' "info", "lvst"/"animals", "lvst"/"products" and "crops" hold the rows of all
#' selected files (missing columns are filled with NA). Any other element is kept
#' as it is in the first file. Returns NULL if no file matches the selection.
#' Returns FALSE, with a warning, if the data.dir has not been set.
#' @export
load.fadn.str.rds = function(extraction_dir,
                             countries=c("all"),
                             years=c("all")) {

  # check for fadnUtils.data.dir
  data.dir = get.data.dir()
  if(is.null(data.dir)) {
    warning("You have first to set the fadnUtils.data.dir using set.data.dir function. Exiting ....")
    return(FALSE)
  }
  rds.dir = paste0(data.dir,"/rds/",extraction_dir,"/")

  # select the COUNTRY-YEAR combinations to return
  rds.avail = get.available.fadn.str.rds(extract_dir = extraction_dir)

  if("all"%in%countries) {
    countries = unique(rds.avail$COUNTRY)
  }

  if("all"%in%years) {
    years = unique(rds.avail$YEAR)
  }

  fadn.str.rds.avail = rds.avail[YEAR%in%years & COUNTRY%in%countries]
  fadn.str.rds.avail[,FILE:=paste0("fadn.str.",YEAR,".",COUNTRY,".rds")]

  fadn.str.rds.files = fadn.str.rds.avail[,FILE]

  # Load every file first; the components are bound once afterwards instead of
  # being re-bound on every iteration.
  loaded = vector("list", length(fadn.str.rds.files))

  for(idx in seq_along(fadn.str.rds.files)) {

    f = fadn.str.rds.files[idx]

    loaded[[idx]] = readRDS(paste0(rds.dir, f))

    cat(paste0("Loading from ",rds.dir, f, "\n"))

  }

  if(length(loaded)==0) {
    return(NULL)
  }

  # the first loaded object is the base; with a single file it is returned as is
  data.return = loaded[[1]]

  if(length(loaded)>1) {

    # row-binds one component, picked by `pick`, over all loaded objects
    bind.component = function(pick) {
      rbindlist(lapply(loaded, pick), fill = TRUE)
    }

    data.return[["info"]] = bind.component(function(x) x[["info"]])

    data.return[["lvst"]][["animals"]] = bind.component(function(x) x[["lvst"]][["animals"]])

    data.return[["lvst"]][["products"]] = bind.component(function(x) x[["lvst"]][["products"]])

    data.return[["crops"]] = bind.component(function(x) x[["crops"]])

  }

  return(data.return)

}



#' Title
#'
#' @param countries
#' @param years
#'
#' @return
#' @export
#'
#' @examples
delete.fadn.raw = function (countries=NULL, years=NULL) {

  data.dir = get.data.dir();
  rds.dir = paste0(data.dir,"/rds/")

  rds.avail = get.available.fadn.raw.rds();

  if("all"%in%countries) {countries= unique(rds.avail$COUNTRY)}

  if("all"%in%years) {years= unique(rds.avail$YEAR)}

  fadn.raw.rds.avail = rds.avail[YEAR%in%years & COUNTRY%in%countries]