GitLab at IIASA

7b0c198ff38c1763b5bee3452f2a9eed4c82f77b.svn-base 7.1 KiB
Newer Older
Xinxin Yang's avatar
Xinxin Yang committed
#This file contains functions related to managing data.dir (create, set, read contents, etc.)


#' Creates a data.dir
#'
#' Builds the data.dir layout inside `folder.path`: the file
#' `fadnUtils.metadata.json` plus the sub-folders `csv`, `rds` and `spool`
#' (the latter receives a small `readme.txt`). If `folder.path` does not exist
#' it is created first; creation is not recursive, so its parent folder must
#' already exist.
#'
#' @param folder.path Path of the folder that will become the data.dir.
#' @param metadata Text written verbatim to `fadnUtils.metadata.json`.
#'
#' @return TRUE if created successfully; FALSE if `folder.path` already is a
#'   data.dir, exists but is not a folder, or if any folder could not be
#'   created. The value is returned in invisible mode.
#' @export
#'
#' @examples
#' \dontrun{
#' create.data.dir("path/to/my.data.dir")
#' }
create.data.dir <- function(folder.path,
                            metadata = "{\n'description': 'No Description Provided',\n'created-by':'',\n'created-at':''\n}") {

  if (file.exists(folder.path)) {
    # An existing regular file can never host the sub-folders; stop before
    # trying to write inside it.
    if (!dir.exists(folder.path)) {
      cat("The path provided exists but is not a folder.\n")
      return(invisible(FALSE))
    }

    # if it is already a data.dir, exit
    if (check.data.dir.structure(folder.path)) {
      cat("This is already a data.dir structure. Doing nothing.\n")
      return(invisible(FALSE))
    }
  } else if (!dir.create(folder.path)) {
    # folder did not exist and could not be created
    print("Could not create folder.")
    return(invisible(FALSE))
  }

  # create fadnUtils.metadata ----
  cat(metadata, file = file.path(folder.path, "fadnUtils.metadata.json"))

  # create DIR>csv, DIR>rds, DIR>spool ----
  for (sub.dir in c("csv", "rds", "spool")) {
    sub.path <- file.path(folder.path, sub.dir)

    # A sub-folder that is already present is fine; anything else that
    # prevents its creation leaves the data.dir incomplete, so report failure
    # instead of returning TRUE.
    if (!dir.exists(sub.path) && !dir.create(sub.path)) {
      cat("Could not create folder: ", sub.path, "\n", sep = "")
      return(invisible(FALSE))
    }
  }

  cat(
    "In this folder you can save project related files",
    file = file.path(folder.path, "spool", "readme.txt")
  )

  invisible(TRUE)
}



#' Sets the data.dir
#'
#' Validates the folder with `check.data.dir.structure` (problems are
#' printed), stores it in the option "fadnUtils.data.dir" and, when the file
#' `stored.rds.data.RData` is present in the folder, loads it into
#' `env.stored.rds`.
#'
#' @param new.data.dir the full path to the folder where the data.dir will be. Ending slash "/" shall not be present
#'
#' @return TRUE if the data.dir was successfully set; FALSE otherwise. Returns in invisible mode.
#' @export
#'
#' @examples
#' \dontrun{
#' set.data.dir("path/to/my.data.dir")
#' }
set.data.dir <- function(new.data.dir) {

  # check that it is a valid data.dir ----
  is.valid <- check.data.dir.structure(new.data.dir, silent = FALSE)

  if (is.valid) {
    # set option for fadnUtils.data.dir ----
    options(fadnUtils.data.dir = new.data.dir)

    # load stored.rds.env ----
    rdata.file <- paste0(new.data.dir, "/stored.rds.data.RData")
    if (file.exists(rdata.file)) {
      load(rdata.file, envir = env.stored.rds)
    }

    return(invisible(TRUE))
  }

  cat("Not a valid data.dir. cannot set the folder provided.\n")
  invisible(FALSE)
}


#' Gets the data.dir
#'
#' data.dir is the folder where data is stored.
#' The package uses two subfolders of it:
#' csv = location to store the csv files of the DG-AGRI (fadn.raw.csv)
#' rds = location to store rds files (fadn.raw.rds, fadn.str.rds, etc.)
#'
#' @return the value of option("fadnUtils.data.dir"); NULL when the option is
#'   not set or is the empty string
#' @export
#'
#' @examples
#' \dontrun{
#' get.data.dir()
#' }
get.data.dir <- function() {

  current <- getOption("fadnUtils.data.dir")

  # option never set
  if (is.null(current)) {
    return(NULL)
  }

  # option set to the empty string counts as "not set"
  if (current == "") {
    return(NULL)
  }

  current
}


#' Show the contents of data.dir
#'
#' Collects the metadata text of a data.dir, the available raw data and, for
#' every sub-folder of its `rds` folder, the available extracted data. The
#' result is either returned as a list or printed as COUNTRY x YEAR tables.
#'
#' @param data.dir a specific directory to show contents, otherwise it will read the fadnUtils.data.dir
#' @param return.list if TRUE, returns a list, otherwise print the results
#'
#' @return If `return.list` is TRUE, an invisible list with the elements
#'   `description` (the lines of fadnUtils.metadata.json joined by a space),
#'   `raw` (the result of `get.available.fadn.raw.rds`) and `extractions`
#'   (one entry per sub-folder of `rds`, each holding `contents`, the result
#'   of `get.available.fadn.str.rds`). Otherwise the results are printed and
#'   NULL is returned invisibly. If the data.dir is not valid, a warning is
#'   raised and NULL is returned.
#' @export
#'
#' @examples
#' \dontrun{
#' show.data.dir.contents()
#' contents <- show.data.dir.contents(return.list = TRUE)
#' }
show.data.dir.contents <- function(data.dir = NULL, return.list = FALSE) {

  if (is.null(data.dir)) {
    data.dir <- get.data.dir()
  }

  # No explicit folder and none set globally: nothing to inspect. Checked
  # here so the structure check is never called without a folder.
  if (is.null(data.dir) || !check.data.dir.structure(data.dir)) {
    warning("Not a valid data.dir. Exiting ....")
    return(NULL)
  }

  # store the results in a list
  ret <- list()

  # get description
  ret[["description"]] <- paste(
    readLines(file.path(data.dir, "fadnUtils.metadata.json"), warn = FALSE),
    collapse = " "
  )

  # get raw data; data.dir is forwarded so that an explicitly requested
  # folder is the one listed, not the globally set data.dir
  ret[["raw"]] <- get.available.fadn.raw.rds(data.dir = data.dir)

  # get extracted data, one entry per sub-folder of rds
  ret[["extractions"]] <- list()

  extr.dirs <- list.dirs(
    path = file.path(data.dir, "rds"),
    full.names = FALSE,
    recursive = FALSE
  )

  for (d in extr.dirs) {
    ret[["extractions"]][[d]] <- list(
      contents = get.available.fadn.str.rds(data.dir = data.dir, extract_dir = d)
    )
  }

  if (return.list == TRUE) {
    return(invisible(ret))
  }

  # Prints a COUNTRY x YEAR count table; an empty table is reported as text
  # and never passed to dcast.
  show.country.year.table <- function(avail) {
    if (nrow(avail) > 0) {
      print(dcast(avail, COUNTRY ~ YEAR, value.var = "COUNTRY", fun.aggregate = length))
    } else {
      cat("No data present")
    }
  }

  cat("\n", "Description: \n", ret[["description"]])

  cat("\n\nRaw data: \n")
  show.country.year.table(ret[["raw"]])

  cat("\n\nExtracted data : \n")
  for (d in extr.dirs) {
    cat("\n---- Extracted dir: ", d, "\n")
    show.country.year.table(ret[["extractions"]][[d]][["contents"]])
  }

  cat("\n\n")

  invisible(NULL)
}



#' Checks if the structure of the fadnUtils.data.dir is ok
#'
#' A folder is a valid data.dir when it exists, contains the file
#' `fadnUtils.metadata.json` and contains the folders `csv` and `rds`.
#'
#' @param data.dir a specific directory to check, otherwise it will read the fadnUtils.data.dir
#' @param silent if TRUE, do not print any message
#'
#' @return TRUE if everything is ok; FALSE otherwise (also when no data.dir
#'   is provided and none has been set). Returned in invisible mode.
#' @export
#'
#' @examples
#' \dontrun{
#' check.data.dir.structure("path/to/my.data.dir", silent = FALSE)
#' }
check.data.dir.structure <- function(data.dir = NULL, silent = TRUE) {

  if (is.null(data.dir)) {
    data.dir <- get.data.dir()
  }

  if (is.null(data.dir)) {
    # Nothing provided and nothing set: report it as a failed check instead of
    # letting file.exists(NULL) produce a zero-length condition error.
    messages <- "No data.dir provided and no fadnUtils.data.dir has been set."
  } else {
    messages <- character(0)

    if (!dir.exists(data.dir)) {
      messages <- c(messages, "Folder provided as data.dir does not exist.")
    }

    if (!file.exists(file.path(data.dir, "fadnUtils.metadata.json"))) {
      messages <- c(messages, "Problem with data.dir: fadnUtils.metadata.json does not exist.")
    }

    # dir.exists, not file.exists: a regular file named 'csv' or 'rds' does
    # not make a usable data.dir.
    if (!dir.exists(file.path(data.dir, "csv"))) {
      messages <- c(messages, "Problem with data.dir: 'csv' directory does not exist.")
    }

    if (!dir.exists(file.path(data.dir, "rds"))) {
      messages <- c(messages, "Problem with data.dir: 'rds' directory does not exist.")
    }
  }

  if (length(messages) == 0) {
    return(invisible(TRUE))
  }

  if (!silent) {
    cat(messages, sep = "\n")
  }

  invisible(FALSE)
}




#' Returns the available YEAR-COUNTRY fadn.raw.rds
#'
#' Lists the files named `fadn.raw.<YEAR>.<COUNTRY>.rds` in the `rds` folder
#' of the data.dir, where `<YEAR>` is made of digits. Files that do not follow
#' this naming exactly are ignored.
#'
#' @param data.dir a specific data.dir to look into, otherwise it will read the fadnUtils.data.dir
#'
#' @return a DT of the available YEAR-COUNTRY fadn.raw.rds (both columns are
#'   character). If no data.dir is provided and none has been set, a warning
#'   is raised and FALSE is returned.
#'
#' @export
#'
#' @examples
#' \dontrun{
#' get.available.fadn.raw.rds()
#' }
get.available.fadn.raw.rds <- function(data.dir = NULL) {

  if (is.null(data.dir)) {
    data.dir <- get.data.dir()
  }

  if (is.null(data.dir)) {
    warning("Either provide explicitly a fadnUtils.data.dir to the function or first set the fadnUtils.data.dir using set.data.dir function. Exiting ....")
    return(FALSE)
  }

  rds.dir <- file.path(data.dir, "rds")

  # One anchored pattern is used both to select the files and to extract the
  # parts, so every listed file is guaranteed to yield a clean YEAR/COUNTRY.
  file.pattern <- "^fadn[.]raw[.](\\d+)[.](\\S+)[.]rds$"

  rds.avail.files <- list.files(rds.dir, pattern = file.pattern)

  data.table(
    YEAR = sub(file.pattern, "\\1", rds.avail.files),
    COUNTRY = sub(file.pattern, "\\2", rds.avail.files)
  )
}


#' Returns the available YEAR-COUNTRY fadn.str.rds of an extraction dir
#'
#' Lists the files named `fadn.str.<YEAR>.<COUNTRY>.rds` in the folder
#' `rds/<extract_dir>` of the data.dir, where `<YEAR>` is made of digits.
#' Files that do not follow this naming exactly are ignored.
#'
#' @param data.dir a specific data.dir to look into, otherwise it will read the fadnUtils.data.dir
#' @param extract_dir The name of the extraction dir
#'
#' @return DT of the available YEAR-COUNTRY fadn.str.rds (both columns are
#'   character). If no data.dir is provided and none has been set, a warning
#'   is raised and FALSE is returned.
#'
#' @export
#'
#' @examples
#' \dontrun{
#' get.available.fadn.str.rds(extract_dir = "my.extraction")
#' }
get.available.fadn.str.rds <- function(data.dir = NULL, extract_dir) {

  if (is.null(data.dir)) {
    data.dir <- get.data.dir()
  }

  if (is.null(data.dir)) {
    warning("Either provide explicitly a fadnUtils.data.dir or set the fadnUtils.data.dir using set.data.dir function. Exiting ....")
    return(FALSE)
  }

  rds.dir <- file.path(data.dir, "rds", extract_dir)

  # One anchored pattern is used both to select the files and to extract the
  # parts, so every listed file is guaranteed to yield a clean YEAR/COUNTRY.
  file.pattern <- "^fadn[.]str[.](\\d+)[.](\\S+)[.]rds$"

  rds.avail.files <- list.files(rds.dir, pattern = file.pattern)

  data.table(
    YEAR = sub(file.pattern, "\\1", rds.avail.files),
    COUNTRY = sub(file.pattern, "\\2", rds.avail.files)
  )
}