# Xinxin Yang's avatar
# Xinxin Yang committed
############################################################################
###                                                                      ###
###                              FADNUTILS:                              ###
###         AN R PACKAGE TO EASILY LOAD AND MANIPULATE FADN DATA         ###
###                                                                      ###
############################################################################


##  In order to use fadnUtils, we must load fadnUtils and other packages.

## R version >=3.6.1 and < 4.0.0 

## Author: 

############################################################################
###                                                                      ###
###                                SETUP:                                ###
###                 DIRECTORY NAMES AND CONDITIONAL FILE                 ###
###                                                                      ###
############################################################################
# fadnUtils always works with a user-defined data.dir.
# Let's assume that the user has not created one yet.
# The code below creates a data.dir folder somewhere on our computer.
# We must also have created the raw_str_map.file and pass it as an argument
# to the function. This file is copied to the data.dir folder. Thus, we can
# see the structure of the data contained in a data.dir folder by inspecting
# the raw_str_map.file residing in it.

##################################################################
##                  Install and load packages                   ##
##################################################################
# Packages this script attaches before doing any work.
requiredPackages <- c("fadnUtils", "data.table", "devtools", "jsonlite", "ggplot2")
for (pkg in requiredPackages) {
  # requireNamespace() only probes whether the package is installed; unlike
  # require() it does not attach it and stays quiet when it is missing.
  # NOTE(review): install.packages() can only fetch from the configured
  # repositories - confirm that fadnUtils is actually available there.
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
  # library() attaches the package and stops with an error if it is still
  # unavailable, so a failed installation cannot go unnoticed.
  library(pkg, character.only = TRUE)
}


#################################################################
##                       DIRECTORY NAMES                       ##
#################################################################

# Project folder; it is handed to create.data.dir() / set.data.dir() below.
CurrentProjectDirectory <- "D:/public/yang/MIND_STEP/New_test_fadnUtils"

##################################################################
##                        Required files                        ##
##################################################################

# Folder holding the FADN csv files that are loaded in SECTION 1.
# The trailing "/" matters: file names are appended with paste0().
fadn.data.dir <- "D:/public/data/fadn/lieferung_20210414/csv/"

# A json file for extraction

# Create a data.dir in the project folder.
create.data.dir(folder.path = CurrentProjectDirectory)

# Once the data.dir is created, declare that we are working with it.
set.data.dir(CurrentProjectDirectory)

# Query the data.dir that is currently set.
get.data.dir()

# After you create a data dir, below is a list of "real-world" example files:
# CurrentProjectDirectory/
# +-- csv
# +-- fadnUtils.metadata.json
# +-- rds
# \-- spool
#     \-- readme.txt


############################################################################
###                                                                      ###
###                              SECTION 1:                              ###
###                         IMPORT CSV FADN DATA                         ###
###                                                                      ###
############################################################################


# .............. IMPORT DATA IN TWO STEPS ..........................................#

# The import is done in two steps: one for converting
# the csv to fadn.raw.rds (csv-data to raw r-data) and
# one for converting the fadn.raw.rds to fadn.str.rds (raw r-data
# to structured r-data).


#################################################################
##             STEP 1: CONVERT CSV TO FADN.RAW.RDS             ##
#################################################################

##-----------------------------
## load each file separately
##-----------------------------
# load for a specific country "DEU" and from a specific year "2009"
# Convert the single file DEU2009.csv from fadn.data.dir; country and year
# are stated explicitly and match the file name.
convert.to.fadn.raw.rds(
  file.path    = paste0(fadn.data.dir, "DEU2009.csv"),
  sepS         = ",",
  fadn.country = "DEU",
  fadn.year    = 2009
  # keep.csv = T # copy csv file in csv.dir
)

##-----------------------------
## load all csv files in a folder
##-----------------------------
"csv2raw function takes csv files in a folder and converts them into raw data"
# Convert every csv file of a folder into raw r-data.
#
# The file names are expected to start with the three-letter country code
# followed by the four-digit year (e.g. "DEU2009.csv"); both are cut out of
# the name by position and passed on to convert.to.fadn.raw.rds().
#
# Arguments:
#   LocationofCSVFiles  path of the folder that contains the csv files
#                       (with or without a trailing path separator).
# Value:
#   NULL, invisibly; the function is called for its side effects.
allcsv2raw <- function(LocationofCSVFiles) {

  # "\\.csv$" is a real regular expression for the ".csv" extension. The
  # former glob-style "*.csv$" also accepted names such as "READMEcsv".
  csv_file_names <- list.files(path = LocationofCSVFiles, pattern = "\\.csv$")

  # Drop trailing separators so that file.path() does not produce "dir//file".
  csv_dir <- sub("[/\\\\]+$", "", LocationofCSVFiles)

  for (csv_file in csv_file_names) {
    # characters 1-3 of the file name: country code
    country <- substr(csv_file, 1, 3)

    # characters 4-7 of the file name: year. It is converted to a number so
    # that it has the same type as in the other conversion calls of this
    # script (a name that does not follow the convention yields NA with a
    # warning).
    year <- as.numeric(substr(csv_file, 4, 7))

    convert.to.fadn.raw.rds(
      # Build the path from the folder the caller passed in, not from the
      # global fadn.data.dir, so the function works for any folder.
      file.path = file.path(csv_dir, csv_file),
      sepS = ",",
      fadn.country = country,
      fadn.year = year
      # keep.csv = T # copy csv file in csv.dir
    )
  }

  invisible(NULL)
}


# Convert all csv files found in fadn.data.dir.
allcsv2raw(LocationofCSVFiles = fadn.data.dir)

##-----------------------------
## load specific year and country
##-----------------------------


"C.Y2raw function takes selected countries and years, then converts them into raw data"
# Convert the csv files of selected countries and years into raw r-data.
#
# For every country/year combination the file "<country><year>.csv" is looked
# up in data.dir and passed on to convert.to.fadn.raw.rds().
#
# Arguments:
#   countries  vector of country codes, e.g. c("BEL", "DEU")
#   years      vector of years, e.g. c(2009, 2010)
#   data.dir   folder that contains the csv files (with or without a trailing
#              path separator). Defaults to the global fadn.data.dir, which is
#              the folder the function always used before this argument
#              existed.
# Value:
#   NULL, invisibly; the function is called for its side effects.
C.Y2raw <- function(countries, years, data.dir = fadn.data.dir) {

  # Drop trailing separators so that file.path() does not produce "dir//file".
  csv_dir <- sub("[/\\\\]+$", "", data.dir)

  for (country in countries) {
    for (year in years) {

      csv_path <- file.path(csv_dir, paste0(country, year, ".csv"))

      # Not every country/year combination necessarily has a file. Skip the
      # missing ones with a warning instead of aborting the remaining
      # conversions.
      if (!file.exists(csv_path)) {
        warning("Skipping missing FADN csv file: ", csv_path, call. = FALSE)
        next
      }

      convert.to.fadn.raw.rds(
        file.path = csv_path,
        sepS = ",",
        fadn.country = country,
        fadn.year = year
        # keep.csv = T # copy csv file in csv.dir
      )
    }
  }

  invisible(NULL)
}

# Countries (BEL, DEU and NED) and years to convert.
countriesList <- c("BEL", "DEU", "NED")
yearsList <- c(2009, 2010, 2011, 2018)
C.Y2raw(countries = countriesList, years = yearsList)

# Inspect what the data.dir contains after the conversions.
show.data.dir.contents()


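# If the csv files were converted to raw r-data successfully, the raw r-data files are saved in the "rds" folder
# and the project's files and folders look like this (illustrative example; the exact files
# depend on the countries and years that were loaded):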

# New_test_fadnUtils/
# +-- csv
# +-- fadnUtils.metadata.json
# +-- rds
# |   +-- fadn.raw.2009.BEL.compressed.rds
# |   +-- fadn.raw.2009.BEL.rds
# |   +-- fadn.raw.2010.BEL.compressed.rds
# |   +-- fadn.raw.2010.BEL.rds
# |   +-- fadn.raw.2011.BEL.compressed.rds
# |   +-- fadn.raw.2011.BEL.rds
# |   +-- fadn.raw.2012.BEL.compressed.rds
# |   \-- fadn.raw.2012.BEL.rds
# \-- spool
#     \-- readme.txt


##################################################################
##         STEP 2: CONVERT FADN.RAW.RDS TO FADN.STR.RDS         ##
##################################################################

#######################################################################################################
# Notices:#
###########
## Before converting raw r-data into str r-data, it is recommended to use check.column() method   
## so that all variables in this json file can be converted.      
## The conversion of the raw r-data file to a structured r-data file is driven by a human-readable file,