|
|
---
|
|
|
|
|
title: "fadnutils Package - use case"
|
|
|
|
|
author: Xinxin Yang
|
|
|
|
|
date: "`r format(Sys.time(), '%d %B %Y')`"
|
|
|
|
|
output:
|
|
|
|
|
word_document:
|
|
|
|
|
reference_docx: "D:/public/yang/MIND_STEP/docs/MIND STEP - kind of template.docx"
|
|
|
|
|
---
|
|
|
|
|
|
|
|
|
|
```{r setup, include=FALSE}
|
|
|
|
|
knitr::opts_chunk$set(echo = TRUE)
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
## Setup
|
|
|
|
|
|
|
|
|
|
```{r setup_fadnutils, results='hide', message=FALSE, warning=FALSE}
|
|
|
|
|
## In order to use fadnUtils, we must load fadnUtils and other packages.
|
|
|
|
|
|
|
|
|
|
## R version >=3.6.1 and < 4.0.0
|
|
|
|
|
|
|
|
|
|
# fadnUtils always works with a user-defined data.dir
|
|
|
|
|
# Let's assume that the user has not created one yet.
|
|
|
|
|
# The following line creates a data.dir folder somewhere in our computer
|
|
|
|
|
# We must also have created the raw_str_map.file and pass it as an argument
|
|
|
|
|
# to the function. This file is copied to the data.dir folder. Thus, we can
|
|
|
|
|
# see the structure of the data contained in a data.dir folder by inspecting
|
|
|
|
|
# the raw_str_map.file residing in it.
|
|
|
|
|
|
|
|
|
|
##################################################################
|
|
|
|
|
## Install and load packages ##
|
|
|
|
|
##################################################################
|
|
|
|
|
# Packages this use case needs. Each one is installed when it is not available
# yet and is then attached.
# NOTE(review): install.packages() only finds packages in the configured
# repositories -- confirm that fadnUtils can be installed that way.
requiredPackages <- c("fadnUtils", "data.table", "devtools", "jsonlite", "ggplot2")
for (p in requiredPackages) {
  # requireNamespace() only probes for the package; unlike require() it does
  # not attach it and does not emit a "there is no package called" warning.
  if (!requireNamespace(p, quietly = TRUE)) {
    install.packages(p)
  }
  # library() fails loudly if the package is still missing after installation.
  library(p, character.only = TRUE)
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#################################################################
|
|
|
|
|
## DIRECTORY NAMES ##
|
|
|
|
|
#################################################################
|
|
|
|
|
|
|
|
|
|
CurrentProjectDirectory = "D:/public/yang/MIND_STEP/New_test_fadnUtils"
|
|
|
|
|
|
|
|
|
|
##################################################################
|
|
|
|
|
## Required files ##
|
|
|
|
|
##################################################################
|
|
|
|
|
|
|
|
|
|
# the path of the fadn files for loading
|
|
|
|
|
fadn.data.dir = "D:/public/data/fadn/lieferung_20210414/csv/"
|
|
|
|
|
|
|
|
|
|
# A json file for extraction
|
|
|
|
|
|
|
|
|
|
# create a data.dir
|
|
|
|
|
create.data.dir(folder.path = CurrentProjectDirectory)
|
|
|
|
|
|
|
|
|
|
# Once the data.dir is created, we must declare that we are working with it
|
|
|
|
|
set.data.dir(CurrentProjectDirectory)
|
|
|
|
|
|
|
|
|
|
get.data.dir()
|
|
|
|
|
|
|
|
|
|
# After you create a data dir, below is a list of "real-world" example files:
|
|
|
|
|
# CurrentProjectDirectory/
|
|
|
|
|
# +-- csv
|
|
|
|
|
# +-- fadnUtils.metadata.json
|
|
|
|
|
# +-- rds
|
|
|
|
|
# \-- spool
|
|
|
|
|
# \-- readme.txt
|
|
|
|
|
|
|
|
|
|
```
|
|
|
|
|
## Import csv fadn data
|
|
|
|
|
### convert csv into fadn raw data
|
|
|
|
|
```{r csv2rds, results='hide', message=FALSE, warning=FALSE}
|
|
|
|
|
# .............. IMPORT DATA IN TWO STEPS ..........................................#
|
|
|
|
|
|
|
|
|
|
# However, you can import the file in two steps, one for converting
|
|
|
|
|
# the csv to fadn.raw.str (csv-data to raw r-data) and
|
|
|
|
|
# one for converting the fadn.raw.rds to fadn.str.rds (raw r-data
|
|
|
|
|
# to structured r-data).
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#################################################################
|
|
|
|
|
## STEP 1: CONVERT CSV TO FADN.RAW.RDS ##
|
|
|
|
|
#################################################################
|
|
|
|
|
|
|
|
|
|
##-----------------------------
|
|
|
|
|
## load each file separately
|
|
|
|
|
##-----------------------------
|
|
|
|
|
# load for a specific country germany: "DEU" and from a specific year: 2009
|
|
|
|
|
convert.to.fadn.raw.rds(
|
|
|
|
|
file.path = paste0(fadn.data.dir ,"DEU2009.csv"),
|
|
|
|
|
sepS = ",",
|
|
|
|
|
fadn.country = "DEU",
|
|
|
|
|
fadn.year = 2009
|
|
|
|
|
#keep.csv = T # copy csv file in csv.dir
|
|
|
|
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
##-----------------------------
|
|
|
|
|
## load all csv files in a folder
|
|
|
|
|
##-----------------------------
|
|
|
|
|
#' Convert every FADN csv file of a folder into raw r-data.
#'
#' File names are expected to start with a three-character country code
#' followed by a four-digit year (e.g. "DEU2009.csv"). Each file is handed to
#' convert.to.fadn.raw.rds() with a comma as separator.
#'
#' @param LocationofCSVFiles Path of the folder that contains the csv files.
#' @return NULL, invisibly; the function is called for its side effects.
allcsv2raw <- function(LocationofCSVFiles) {
  # "\\.csv$" matches only names that really end in ".csv"; the former
  # "*.csv$" also matched names such as "xcsv".
  csv_file_names <- list.files(path = LocationofCSVFiles, pattern = "\\.csv$")

  # The folder may or may not be given with a trailing separator.
  has_trailing_sep <- grepl("[/\\\\]$", LocationofCSVFiles)

  for (file in csv_file_names) {
    # characters 1-3: country code
    country <- substr(file, 1, 3)

    # characters 4-7: year, passed on as a number like in the other call sites
    year <- suppressWarnings(as.numeric(substr(file, 4, 7)))
    if (is.na(year)) {
      warning("Skipping '", file, "': characters 4-7 are not a year.",
              call. = FALSE)
      next
    }

    # Build the path from the folder that was actually listed, not from the
    # global fadn.data.dir.
    csv_path <- if (has_trailing_sep) {
      paste0(LocationofCSVFiles, file)
    } else {
      file.path(LocationofCSVFiles, file)
    }

    convert.to.fadn.raw.rds(
      file.path = csv_path,
      sepS = ",",
      fadn.country = country,
      fadn.year = year
      # keep.csv = T # copy csv file in csv.dir
    )
  }

  invisible(NULL)
}
|
|
|
|
|
|
|
|
|
|
# load all csv
|
|
|
|
|
#allcsv2raw(fadn.data.dir)
|
|
|
|
|
|
|
|
|
|
##-----------------------------
|
|
|
|
|
## load specific year and country
|
|
|
|
|
##-----------------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#' Convert the csv files of selected countries and years into raw r-data.
#'
#' For every country/year combination the file "<country><year>.csv" in
#' `csv.dir` is handed to convert.to.fadn.raw.rds() with a comma as separator.
#'
#' @param countries Vector of country codes, e.g. c("BEL", "DEU").
#' @param years Vector of years, e.g. c(2009, 2010).
#' @param csv.dir Folder that contains the csv files. Defaults to the global
#'   `fadn.data.dir`, which is what the function used before this parameter
#'   existed.
#' @return NULL, invisibly; the function is called for its side effects.
C.Y2raw <- function(countries, years, csv.dir = fadn.data.dir) {
  # The folder may or may not be given with a trailing separator.
  has_trailing_sep <- grepl("[/\\\\]$", csv.dir)

  for (country in countries) {
    for (year in years) {
      file <- paste0(country, year, ".csv")
      csv_path <- if (has_trailing_sep) {
        paste0(csv.dir, file)
      } else {
        file.path(csv.dir, file)
      }

      convert.to.fadn.raw.rds(
        file.path = csv_path,
        sepS = ",",
        fadn.country = country,
        fadn.year = year
        # keep.csv = T # copy csv file in csv.dir
      )
    }
  }

  invisible(NULL)
}
|
|
|
|
|
|
|
|
|
|
# load countries: BEL, DEU and NED
|
|
|
|
|
countriesList = c("BEL", "DEU", "NED")
|
|
|
|
|
yearsList = c(2009,2010,2011,2018)
|
|
|
|
|
#C.Y2raw(countries = countriesList, years =yearsList )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
show.data.dir.contents()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# If you converted the csv to raw r-data successfully, raw r-data files are saved in "rds" folder,
|
|
|
|
|
# the project's files and folders look like this:
|
|
|
|
|
|
|
|
|
|
# New_test_fadnUtils/
|
|
|
|
|
# +-- csv
|
|
|
|
|
# +-- fadnUtils.metadata.json
|
|
|
|
|
# +-- rds
|
|
|
|
|
# | +-- fadn.raw.2009.BEL.compressed.rds
|
|
|
|
|
# | +-- fadn.raw.2009.BEL.rds
|
|
|
|
|
# | +-- fadn.raw.2010.BEL.compressed.rds
|
|
|
|
|
# | +-- fadn.raw.2010.BEL.rds
|
|
|
|
|
# | +-- fadn.raw.2011.BEL.compressed.rds
|
|
|
|
|
# | +-- fadn.raw.2011.BEL.rds
|
|
|
|
|
# | +-- fadn.raw.2012.BEL.compressed.rds
|
|
|
|
|
# | \-- fadn.raw.2012.BEL.rds
|
|
|
|
|
# \-- spool
|
|
|
|
|
# \-- readme.txt
|
|
|
|
|
|
|
|
|
|
```
|
|
|
|
|
### convert Fadn raw data into fadn str data
|
|
|
|
|
```{r raw2str, results='hide', message=FALSE, warning=FALSE}
|
|
|
|
|
##################################################################
|
|
|
|
|
## STEP 2: CONVERT FADN.RAW.RDS TO FADN.STR.RDS ##
|
|
|
|
|
##################################################################
|
|
|
|
|
|
|
|
|
|
#######################################################################################################
|
|
|
|
|
# Notices:#
|
|
|
|
|
###########
|
|
|
|
|
## Before converting raw r-data into str r-data, it is recommended to use check.column() method
|
|
|
|
|
## so that all variables in this json file can be converted.
|
|
|
|
|
## The conversion of the raw r-data file to a structured r-data file is driven by a human-readable file,
|
|
|
|
|
## called raw_str_map.json.
|
|
|
|
|
## This json file is saved in extraction_dir by default.
|
|
|
|
|
## if you want to use raw_str_map.json by default, please put this file in extraction_dir.
|
|
|
|
|
## Or the user can define an external json file, stating where it is
|
|
|
|
|
## and how to calculate the str r-data.
|
|
|
|
|
#######################################################################################################
|
|
|
|
|
|
|
|
|
|
# Folder that holds the raw r-data. The trailing slash is kept on purpose:
# file names are appended to rds.dir with paste0() further below.
rds.dir <- paste0(get.data.dir(), "/rds/")

# Name under which the str r-data is saved inside rds.dir
new.str.name <- "test"

# The extraction_dir is the sub-folder of rds.dir named after new.str.name.
new.extraction.dir <- paste0(rds.dir, new.str.name)

# Create the folder only when it is missing, so that running the chunk again
# does not raise an "already exists" warning.
if (!dir.exists(new.extraction.dir)) {
  dir.create(new.extraction.dir, recursive = TRUE)
}
|
|
|
|
|
```
|
|
|
|
|
#### check json file
|
|
|
|
|
|
|
|
|
|
```{r checkjson, results='hide', message=FALSE, warning=FALSE}
|
|
|
|
|
##-----------------------------------------------------------------------------
|
|
|
|
|
## Step 2.0: Check the variables of a loaded raw rds data file and a json file --
|
|
|
|
|
##-----------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
# Save the modified json file
|
|
|
|
|
list_vars = check.column(importfilepath = paste0(rds.dir, "fadn.raw.2009.BEL.rds"), # a rds file or a csv file
|
|
|
|
|
jsonfile = "D:/public/yang/MIND_STEP/2014_after_copy.json", # a json file
|
|
|
|
|
rewrite_json = TRUE, # write a new json file without unmatched variables
|
|
|
|
|
extraction_dir = new.extraction.dir # save the new json in extraction_dir
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
# Let's see the unmatched variables in this json file
|
|
|
|
|
print(list_vars)
|
|
|
|
|
|
|
|
|
|
##---------------------------------------------------------------
|
|
|
|
|
## Step 2.1: convert the raw r-data into str r-data --
|
|
|
|
|
##---------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
#check the default json file in extraction_dir
|
|
|
|
|
if ("raw_str_map.json" %in% list.files(new.extraction.dir, pattern = "\\.json$")){
|
|
|
|
|
cat(new.extraction.dir, "has a raw_str_map.json.", "\n")
|
|
|
|
|
}else{warning("please put a raw_str_map.json in ", new.extraction.dir,"\n", "Or using a external json file (option 2)", "\n")}
|
|
|
|
|
```
|
|
|
|
|
#### Convert fadn raw data into str data using a raw_str_map.json
|
|
|
|
|
```{r results='hide', message=FALSE, warning=FALSE}
|
|
|
|
|
##::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
|
|
|
|
|
## option 1: convert the file separately using a raw_str_map.json in extraction_dir
|
|
|
|
|
## making sure that a raw_str_map.json is in extraction_dir
|
|
|
|
|
##::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
|
|
|
|
|
|
|
|
|
|
convert.to.fadn.str.rds(fadn.country = "BEL",
|
|
|
|
|
fadn.year = 2009,
|
|
|
|
|
str.name = new.str.name # extraction_dir
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
##::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
|
|
|
|
|
## option 2: convert the file separately using a external json file
|
|
|
|
|
##::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
|
|
|
|
|
|
|
|
|
|
# If force_external_raw_str_map is TRUE
|
|
|
|
|
# this external json file will be copied to extraction_dir as raw_str_map.json,
|
|
|
|
|
convert.to.fadn.str.rds(
  fadn.country = "BEL",
  fadn.year = 2009,
  raw_str_map.file = "D:/public/yang/MIND_STEP/new_sample/test01/raw_str_map.json", # an external json file
  str.name = new.str.name, # extraction_dir
  # TRUE/FALSE are spelled out: T and F are ordinary variables that can be
  # reassigned, the reserved words cannot.
  force_external_raw_str_map = TRUE,
  DEBUG = FALSE
)
|
|
|
|
|
|
|
|
|
|
##::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
|
|
|
|
|
## option 3: convert multiple raw r-data files in rds.dir into str r-data in extraction_dir
|
|
|
|
|
##::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
|
|
|
|
|
|
|
|
|
|
#' Convert every raw r-data file found in rds.dir into str r-data.
#'
#' All "*.rds" files directly inside "<data.dir>/rds" are visited. Files whose
#' name contains "compressed" are reported and skipped; the other files must be
#' named "fadn.raw.<year>.<country>.rds" and are passed, one by one, to
#' convert.to.fadn.str.rds().
#'
#' Warnings raised during a conversion are reported and the conversion goes on;
#' an error is reported and the loop continues with the next file.
#'
#' @param Current_raw_str_map.file A user-defined raw_str_map file, or NULL
#'   (default); passed on as `raw_str_map.file`.
#' @param overwrite_external_json Logical, FALSE by default; passed on as
#'   `force_external_raw_str_map`.
#' @param str.name Passed on as `str.name`. Defaults to the global
#'   `new.str.name`, which is what the function used before this parameter
#'   existed.
#' @return NULL, invisibly; the function is called for its side effects.
raw2str <- function(Current_raw_str_map.file = NULL,
                    overwrite_external_json = FALSE,
                    str.name = new.str.name) {
  rds.dir <- paste0(get.data.dir(), "/rds")
  raw_file_names <- dir(rds.dir, pattern = "\\.rds$")

  for (file in raw_file_names) {
    if (grepl("compressed", file)) {
      cat("It's compressed data!", sep = "\n")
      next
    }

    # Take year and country from the name instead of from fixed character
    # positions, so an unexpected file is detected rather than mis-parsed.
    parts <- regmatches(
      file,
      regexec("^fadn\\.raw\\.([0-9]{4})\\.([^.]+)\\.rds$", file)
    )[[1]]
    if (length(parts) != 3L) {
      warning("Skipping '", file,
              "': name is not of the form fadn.raw.<year>.<country>.rds",
              call. = FALSE)
      next
    }
    year <- as.numeric(parts[2])
    country <- parts[3]

    cat("converting the str data for country: ", country, " and year: ", year, "\n")

    tryCatch(
      # withCallingHandlers() reports a warning and then resumes the
      # conversion. A warning handler on tryCatch() would unwind the call and
      # silently abandon the conversion at the first warning.
      withCallingHandlers(
        convert.to.fadn.str.rds(
          fadn.country = country,
          fadn.year = year,
          raw_str_map.file = Current_raw_str_map.file,
          force_external_raw_str_map = overwrite_external_json,
          str.name = str.name
        ),
        warning = function(w) {
          message('Caught an warning!')
          print(w)
          invokeRestart("muffleWarning")
        }
      ),
      error = function(e) {
        message("Caught an error! Please check the objects in json file using check.column() (see more in USE_CASE_4.R).")
        #cat("Wrong, can't convert the str r-data!",sep = "\n")
        print(e)
      }
    )
  }

  invisible(NULL)
}
|
|
|
|
|
#---------------------------------------------------------------
|
|
|
|
|
# option 3.1: using a raw_str_map.json by default
|
|
|
|
|
#---------------------------------------------------------------
|
|
|
|
|
# convert str data with a default json file, make sure that a raw_str_map.json in extraction_dir
|
|
|
|
|
#raw2str()
|
|
|
|
|
# or
|
|
|
|
|
#raw2str(Current_raw_str_map.file = "D:/public/yang/MIND_STEP/new_sample/test01/raw_str_map.json", overwrite_external_json = F)
|
|
|
|
|
|
|
|
|
|
#---------------------------------------------------------------
|
|
|
|
|
# option 3.2: using a external json file
|
|
|
|
|
#---------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
# convert str data using a external json file
|
|
|
|
|
#raw2str(Current_raw_str_map.file = "D:/public/yang/MIND_STEP/new_sample/test01/raw_str_map.json", overwrite_external_json = T)
|
|
|
|
|
|
|
|
|
|
show.data.dir.contents()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# If str r-data was converted, the str r-data is saved in "test"(new.str.name) folder as below.
|
|
|
|
|
|
|
|
|
|
# New_test_fadnUtils/
|
|
|
|
|
# +-- csv
|
|
|
|
|
# +-- fadnUtils.metadata.json
|
|
|
|
|
# +-- rds
|
|
|
|
|
# | +-- fadn.raw.2009.BEL.compressed.rds
|
|
|
|
|
# | +-- fadn.raw.2009.BEL.rds
|
|
|
|
|
# | +-- fadn.raw.2010.BEL.compressed.rds
|
|
|
|
|
# | +-- fadn.raw.2010.BEL.rds
|
|
|
|
|
# | +-- fadn.raw.2011.BEL.compressed.rds
|
|
|
|
|
# | +-- fadn.raw.2011.BEL.rds
|
|
|
|
|
# | +-- fadn.raw.2012.BEL.compressed.rds
|
|
|
|
|
# | +-- fadn.raw.2012.BEL.rds
|
|
|
|
|
# | \-- test
|
|
|
|
|
# | +-- fadn.str.2009.BEL.rds
|
|
|
|
|
# | +-- raw_str_map.json
|
|
|
|
|
# | \-- rewrite_2014_after_copy.json
|
|
|
|
|
# \-- spool
|
|
|
|
|
# +-- my_logfile.txt
|
|
|
|
|
# \-- readme.txt
|
|
|
|
|
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
## Load R-data from data.dir
|
|
|
|
|
```{r results='hide', message=FALSE, warning=FALSE}
|
|
|
|
|
#################################################################
|
|
|
|
|
## LOAD RAW R-DATA ##
|
|
|
|
|
#################################################################
|
|
|
|
|
### We can either load raw r-data files (the original FADN csv in r-friendly format),
|
|
|
|
|
### or structured r-data files (the original data transformed into meaningful
|
|
|
|
|
### information)
|
|
|
|
|
# To load raw r-data, only for BEL and 2009
|
|
|
|
|
my.data = load.fadn.raw.rds(
|
|
|
|
|
countries = "BEL",
|
|
|
|
|
years = 2009
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
# my.data is a single large data.table, with the original csv columns and rows
|
|
|
|
|
nrow(my.data) #Number of rows
|
|
|
|
|
names(my.data) #Column names
|
|
|
|
|
length(names(my.data)) #Number of columns
|
|
|
|
|
str(my.data) #Overall structure
|
|
|
|
|
|
|
|
|
|
##################################################################
|
|
|
|
|
## LOAD STRUCTURED R-DATA ##
|
|
|
|
|
##################################################################
|
|
|
|
|
|
|
|
|
|
#To load structured data, for BEL and 2009
|
|
|
|
|
my.data.2009 = load.fadn.str.rds(
|
|
|
|
|
countries = "BEL",
|
|
|
|
|
years = 2009,
|
|
|
|
|
extraction_dir = new.str.name # Location of the str r-data
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# You can see that my.data.2009 is a list, with three elements: info, costs, crops
|
|
|
|
|
str(my.data.2009)
|
|
|
|
|
|
|
|
|
|
# You can access each individual element like this
|
|
|
|
|
str(my.data.2009$info)
|
|
|
|
|
#str(my.data.2009$costs)
|
|
|
|
|
#str(my.data.2009$crops)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# The first columns of each of the above elements (info, costs, crops)
|
|
|
|
|
# are created according to the ID section of the raw_str_map
|
|
|
|
|
names(my.data.2009$info)
|
|
|
|
|
names(my.data.2009$costs)
|
|
|
|
|
names(my.data.2009$crops)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# info and costs data.tables are in wide-format (each observation in a single row,
|
|
|
|
|
# all attributes of a single observation in different columns).
|
|
|
|
|
# crops element is in long format (one observation is in many rows, one row per CROP-VARIABLE combination with its value in the VALUE column).
|
|
|
|
|
#
|
|
|
|
|
#
|
|
|
|
|
# See https://seananderson.ca/2013/10/19/reshape/ for
|
|
|
|
|
# discussion of the two types of data formats
|
|
|
|
|
head(my.data.2009$info)
|
|
|
|
|
head(my.data.2009$costs)
|
|
|
|
|
head(my.data.2009$crops)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Also on the attributes section of each of the above elements, we can access
|
|
|
|
|
# the column formulas and descriptions, as defined in the raw_str_map file.
|
|
|
|
|
# View(
|
|
|
|
|
# attr(my.data.2009$info,"column.descriptions")
|
|
|
|
|
# )
|
|
|
|
|
# View(
|
|
|
|
|
# attr(my.data.2009$costs,"column.descriptions")
|
|
|
|
|
# )
|
|
|
|
|
# View(
|
|
|
|
|
# attr(my.data.2009$crops,"column.descriptions")
|
|
|
|
|
# )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Especially for the crops element, we can also see the description
|
|
|
|
|
# CROP column
|
|
|
|
|
# View(
|
|
|
|
|
# attr(my.data.2009$crops,"crops.descriptions")
|
|
|
|
|
# )
|
|
|
|
|
|
|
|
|
|
#################################################################
|
|
|
|
|
## LOAD COUNTRIES-YEARS COMBINATIONS ##
|
|
|
|
|
#################################################################
|
|
|
|
|
### In the previous examples, we showed how to load data for one country and
|
|
|
|
|
### one year. In the following examples we show more combinations.
|
|
|
|
|
|
|
|
|
|
#To load for DEU and NED for the years 2009, 2010 and 2011
|
|
|
|
|
my.data = load.fadn.str.rds(countries = c("DEU","NED"), years = c(2009,2010,2011), extraction_dir = new.str.name )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#To load for DEU and NED for all years
|
|
|
|
|
my.data = load.fadn.str.rds(countries = c("DEU","NED"),extraction_dir = new.str.name )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#To load all available countries for year 2015
|
|
|
|
|
my.data = load.fadn.str.rds(years = 2015, extraction_dir = new.str.name)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#To load all available data
|
|
|
|
|
# my.data = load.fadn.str.rds(extraction_dir = new.str.name)
|
|
|
|
|
|
|
|
|
|
#################################################################
|
|
|
|
|
## HOW TO STORE THE LOAD ##
|
|
|
|
|
#################################################################
|
|
|
|
|
|
|
|
|
|
#TODOS
|
|
|
|
|
|
|
|
|
|
# Since loading data sometimes takes time and create big datasets
|
|
|
|
|
# fadnUtils offers a way to save the dataset created from the load call
|
|
|
|
|
|
|
|
|
|
# The first step is to store the loaded data
|
|
|
|
|
# Provide the object to save, a name and a description
|
|
|
|
|
# store.rds.data(my.data,"everything","all countries and years are here")
|
|
|
|
|
```
|
|
|
|
|
## Perform analysis and Plots
|
|
|
|
|
```{r results='hide', message=FALSE, warning=FALSE}
|
|
|
|
|
|
|
|
|
|
#We load structured data for all available countries and years
|
|
|
|
|
my.str.data = load.fadn.str.rds(extraction_dir = "OST")
|
|
|
|
|
|
|
|
|
|
##----------------------------------------------------------------
|
|
|
|
|
## HOW MANY FARMS FOR EACH COUNTRY AND EACH YEAR --
|
|
|
|
|
##----------------------------------------------------------------
|
|
|
|
|
# we use the info DT, and group by YEAR-COUNTRY
|
|
|
|
|
my.str.data$info[,.N,by=list(YEAR,COUNTRY)]
|
|
|
|
|
|
|
|
|
|
#We can also use dcast, to show a more tabular format
|
|
|
|
|
# Number of farms per YEAR (rows) and COUNTRY (columns).
# value.var is named explicitly: with fun.aggregate = length every column
# yields the same counts, and naming one spares dcast from guessing the value
# column. ID is the farm identifier column of the info table.
dcast(
  my.str.data$info,
  YEAR ~ COUNTRY,
  fun.aggregate = length,
  value.var = "ID"
)
|
|
|
|
|
|
|
|
|
|
# We can also export to clipboard, using the write.excel utility function
|
|
|
|
|
# After running the following command, open excel and paste. The result will appear.
|
|
|
|
|
# Same YEAR x COUNTRY table of farm counts, handed to write.excel().
# value.var is named explicitly: with fun.aggregate = length every column
# yields the same counts, and naming one spares dcast from guessing the value
# column. ID is the farm identifier column of the info table.
write.excel(
  dcast(
    my.str.data$info,
    YEAR ~ COUNTRY,
    fun.aggregate = length,
    value.var = "ID"
  )
)
|
|
|
|
|
|
|
|
|
|
##---------------------------------------------------------------
|
|
|
|
|
## ALL CROP AREAS PER COUNTRY-YEAR --
|
|
|
|
|
##---------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
# First, calculate the weighted area
|
|
|
|
|
my.str.data$crops[
|
|
|
|
|
VARIABLE=="LEVL",
|
|
|
|
|
VALUE.w:=WEIGHT*VALUE/1000
|
|
|
|
|
]
|
|
|
|
|
|
|
|
|
|
# Then dcast that variable
|
|
|
|
|
dcast(
|
|
|
|
|
my.str.data$crops[VARIABLE=="LEVL"],
|
|
|
|
|
COUNTRY+CROP~YEAR,
|
|
|
|
|
value.var = "VALUE.w",
|
|
|
|
|
fun.aggregate = sum,
|
|
|
|
|
na.rm = T
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
##---------------------------------------------------------------
|
|
|
|
|
## ALL CROP PRODUCTION PER COUNTRY-YEAR --
|
|
|
|
|
##---------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# First, calculate the weighted production (VARIABLE "GROF").
# `:=` updates the table by reference and returns the WHOLE table, not the
# rows selected in `i`. The assignment is therefore done on its own ...
my.str.data$crops[
  VARIABLE == "GROF",
  VALUE.w := WEIGHT * VALUE / 1000
]

# ... and the GROF rows are selected separately, so that VALUE.w values that
# belong to other variables (e.g. LEVL) are not summed into the production.
dcast(
  my.str.data$crops[VARIABLE == "GROF"],
  COUNTRY + CROP ~ YEAR,
  value.var = "VALUE.w",
  fun.aggregate = sum,
  na.rm = TRUE
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
##---------------------------------------------------------------
|
|
|
|
|
## BARLEY PRODUCTION PER COUNTRY-YEAR --
|
|
|
|
|
##---------------------------------------------------------------
|
|
|
|
|
# Weighted production (VARIABLE "GROF") of barley (CROP "BARL").
# `:=` updates the table by reference and returns the WHOLE table, not the
# rows selected in `i`. The assignment is therefore done on its own ...
my.str.data$crops[
  VARIABLE == "GROF" & CROP == "BARL",
  VALUE.w := WEIGHT * VALUE / 1000
]

# ... and only the barley production rows are passed to dcast. Otherwise the
# VALUE.w of every crop and variable would be summed per country and year.
dcast(
  my.str.data$crops[VARIABLE == "GROF" & CROP == "BARL"],
  COUNTRY ~ YEAR,
  value.var = "VALUE.w",
  fun.aggregate = sum,
  na.rm = TRUE
)
|
|
|
|
|
|
|
|
|
|
##----------------------------------------------------------------
|
|
|
|
|
## DISTRIBUTION OF NUMBER OF CROPS PER COUNTRY-YEAR --
|
|
|
|
|
##----------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
crops.data = my.str.data$crops #catering for easier access at next steps
|
|
|
|
|
|
|
|
|
|
#this contains the number of crops for each farm-country-year/
|
|
|
|
|
# Be careful, we have to filter to count only the LEVL variable
|
|
|
|
|
crops.data.Ncrops = crops.data[VARIABLE=="LEVL",.N,by=list(COUNTRY,YEAR,ID)]
|
|
|
|
|
|
|
|
|
|
# This displays the quantiles of the number of crops
|
|
|
|
|
crops.data.Ncrops[,as.list(quantile(N)),by=list(YEAR,COUNTRY)][order(COUNTRY)]
|
|
|
|
|
|
|
|
|
|
# R excels on graphic representation of results
|
|
|
|
|
library(ggplot2)
|
|
|
|
|
|
|
|
|
|
ggplot(crops.data.Ncrops,aes(y=N,x=1)) +
|
|
|
|
|
geom_boxplot() +
|
|
|
|
|
facet_grid(YEAR~COUNTRY) +
|
|
|
|
|
theme(axis.title.x=element_blank(),
|
|
|
|
|
axis.text.x=element_blank(),
|
|
|
|
|
axis.ticks.x=element_blank()
|
|
|
|
|
)+
|
|
|
|
|
ylab("Number of Crops")
|
|
|
|
|
|
|
|
|
|
##---------------------------------------------------------------------------
|
|
|
|
|
## COLLECT COMMON ID FROM LOADED STRUCTURED R-DATA AND LOADED RAW R_DATA --
|
|
|
|
|
##---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
# Collect the common id from the loaded str r-data
|
|
|
|
|
collected.common.id_str = collect.common.id(my.str.data)
|
|
|
|
|
|
|
|
|
|
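# Collect the common id from the data held in my.data (NOTE: my.data was last assigned by load.fadn.str.rds() in the previous chunk, not by load.fadn.raw.rds(); reload the raw r-data first if raw data is intended)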
|
|
|
|
|
collected.common.id_raw = collect.common.id(my.data)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
##---------------------------------------------------------------
|
|
|
|
|
## CALCULATION BASED ON COLLECTED COMMON ID --
|
|
|
|
|
##---------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
# summaries for info
|
|
|
|
|
|
|
|
|
|
# sample and represented number of farms
|
|
|
|
|
my.str.data$info[,list(Nobs_sample=.N,Nobs_represented=sum(WEIGHT)),
|
|
|
|
|
by=.(COUNTRY,YEAR)]
|
|
|
|
|
|
|
|
|
|
# only for full sample (common id over years in selected data)
|
|
|
|
|
my.str.data$info[ID %in% collected.common.id_str[[1]],
|
|
|
|
|
list(Nobs_sample=.N,
|
|
|
|
|
Nobs_represented=sum(WEIGHT)),
|
|
|
|
|
by=.(COUNTRY,YEAR)]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# some summaries for crops
|
|
|
|
|
|
|
|
|
|
# unweighted and weighted sum over countries, years, crops and variables (EAAP GROF INTF LEVL SHARE UVAG UVSA)
|
|
|
|
|
# EAA: Economic Accounts of Agriculture
|
|
|
|
|
# EAAP: EAA value at producer prices
|
|
|
|
|
# GROF
|
|
|
|
|
|
|
|
|
|
my.str.data$crops[ ,list(VALUE=sum(VALUE),
|
|
|
|
|
VALUE_weighted=sum(VALUE*WEIGHT)),
|
|
|
|
|
by=.(COUNTRY,YEAR,CROP,VARIABLE)]
|
|
|
|
|
|
|
|
|
|
# only for full sample (common id over years in selected data)
|
|
|
|
|
my.str.data$crops[ID %in% collected.common.id_str[[1]],
|
|
|
|
|
list(VALUE=sum(VALUE),
|
|
|
|
|
VALUE_weighted=sum(VALUE*WEIGHT)),
|
|
|
|
|
by=.(COUNTRY,YEAR,CROP,VARIABLE)]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
my.str.data$crops[ID %in% collected.common.id_str[,common_id],
|
|
|
|
|
list(VALUE=sum(VALUE),
|
|
|
|
|
VALUE_weighted=sum(VALUE*WEIGHT)),
|
|
|
|
|
by=.(COUNTRY,YEAR,CROP,VARIABLE,ID)]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
##---------------------------------------------------------------
|
|
|
|
|
## Load fadn raw data and search the number of common id for adjacent combination years
|
|
|
|
|
##---------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#' Find all adjacent combinations in a vector or list.
#'
#' An adjacent combination is a run of consecutive elements of `Data`. The
#' runs are returned ordered by length first (all runs of length 1, then of
#' length 2, ...) and by start position second.
#'
#' @param Data A vector (or list) of elements, e.g. c(2009, 2010, 2011).
#' @return A list with one element per run; an empty list for empty input.
myFun <- function(Data) {
  n <- length(Data)
  # 1:length(Data) would count down to 0 for empty input and fail; there is
  # simply nothing to combine in that case.
  if (n == 0L) {
    return(list())
  }

  runs_by_width <- lapply(seq_len(n), function(width) {
    # A run of `width` elements can start at offsets 0 .. n - width.
    offsets <- seq_len(n - width + 1L) - 1L
    lapply(offsets, function(offset) Data[seq_len(width) + offset])
  })

  # Flatten one level: list of lists of runs -> list of runs.
  unlist(runs_by_width, recursive = FALSE, use.names = FALSE)
}
|
|
|
|
|
|
|
|
|
|
"add a string to the facet label text and split it in two lines"
|
|
|
|
|
label_facet <- function(original_var, custom_name) {
  # Distinct values of the facet variable, in factor-level order.
  facet_levels <- levels(as.factor(original_var))

  # One label per level: the level, a line break and the custom text. Each
  # label is named by its level so it can be looked up by facet value.
  setNames(paste0(facet_levels, " \n ", custom_name), facet_levels)
}
|
|
|
|
|
|
|
|
|
|
#' Multi-panel bar plot of the number of common ID, one panel per group.
#'
#' @param country Country code; only used in the plot title.
#' @param df Data frame with the columns `Years` (bar labels), `Num_id` (bar
#'   lengths) and `group` (facet variable).
#' @param n Width, in characters, at which the `Years` labels are wrapped.
#' @return The ggplot object.
figure <- function(country, df, n) {
  # ggplot() takes the data directly and str_wrap() is namespace-qualified, so
  # the function no longer depends on the caller having attached a package
  # that provides `%>%` and str_wrap().
  ggplot(df, aes(x = Years, y = Num_id)) +
    # to order the bars by Num_id instead:
    # aes(x = reorder(Years, -Num_id), y = Num_id)
    geom_bar(
      stat = "identity",
      position = position_dodge(width = 0.8),
      width = 0.5,
      # fill = rainbow(n = length(df$Num_id))
      fill = "#00abff"
    ) +
    coord_flip() +
    # labs(title = paste0("Plot of the Number of common ID for country: ", country), fill = "Years") +
    xlab("Years") +
    ylab("Number of common ID") +
    geom_text(aes(label = Num_id), vjust = 0.5, colour = "black", size = 3.5) +
    scale_x_discrete(labels = function(x) stringr::str_wrap(x, width = n)) +
    facet_wrap(
      ~ group,
      scales = "free",
      # labeller = names
      labeller = labeller(group = label_facet(df$group, "adjacent combinations"))
    ) +
    ggtitle(paste0("Number of common ID for country: ", country)) +
    # The complete theme goes first: adding theme_bw() after theme() would
    # reset the axis text settings made there.
    theme_bw() +
    theme(
      axis.text.x = element_text(color = "black", size = 6, angle = 0,
                                 vjust = .8, hjust = 0.8),
      plot.title = element_text(hjust = 0.5)
    )
}
|
|
|
|
|
|
|
|
|
|
#' Number of common ID per country for all adjacent combinations of years.
#'
#' For every country the years are taken from the raw r-data file names in
#' "<data.dir>/rds". For each adjacent combination of these years (see
#' myFun()) the raw data is loaded with load.fadn.raw.rds() and the rows
#' returned by collect.common.id() are counted. The counts can be written to
#' an Excel workbook (one sheet per country) in "<data.dir>/spool" and plotted
#' with figure() into "<data.dir>/plots".
#'
#' @param countries_list Vector of country codes.
#' @param saveExcel Write the counts to an Excel workbook?
#' @param excelname File name of the workbook; required when `saveExcel` is
#'   TRUE.
#' @param savePlots Save one "<country>_plot.png" per country?
#' @return A list with one data frame (columns `Num_id`, `Years`) per country
#'   for which raw r-data was found.
output_common_id <- function(countries_list, saveExcel = TRUE, excelname, savePlots = TRUE) {
  save_excel <- isTRUE(as.logical(saveExcel))
  save_plots <- isTRUE(as.logical(savePlots))

  if (save_excel && missing(excelname)) {
    stop("'excelname' must be given when saveExcel is TRUE.", call. = FALSE)
  }

  rds.dir <- paste0(get.data.dir(), "/rds/")
  plots.dir <- paste0(get.data.dir(), "/plots/")
  xlsx_file_dir <- paste0(get.data.dir(), "/spool/")
  if (!dir.exists(plots.dir)) dir.create(plots.dir)

  # wb always exists, so the is.null() test below also works when no workbook
  # is requested. Only openxlsx is used: xlsx defines functions of the same
  # names (createWorkbook, saveWorkbook) and is not needed.
  wb <- NULL
  if (save_excel) {
    wb <- openxlsx::createWorkbook()
  }

  if (save_plots) {
    # Attached as before, so that plotting helpers which call ggplot2/stringr
    # functions without a namespace prefix keep working.
    library(ggplot2)
    library(stringr)
  }

  outlist <- list()
  for (country in countries_list) {
    cat("Country:", country, '\n')

    raw_file_names <- dir(rds.dir, pattern = paste0(country, "\\.rds$"))
    years_list <- as.numeric(gsub("\\D+", "", raw_file_names))
    if (length(years_list) == 0L) {
      warning("No raw r-data found for country '", country, "' in ", rds.dir,
              "; country skipped.", call. = FALSE)
      next
    }

    adjacent_list <- myFun(years_list)

    # Count the common ID of each combination right after loading it, so that
    # only one loaded data set is held in memory at a time.
    n_common <- list()
    for (year_items in adjacent_list) {
      raw_data <- load.fadn.raw.rds(countries = country, years = year_items)
      n_common[[toString(year_items)]] <- nrow(collect.common.id(raw_data))
    }

    # One row per combination; the row names carry the years, e.g. "2009, 2010".
    DF <- data.frame(do.call(rbind, n_common))
    colnames(DF) <- "Num_id"
    DF$Years <- row.names(DF)

    outlist[[country]] <- DF

    if (!is.null(wb)) {
      if (!(country %in% names(wb))) {
        openxlsx::addWorksheet(wb, country)
      }
      openxlsx::writeData(wb, country, DF)
    }

    if (save_plots) {
      # The number of years of a combination is derived from the length of
      # its label: k four-digit years joined by ", " give 6 * k - 2
      # characters, which the break points below map to k.
      DF$Length <- nchar(DF$Years)
      DF$group <- cut(DF$Length, breaks = c(1, 5, 14, 18, 25, 30, 35, 40, 48, 55, 58, 70, Inf))
      levels(DF$group) <- c("1 year", "2 years", "3 years", "4 years", "5 years", "6 years", "7 years",
                            "8 years", "9 years", "10 years", "11, 12 years", ">12 years")

      # NED labels are wrapped at a larger width than those of the other
      # countries.
      wrap_width <- if (country == "NED") 35 else 20
      p <- figure(country, DF, wrap_width)

      ggsave(plot = p,
             filename = paste0(plots.dir, country, "_plot.png"),
             width = 18, height = 8)
    }
  }

  if (save_excel) {
    if (length(names(wb)) > 0L) {
      openxlsx::saveWorkbook(wb, paste0(xlsx_file_dir, excelname), overwrite = TRUE)
      cat(excelname, " is saved in ", xlsx_file_dir, "\n")
    } else {
      warning("No data found for any country; ", excelname, " was not written.",
              call. = FALSE)
    }
  }
  if (save_plots) cat("plots are saved in", plots.dir, "\n")

  return(outlist)
}
|
|
|
|
|
|
|
|
|
|
# get all countries in fadn str data
|
|
|
|
|
countries = unique(my.str.data$info$COUNTRY)
|
|
|
|
|
|
|
|
|
|
#ID_list <- output_common_id(countries_list = countries)
|
|
|
|
|
|
|
|
|
|
# get Germany: DEU and Croatia: HRV
|
|
|
|
|
DEU_list <- output_common_id(c("HRV", "DEU"), saveExcel = TRUE, excelname = "HRV_DEU.xlsx", savePlots = TRUE)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|