library(fadnUtils)
library(writexl)
library(jsonlite)
library(xlsx)
library(tidyr)   # separate() is used below

# FADN data directory
dir = "D:/public/data/fadn/lieferung_20210414/csv/"
setwd(dir)

# Get all CSV files in the FADN data directory
csv_files = list.files(path = dir, pattern = "\\.csv$")  # anchored regex; "*.csv$" is glob syntax, not a regex
csv_list = data.frame(csv_files)
colnames(csv_list) = "names"

# Split each file name at "." into base name and extension
df = data.frame(do.call('rbind', strsplit(as.character(csv_list$names), '.', fixed = TRUE)))
df

# Split the base name into country code (first 3 characters) and year
df = separate(df, X1, into = c("country", "Year"), sep = 3, remove = FALSE)
table(df$country)
table(df$Year)

countries = unique(df$country)
years = unique(df$Year)
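
# Illustration with a hypothetical file name (assumes the "CCCYYYY.csv"
# naming convention of this delivery):
strsplit("DEU2018.csv", ".", fixed = TRUE)[[1]]  # -> "DEU2018" "csv"
# separate(..., sep = 3) then cuts "DEU2018" after position 3:
# country = "DEU", Year = "2018"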

# 28 countries

# Get all CSV files for country "DEU"
DEU_csv <- grep("^DEU", csv_list$names, value = TRUE)
length(DEU_csv)
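# Sanity check: with one file per country-year, length(DEU_csv) should equal
# the number of years in the delivery (assumed 10 here, 2009-2018).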

#++++++++++++++++++++++++++++

# Set the project data directory (fadnUtils)
set.data.dir("D:/public/yang/MIND_STEP/new_sample")
get.data.dir()

# Convert each CSV file into FADN raw RDS data
for (file in csv_files) {
  print(file)

  # Country code: first 3 characters of the file name
  country = substr(file, 1, 3)

  # Year: characters 4-7 of the file name
  year = substr(file, 4, 7)

  convert.to.fadn.raw.rds(
    file.path = file,
    sepS = ",",
    fadn.country = country,
    fadn.year = year,
    col.id = "ID"
  )
}

show.data.dir.contents()

# CONVERT FADN.RAW.RDS TO FADN.STR.RDS

rds.dir = paste0(get.data.dir(), "/rds/")

# Name under which the str R-data will be saved in rds.dir
new.str.name = "DEU"

# Create the extraction directory
dir.create(paste0(rds.dir, new.str.name))
new.extraction.dir = paste0(rds.dir, new.str.name)

# Check the input columns against the JSON definition and save the modified JSON file
list_vars = check.column(
  importfilepath = paste0(rds.dir, "fadn.raw.2018.BEL.rds"), # an RDS file or a CSV file
  jsonfile = "D:/public/yang/MIND_STEP/2014_after_copy.json", # a JSON file
  rewrite_json = TRUE, # write a new JSON file without the unmatched variables
  extraction_dir = new.extraction.dir # save the new JSON in extraction_dir
)

#********************************

year_list = c(2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018)

# Find all contiguous sub-sequences (length 2 up to length(Data) - 1) of a
# vector: A holds the index templates 1:2, 1:3, ..., and B holds the start
# offsets each template can be shifted by.
myFun <- function(Data) {
  A <- lapply(2:(length(Data) - 1L), sequence)
  B <- lapply(rev(lengths(A)) - 1L, function(x) c(0, sequence(x)))
  unlist(lapply(seq_along(A), function(x) {
    lapply(B[[x]], function(y) Data[A[[x]] + y])
  }), recursive = FALSE, use.names = FALSE)
}

adjacent_list = myFun(year_list)

# myFun() yields 9 + 8 + ... + 2 = 44 windows for 10 years; append the full
# 10-year span as the 45th combination
adjacent_list[[45]] = year_list
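
# Quick check of myFun() on a shorter vector (illustration only): the 5
# contiguous windows of 2009:2012 come out in this order:
# 2009 2010 | 2010 2011 | 2011 2012 | 2009 2010 2011 | 2010 2011 2012
str(myFun(c(2009, 2010, 2011, 2012)))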

# Load the DEU raw data for every year combination
my.data = list()

for (year_items in adjacent_list) {
  name = toString(year_items)
  print(name)
  data = load.fadn.raw.rds(countries = "DEU", years = year_items)
  my.data[[name]] = data
}
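
# my.data now holds one named data set per year combination:
length(my.data)  # expected: 45, one per entry of adjacent_list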

# Count the common IDs per year combination
Big.Num.Common.id = list()

for (data_list in 1:length(my.data)) {
  # Single-bracket indexing keeps the element name (the year combination)
  data = my.data[data_list]
  name = names(data)

  print("******************************")

  # Rename the "ID" column to "id" before collecting the common IDs
  colnames(data[[name]])[which(names(data[[name]]) == "ID")] <- "id"

  common.id = collect.common.id(data[[name]])

  Big.Num.Common.id[[name]] = nrow(common.id)
}

DF = do.call(rbind, Big.Num.Common.id)
DF = data.frame(DF)
DF$Years <- rownames(DF)

# DF has two columns at this point, so both need a name
colnames(DF) <- c("the number of common id", "Years")
rownames(DF) <- NULL

write_xlsx(DF, "D:/public/yang/MIND_STEP/new_sample/DEUData_common_id.xlsx")