Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
library(fadnUtils)
library(writexl)
library(jsonlite)
library(xlsx)
# FADN Data DIR
dir <- "D:/public/data/fadn/lieferung_20210414/csv/"
# The working directory is switched to the csv folder because the conversion
# loop later in this script passes bare file names to the converter.
setwd(dir)

# Get all csv files in the FADN data dir.
# `pattern` is a regular expression (not a shell glob), so anchor on a
# literal ".csv" suffix; the former "*.csv$" also matched names such as "xcsv".
csv_files <- list.files(path = dir, pattern = "\\.csv$")
if (length(csv_files) == 0L) {
  stop("No .csv files found in ", dir, call. = FALSE)
}
csv_list <- data.frame(names = csv_files, stringsAsFactors = FALSE)

# Split every file name into stem (X1) and extension (X2), then cut the stem
# after its third character into country code and year, using the same
# substr() positions as the conversion loop. Base R / tools only, so no
# unattached package is needed.
# NOTE(review): assumes names of the form "<CCC><YYYY>.csv" - confirm.
file_stem <- tools::file_path_sans_ext(csv_files)
df <- data.frame(
  X1 = file_stem,
  country = substr(file_stem, 1, 3),
  Year = substr(file_stem, 4, nchar(file_stem)),
  X2 = tools::file_ext(csv_files),
  stringsAsFactors = FALSE
)
df

# Number of files per country and per year
table(df$country)
table(df$Year)
# NOTE: `countires` is a typo for "countries"; the name is kept so that any
# code relying on it keeps working.
countires <- unique(df$country)
years <- unique(df$Year)
# 28 countries

# Get all csv files for country "DEU"
DEU_csv <- grep("^DEU", csv_list$names, value = TRUE)
length(DEU_csv)
#++++++++++++++++++++++++++++
# Point fadnUtils at the project data dir and echo it back
set.data.dir("D:/public/yang/MIND_STEP/new_sample")
get.data.dir()

# Convert every csv found above (all countries, not only DEU) into raw rds.
# Country = characters 1-3 of the file name, year = characters 4-7.
country_codes <- substr(csv_files, 1, 3)
year_codes <- substr(csv_files, 4, 7)
for (idx in seq_along(csv_files)) {
  print(csv_files[idx])
  convert.to.fadn.raw.rds(
    file.path = csv_files[idx],
    sepS = ",",
    fadn.country = country_codes[idx],
    fadn.year = year_codes[idx],
    col.id = "ID"
  )
}

# List what the data dir now contains
show.data.dir.contents()
# CONVERT FADN.RAW.RDS TO FADN.STR.RDS
rds.dir <- paste0(get.data.dir(), "/rds/")

# Name under which the str r-data is saved inside rds.dir
new.str.name <- "DEU"

# Extraction dir: build the path once and create the folder only when it is
# missing, so re-running the script does not warn about an existing directory.
new.extraction.dir <- paste0(rds.dir, new.str.name)
if (!dir.exists(new.extraction.dir)) {
  dir.create(new.extraction.dir)
}

# Check the columns of one raw file against the json definition and save the
# modified json file into the extraction dir.
# NOTE(review): the str name is "DEU" but the reference file is the 2018 BEL
# raw rds - confirm this is intended.
list_vars <- check.column(
  importfilepath = paste0(rds.dir, "fadn.raw.2018.BEL.rds"), # a rds file or a csv file
  jsonfile = "D:/public/yang/MIND_STEP/2014_after_copy.json", # a json file
  rewrite_json = TRUE, # write a new json file without unmatched variables
  extraction_dir = new.extraction.dir # save the new json in extraction_dir
)
#********************************
year_list <- c(2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018)

# Find all adjacent combinations in a vector.
#
# Returns every contiguous run of `Data` whose length is between 2 and
# length(Data) - 1, as an unnamed list of vectors. Runs are ordered by
# increasing length and, within one length, by increasing start position.
# The full vector itself is NOT included.
#
# Data: an atomic vector (or list) to take windows from.
# Value: a list of sub-vectors; an empty list when length(Data) < 3.
myFun <- function(Data) {
  n <- length(Data)
  # Window lengths 2..(n - 1) only exist for n >= 3. Without this guard
  # `2:(n - 1L)` would count backwards and produce wrong windows.
  if (n < 3L) {
    return(list())
  }
  runs_by_width <- lapply(2:(n - 1L), function(width) {
    lapply(seq_len(n - width + 1L), function(start) {
      Data[start:(start + width - 1L)]
    })
  })
  # Flatten one level: list of per-width lists -> flat list of windows
  unlist(runs_by_width, recursive = FALSE, use.names = FALSE)
}
# All contiguous year windows produced by myFun() ...
adjacent_list <- myFun(year_list)
# ... plus the complete year range as the last entry. The position is derived
# from the current length instead of a fixed index, so it stays correct when
# year_list changes.
adjacent_list[[length(adjacent_list) + 1L]] <- year_list

# Load the DEU raw data once per year window; each list entry is keyed by the
# comma-separated years, e.g. "2009, 2010".
my.data <- list()
for (year_items in adjacent_list) {
  name <- toString(year_items)
  # Progress output: show which window is being loaded
  print(name)
  data <- load.fadn.raw.rds(countries = "DEU", years = year_items)
  my.data[[name]] <- data
}
# For every year window, store the number of rows returned by
# collect.common.id(), keyed by the window label.
Big.Num.Common.id <- list()
for (i in seq_along(my.data)) {
  # Retrieving the window label and its data set
  name <- names(my.data)[i]
  data <- my.data[[i]]
  print("******************************")
  # Rename the "ID" column to "id" before calling collect.common.id()
  colnames(data)[which(names(data) == "ID")] <- "id"
  common.id <- collect.common.id(data)
  Big.Num.Common.id[[name]] <- nrow(common.id)
}

# One row per year window: the count first, then the window label. Both
# column names are set explicitly; assigning a single name to the two-column
# frame left the Years header as NA.
DF <- data.frame(
  count = as.integer(unlist(Big.Num.Common.id, use.names = FALSE)),
  Years = as.character(names(Big.Num.Common.id)),
  stringsAsFactors = FALSE
)
names(DF) <- c("the number of common id", "Years")
rownames(DF) <- NULL
write_xlsx(DF, "D:/public/yang/MIND_STEP/new_sample/DEUData_common_id.xlsx")