Newer
Older
Hugo Scherer
committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
# -------------------------------------
# Script: import_fadn.R
# Author: Hugo Scherer
# Purpose: Preparing FADN data for use in FarmDyn
# Version: 2.0.0
# Date: 23.01.2023
# Notes:
#
# Copyright(c) 2023 Wageningen Economic Research
# -------------------------------------
# Source import_fadn.R
## fadn2fd ----
#' `fadn2fd()` imports the FADN data from the csv files
#' @param fadn_data FADN
#' @param mapping A vector with the names of the columns to be mapped
#' @param farmbranch Either arable or dairy
#' @param save_gdx Logical. If TRUE, it saves to gdx files
#' @return A list of dataframes with the FADN data
#' @export fadn2fd
#'
fadn2fd <- function(fadn_data, mapping, farmbranch = c("arable", "dairy"), save_gdx = FALSE) {
FADN$ID <- factor(FADN$ID)
FADN2 <- FADN
if (farmbranch != "arable") {
# Dairy conditions ----
FADN <- FADN[FADN$TF14 == 45 & FADN$SE086 >= 5, ]
# Remove observations with the lowest 10% of milk yields
# and the highest 1% of milk yields
FADN <- FADN[order(FADN$SE126), ] %>%
dplyr::slice(-c(
1:round(nrow(FADN) * 0.1),
(nrow(FADN) - round(nrow(FADN) * 0.01)):nrow(FADN)
))
} else if (farmbranch == "arable") {
# Arable conditions ----
FADN <- FADN[FADN$TF14 %in% c(15, 16, 20), ]
# Subset the data to those whose arable land is 0.98
# the sum of the area of the crops
narbl2 <- FADN[, c("ID", FADN2Dyn$`AA(ha)`), with = FALSE]
# Get the total arable area
narbl2[,
`global%nArabLand` := rowSums(.SD, na.rm = TRUE),
by = ID, .SDcols = FADN2Dyn$`AA(ha)`
]
# Select only ID and global%nArabLand
narbl2 <- narbl2[, c("ID", "global%nArabLand")]
narbl_uaa <- merge(narbl2, FADN[, c("ID", "SE025")], by = "ID")
narbl_uaa$ratio <- narbl_uaa$`global%nArabLand` / narbl_uaa$SE025
gr_cult_ids <- narbl_uaa[narbl_uaa$ratio >= 0.8, ]$ID
narbl2 <- narbl2[narbl2$ID %in% gr_cult_ids, ]
FADN <- FADN[FADN$ID %in% gr_cult_ids, ]
} else {
rlang::abort("Please select a valid option for farmbranch")
}
yield_gen(FADN, FADN2, reporting = FALSE)
message("Preparing FADN data")
new_fadn <- FADN[
,
.(
FADN[,
c("ID", "COUNTRY", "TF14", "NUTS0", "NUTS1", "NUTS2", "NUTS3", "SYS02"),
with = FALSE
],
`global%Aks` = SE010,
`misc%milkPrice` = PMLKCOW_SV * 0.1 / PMLKCOW_SQ,
`global%milkYield` = SE126 / 100,
`global%farmbranchDairy` = ifelse(TF14 == 45, -1, -2),
`global%farmbranchMotherCows` = -2,
`global%farmbranchBeef` = ifelse(TF14 == 49, -1, -2),
`global%farmbranchSows` = -2,
`global%farmbranchBiogas` = -2,
`global%farmbranchFattners` = ifelse(TF14 == 51, -1, -2),
`misc%farmincome` = SE410,
`misc%net_value_added` = SE415,
`global%nCows` = SE086,
`global%nCalves` = LBOV1_AN,
`global%nFattners` = LPIGFAT_AN,
`global%nHeifs` = LHEIFBRE_AN,
`global%nSows` = LSOWBRE_AN,
`global%nGrasLand` = SE071 - CFODRTBR_A - CFODMZ_A - CLEG_A - CFODOTH_A,
`global%ShareGrassLand` = (SE071 - CFODRTBR_A - CFODMZ_A - CLEG_A - CFODOTH_A) / SE025,
`misc%UAA` = SE025,
`misc%Winterbarley` = CBRL_A,
`misc%WinterWheat` = CWHTC_A,
`misc%Potatoes` = CPOT_A,
`misc%SugarBeet` = CSUGBT_A,
`misc%WinterRape` = CRAPE_A,
`misc%SummerCere` = CCEROTH_A + COAT_A + CRYE_A,
`misc%MaizSil` = CFODMZ_A,
`misc%MaizCorn` = CMZ_A,
`misc%summerBeans` = CPEA_A,
`misc%totaloutput` = SE131,
`misc%subsidies` = SE605, # excluding investment subsidides
`misc%farm_net_income` = SE420,
`misc%own_assets` = SE436,
`misc%depreciation` = SE360,
`misc%depreciationpercow` = SE360 / SE086,
`misc%N_use` = INUSE_Q,
`misc%total_specific_costs` = SE281,
`misc%total_feed_costs` = SE310 + SE320,
`misc%contractors` = SE350,
`misc%wagespaid` = SE375,
`misc%cropprotection` = SE300,
`misc%rent` = SE375,
`misc%fertiliser` = SE295,
`misc%seedsandplants` = SE285,
`misc%othercosts` = IOVHSOTH_V,
`misc%paidinterest` = SE380,
`misc%purchasedconc` = IGRFEDCNCTRPUR_V,
`misc%machineryequipment` = SE455,
`misc%buildings` = SE450,
`misc%OrganicCode` = ORGANIC,
FADN[,
colnames(FADN)[grepl(colnames(FADN), pattern = "(^(SE).{3,4})$")],
with = FALSE
]
)
]
# Add `misc%` to Standard Results
colnames(new_fadn)[startsWith(colnames(new_fadn), "SE")] <- paste0("misc.", colnames(new_fadn)[startsWith(colnames(new_fadn), "SE")])
# I use % as a separator, _ is used for some variables
# (meaning that if I separate with _,
# these var names would be split in two or more),
# so I can't use that one, but % causes troubles sometimes
# and somehow R/DT does not like it, dplyr does though
colnames(new_fadn) <- gsub("\\.", "%", colnames(new_fadn))
new_fadn[is.nan(new_fadn) | new_fadn == Inf | is.na(new_fadn)] <- 0
nuts3_yields <- FADNcrops_NUTS3 %>%
select(-c(NUTS0, NUTS1, NUTS2))
nuts0_yields <- FADNcrops_NUTS0 %>%
select(-c(NUTS1, NUTS2, NUTS3))
third_fadn <- base::Reduce(
function(...) merge(..., by = "NUTS0", all.x = TRUE),
list(new_fadn, nuts0_yields, defaultvalsNUTS0)
)
third_fadn[is.na(third_fadn)] <- 0
# Better with dplyr, reshaping data
# (no use of sep in DT, longer and more tedious)
# alternatively: reshape
third_fadn <- tibble(third_fadn)
test_dyn <- third_fadn %>%
pivot_longer(
cols = -c(
"NUTS0", "ID", "COUNTRY", "TF14", "NUTS1", "NUTS2", "NUTS3", "SYS02"
),
names_sep = "%",
names_to = c("item1", "item2")
) %>%
pivot_wider(id_cols = c(
"NUTS0", "ID", "COUNTRY", "TF14", "NUTS1", "NUTS2", "NUTS3", "SYS02", "item1"
), names_from = "item2", values_from = "value", values_fn = sum)
test_dyn[is.na(test_dyn)] <- 0
map2gui <- test_dyn %>%
pivot_longer(
cols = 10:length(colnames(test_dyn)),
names_to = "item2",
values_to = "value"
) %>%
pivot_wider(
id_cols = 1:8,
names_from = c("item1", "item2"),
values_from = "value",
names_sep = "%"
)
# Joining all data together
map2gui[] <- lapply(map2gui, function(x) if (is.factor(x)) as.factor(x) else x)
map2gui[is.na(map2gui)] <- 0
map2gui[colnames(map2gui) %in% paste0("global%", nonnumericals)] <- lapply(
map2gui[colnames(map2gui) %in% paste0("global%", nonnumericals)],
as.character
)
# For each mapping, group by the mapping and summarise
# With this configuration, you could do a list of different mappings
# Create a list to store the results
map2gui_list <- vector("list", length = length(mapping))
message("Aggregating to ", length(mapping), " different mappings")
for (map in seq_along(mapping)) {
map2gui_list[[map]] <- map2gui %>%
dplyr::group_by(
across(mapping[[map]])
) %>%
select(-c(ID, COUNTRY, TF14, NUTS0, NUTS1, NUTS2, NUTS3)) %>%
summarise(
dplyr::across(
c(everything(), -SYS02),
~ if (is.numeric(.)) {
weighted.mean(., na.rm = TRUE, w = as.numeric(as.character(SYS02)))
} else {
Modes(.)[1]
}
),
`misc%nFarms` = n(),
# Weights are correct, tested
`misc%weights` = sum(as.numeric(as.character(SYS02)))
)
}
# Concatenate the items in the list of list mapping
mapping <- lapply(mapping, paste0, collapse = "_")
# Name the list elements
names(map2gui_list) <- paste("map2gui", mapping, sep = "_")
# Set the column name to farmIds
map2gui_list <- map2gui_list %>%
purrr::map2(
., seq_along(mapping), ~ dplyr::rename_with(
.x, ~"farmIds", all_of(.y)
)
)
# Add conditional variables to all NUTS regions
map2gui_list <- purrr::map(map2gui_list, ~ .x %>%
group_by(farmIds) %>%
mutate(
`global%nArabLand` = sum(across(paste0("misc%", german_crops))),
`global%farmbranchArable` = ifelse(`global%nArabLand` > 0, -1, -2)
))
# For all map2guiNUTSx, remove observations with n < 15
# for (i in 0:3) {
# assign(
# paste0("map2guiNUTS", i),
# assign(
# paste0("map2guiNUTS", i),
# get(paste0("map2guiNUTS", i))[get(paste0("map2guiNUTS", i))$`misc%nFarms` >= 15, ]
# )
# )
# }
# Condition to see if any of the rowSums of maxrot per farmIds is not equal to 1 and show farmId for which the condition is true
# might throw an NA there sometimes but it is not a true NA
# for (i in seq_along(map2gui_list)) {
# if (any(rowSums(map2gui_list[[i]][, colnames(map2gui_list[[i]]) %in% paste0("maxrot%", FADN2Dyn$crop_new)]) < 0.98) |
# any(rowSums(map2gui_list[[i]][, colnames(map2gui_list[[i]]) %in% paste0("maxrot%", FADN2Dyn$crop_new)]) > 1.02)) {
# warning(
# paste(map2gui_list[[i]][
# rowSums(map2gui_list[[i]][, colnames(map2gui_list[[i]]) %in% paste0("maxrot%", FADN2Dyn$crop_new)]) < 0.98, ,
# ]$farmIds, "has a problem with maxrot (sum not equal to 1)\n")
# )
# }
# if (any(is.nan(rowSums(map2gui_list[[i]][, colnames(map2gui_list[[i]]) %in% paste0("maxrot%", FADN2Dyn$crop_new)])))) {
# warning(
# paste(map2gui_list[[i]][
# is.nan(rowSums(map2gui_list[[i]][, colnames(map2gui_list[[i]]) %in% paste0("maxrot%", FADN2Dyn$crop_new)])), ,
# ]$farmIds, "has a problem with maxrot (NaN), check if there are any crops")
# )
# }
# }
for (i in seq_along(map2gui_list)) {
setDT(map2gui_list[[i]])
map2gui_list[[i]][, paste0("maxrot%", german_crops) := lapply(.SD, function(x) x / `global%nArabLand`), .SDcols = paste0("misc%", german_crops), by = farmIds]
# Convert nonumericals into numerics
cols_to_convert <- colnames(map2gui_list[[i]])[colnames(map2gui_list[[i]]) %in% paste0("global%", nonnumericals)]
map2gui_list[[i]][, (cols_to_convert) := lapply(.SD, as.numeric), .SDcols = cols_to_convert]
cols_to_melt <- setdiff(colnames(map2gui_list[[i]]), "farmIds")
# Pivot from wide to long and then wide again for all map2guiNUTS
map2gui_list[[i]] <- melt(map2gui_list[[i]], id.vars = "farmIds", measure.vars = cols_to_melt, variable.name = "item", value.name = "value")
map2gui_list[[i]][, c("item1", "item2") := tstrsplit(item, split = "%", fixed = TRUE)]
map2gui_list[[i]] <- dcast(map2gui_list[[i]], farmIds + item1 ~ item2, value.var = "value")
# Replace NAs with 0
map2gui_list[[i]][is.na(map2gui_list[[i]])] <- 0
}
# Add to gdx p_farmdata
if (save_gdx == TRUE) {
for (map in seq_along(map2gui_list)) {
gdxreshape(
map2gui_list[[map]],
symDim = 3, order = c(1, 2, 0),
gdxName = file.path(
"data",
"farmdata", paste("farmData", farmbranch, mapping[map], sep = "_")
),
symName = "p_farmData"
)
}
} else {
return(map2gui_list)
}
}