# Based on the work of Dimitrios Kremmydas (JRC) and Xinxin Yang (THÜNEN)
# raw table J: livestock
# col.codes.livestock.dt
# raw table K: production
# col.codes.anim_products.dt
# load raw data ----
raw.file <- paste0(get.data.dir(),"/rds","/fadn.raw.all.rds")
raw_data_test <- readRDS(file=raw.file) #82.42s
#read thw variables of a worksheet into R df
FADN_data_request_file <- "D:/data/fadn/lieferung_20210414/yang/FADN data request forms_2020_02_update - 20201212.xlsx"
FADN_data_request <- readxl::read_excel(FADN_data_request_file,
sheet = "Selection of variables",
skip = 2)
common.names.inExcel <- FADN_data_request$`COMMON name`
# raw_data_test <- copy(raw_data)
cols.names.fadn <- colnames(raw_data_test)
missing.names <- c()
for (common.name in common.names.inExcel){
if (!common.name %in% cols.names.fadn){
rm(common.names.inExcel,FADN_data_request_file, FADN_data_request)
missing.names <- unique(missing.names)
## add missing variables with 0 value
raw_data_test[,c(missing.names) := 0]
# load livestocks from raw data ----
cols.to.load = c("ID",col.codes.livestock.dt[!code.num=="",common.name])
# only average number of animals
fadn.info <- c("NUTS1","NUTS2","NUTS3","SYS02")
tmp.LOAD.raw <- copy(raw_data_test[,c("YEAR","COUNTRY",fadn.info,cols.to.load),with=FALSE])
# system.time({tmp.LOAD.raw %>% pivot_longer(!c("ID","YEAR","COUNTRY","NUTS1","NUTS2","NUTS3"),
# names_to = "variable",
# values_to = "value") %>% filter(value != 0)}) #88.76s
# setnames(tmp.LOAD.raw,c("load.YEAR","load.COUNTRY"),c("YEAR","COUNTRY"))
tmp.LOAD.raw.long <- melt.data.table(tmp.LOAD.raw,
id.vars = c("ID","YEAR","COUNTRY","NUTS1","NUTS2","NUTS3"))[value!=0]
# system.time({
# melt(tmp.LOAD.raw,
# id.vars = c("ID","YEAR","COUNTRY","NUTS1","NUTS2","NUTS3"))[value!=0]
# })#26.40
# system.time({
# melt(tmp.LOAD.raw,
# id.vars = c("ID","YEAR","COUNTRY","NUTS1","NUTS2","NUTS3"))[value!=0]
# })
rm(tmp.LOAD.raw)
# for depreciated variables (2013 and before that are not found in 2014 and after; end at _X)
system.time({
tmp.LOAD.raw.long[
grep("_X$",variable),
":="(
ANIM=gsub("^(.+?)_(.+?)(_.*)?$","\\1",variable),
VAR=gsub("^(.+?)_(.+?)(_.*)?$","\\2",variable)
)]
tmp.LOAD.raw.long[
!grep("_X$",variable),
":="(
ANIM=gsub("^(.+)_(.+)$","\\1",variable),
VAR=gsub("^(.+)_(.+)$","\\2",variable)
)]
})# 41.81
vars.to.keep = c("AN","ALU","ON","OV","CN","CV","PV",
"PN","SV","SN","FCV","FUV","TO","SRV","SRN",
"SSN","SSV","SUN","SUV","SYS02")
TABLE_J_RAW = data.table::dcast(tmp.LOAD.raw.long[VAR%in%vars.to.keep]
,COUNTRY+ID+YEAR+NUTS1+NUTS2+NUTS3+ANIM~VAR,
value.var = "value",fill=0)
TABLE_J.all <- TABLE_J_RAW
TABLE_J.all$NUTS0 <- TABLE_J.all$COUNTRY
rm(TABLE_J_RAW)
tmp1 = dcast(TABLE_J.all[ANIM%in%c("LBOV0","LBOVFAT"),
.(COUNTRY,ID,ANIM,YEAR,value=AN)],COUNTRY+ID+YEAR~ANIM,
fill=0)
tmp1[,":="(LBOV0.perc=LBOV0/(LBOV0+LBOVFAT),LBOVFAT.perc=LBOV0/(LBOV0+LBOVFAT))]
# tmp1[is.na(LBOV0.perc),LBOV0.perc:=0]
# tmp1[is.na(LBOVFAT.perc),LBOVFAT.perc:=0]
# tmp2 = merge(tmp1,TABLE_AB.all[,.(ID,NUTS0,NUTS1,NUTS2,NUTS3,SYS02)],by="ID")
tmp2 = merge(tmp1,TABLE_J.all[,.(ID,NUTS0,NUTS1,NUTS2,NUTS3,SYS02)], by="ID",allow.cartesian=TRUE)
library(Hmisc)
tmp3=rbindlist(use.names=T,list(
tmp2[,.(value=median(LBOV0.perc),TYPE="ID"),by=.(GEO=ID)],
tmp2[,.(value=wtd.quantile(LBOV0.perc,SYS02,0.5),TYPE="NUTS0"),by=.(GEO=NUTS0)],
tmp2[,.(value=wtd.quantile(LBOV0.perc,SYS02,0.5),TYPE="NUTS1"),by=.(GEO=NUTS1)],
tmp2[,.(value=wtd.quantile(LBOV0.perc,SYS02,0.5),TYPE="NUTS2"),by=.(GEO=NUTS2)],
tmp2[,.(value=wtd.quantile(LBOV0.perc,SYS02,0.5),TYPE="NUTS3"),by=.(GEO=NUTS3)]
))
#Get from NUTS2 the share for each farm
tmp4 = unique(merge(
unique(TABLE_J.all[ANIM%in%c("LBOV1"),.(COUNTRY,ID,YEAR)]),
TABLE_J.all[,.(ID,YEAR,NUTS0,NUTS1,NUTS2,NUTS3)],
by=c("ID","YEAR")
))[,.(ID,NUTS0,NUTS1,NUTS2,NUTS3)]
tmp5 = merge(
tmp4,
tmp3[TYPE=="ID",.(ID=as.numeric(GEO),LBOV0.perc=value)],
all.x=T,by="ID"
)
tmp6 = rbindlist(use.names=T,list(
tmp5[!is.na(LBOV0.perc)],
merge(
tmp5[is.na(LBOV0.perc),-"LBOV0.perc"],
tmp3[TYPE=="NUTS3",.(NUTS3=GEO,LBOV0.perc=value)],
all.x=T,by="NUTS3"
)
))
tmp6 = rbindlist(use.names=T,list(
tmp6[!is.na(LBOV0.perc)],
merge(
tmp6[is.na(LBOV0.perc),-"LBOV0.perc"],
tmp3[TYPE=="NUTS2",.(NUTS2=GEO,LBOV0.perc=value)],
all.x=T,by="NUTS2"
)
))
tmp6 = rbindlist(use.names=T,list(
tmp5[!is.na(LBOV0.perc)],
merge(
tmp5[is.na(LBOV0.perc),-"LBOV0.perc"],
tmp3[TYPE=="NUTS1",.(NUTS1=GEO,LBOV0.perc=value)],
all.x=T,by="NUTS1"
)
))
tmp6 = rbindlist(use.names=T,list(
tmp6[!is.na(LBOV0.perc)],
merge(
tmp6[is.na(LBOV0.perc),-"LBOV0.perc"],
tmp3[TYPE=="NUTS0",.(NUTS0=GEO,LBOV0.perc=value)],
all.x=T,by="NUTS0"
)
))
BOV1_PERC = tmp6[,.(LBOV0.perc=mean(LBOV0.perc)),by=ID]
# The definition of activities
ANIMALS_IFM_CAP.all = rbindlist(use.names = T,list(
#2014 and after
get.ifm_cap.animals("AN",2004:2018),
get.ifm_cap.animals("SN",2004:2018),
get.ifm_cap.animals("SV",2004:2018),
# get.ifm_cap.animals("SSN",2004:2018),# SSN not found
# get.ifm_cap.animals("SSV",2004:2018),# not found
# get.ifm_cap.animals("SRV",2004:2018),# not found
# get.ifm_cap.animals("SUN",2004:2018),# not found
# get.ifm_cap.animals("SUV",2004:2018),# not found
get.ifm_cap.animals("PN",2004:2018),
get.ifm_cap.animals("PV",2004:2018),
get.ifm_cap.animals("ON",2004:2018),
get.ifm_cap.animals("OV",2004:2018),
get.ifm_cap.animals("CN",2004:2018),
get.ifm_cap.animals("CV",2004:2018)
))
rm(list=c("tmp1","tmp2","tmp3","tmp4","tmp5","tmp6"))
# setnames(ANIMALS_IFM_CAP.all,"FD","ID")