# Based on the work of Dimitrios Kremmydas (JRC) and Xinxin Yang (THÜNEN)
source("../R/fadntocapri/raw_data_codes_livestock.R")
# raw table J: livestock
# col.codes.livestock.dt
# raw table K: production
# col.codes.anim_products.dt
# Animals ----------------------------------------------------------------
## load raw data ===========================================================
# raw.file <- paste0(get.data.dir(),"/rds","/fadn.raw.all.rds")
# check if raw data exist
if(length(fadn.countries)==1) {
fadn.raw.rds.avail <- get.available.fadn.raw.rds()[COUNTRY %in% fadn.countries]
}
if(nrow(fadn.raw.rds.avail)==0){
cat("Raw data does not exist, converting raw data ...\n")
convert.raw(countries = fadn.countries)
}
if(length(fadn.countries)==1) {
fadn.raw.rds.avail <- get.available.fadn.raw.rds()[COUNTRY %in% fadn.countries]
# check if str data exist
fadn.str.rds.avail <- get.available.fadn.str.rds(extract_dir = "forcapri")[COUNTRY %in% fadn.countries]
if(nrow(fadn.str.rds.avail)==0){
cat("Str data does not exist, converting str data ...\n")
convert.str(countries = fadn.countries)
}
# raw_data_test <- load.raw(fadn.countries)
cols.names.fadn <- colnames(raw_data)
missing.names <- load.FADN.data.request.file(cols.names.fadn)
# load livestocks from raw data ===========================================================
cols.to.load = c("ID",col.codes.livestock.dt[!code.num=="",common.name])
# only average number of animals
fadn.info <- c("NUTS1","NUTS2","NUTS3","SYS02")
tmp.LOAD.raw <- copy(raw_data[,c("YEAR","COUNTRY",fadn.info,cols.to.load),with=FALSE])
# system.time({tmp.LOAD.raw %>% pivot_longer(!c("ID","YEAR","COUNTRY","NUTS1","NUTS2","NUTS3"),
# names_to = "variable",
# values_to = "value") %>% filter(value != 0)}) #88.76s
# setnames(tmp.LOAD.raw,c("load.YEAR","load.COUNTRY"),c("YEAR","COUNTRY"))
tmp.LOAD.raw.long <- melt.data.table(tmp.LOAD.raw,
id.vars = c("ID","YEAR","COUNTRY","NUTS1","NUTS2","NUTS3"))[value!=0]
# system.time({
# melt(tmp.LOAD.raw,
# id.vars = c("ID","YEAR","COUNTRY","NUTS1","NUTS2","NUTS3"))[value!=0]
# })#26.40
# system.time({
# melt(tmp.LOAD.raw,
# id.vars = c("ID","YEAR","COUNTRY","NUTS1","NUTS2","NUTS3"))[value!=0]
# })
rm(tmp.LOAD.raw)
# for depreciated variables (2013 and before that are not found in 2014 and after; end at _X)
system.time({
tmp.LOAD.raw.long[
grep("_X$",variable),
":="(
ANIM=gsub("^(.+?)_(.+?)(_.*)?$","\\1",variable),
VAR=gsub("^(.+?)_(.+?)(_.*)?$","\\2",variable)
)]
tmp.LOAD.raw.long[
!grep("_X$",variable),
":="(
ANIM=gsub("^(.+)_(.+)$","\\1",variable),
VAR=gsub("^(.+)_(.+)$","\\2",variable)
)]
})# 41.81
vars.to.keep = c("AN","ALU","ON","OV","CN","CV","PV",
"PN","SV","SN","FCV","FUV","TO","SRV","SRN",
"SSN","SSV","SUN","SUV","SYS02")
TABLE_J_RAW = data.table::dcast(tmp.LOAD.raw.long[VAR%in%vars.to.keep]
,COUNTRY+ID+YEAR+NUTS1+NUTS2+NUTS3+ANIM~VAR,
value.var = "value",fill=0)
# check columns
cols.used_ifm_cap = c("AN","SV","SN","SRN","PN","PV","ON","OV","CN","CV")
for(col.used.ifm in cols.used_ifm_cap) {
if(!col.used.ifm%in%names(TABLE_J.all)){
warning(paste0("nCreating column ",col.used.ifm))
TABLE_J.all[,(col.used.ifm):=0]
}
}
TABLE_J.all$NUTS0 <- TABLE_J.all$COUNTRY
rm(TABLE_J_RAW)
tmp1 = dcast(TABLE_J.all[ANIM%in%c("LBOV0","LBOVFAT"),
.(COUNTRY,ID,ANIM,YEAR,value=AN)],COUNTRY+ID+YEAR~ANIM,
fill=0)
if(!"LBOVFAT" %in% colnames(tmp1))
{
cat("LBOVFAT is not in there!\n");
tmp1$LBOVFAT = 0
}
if(!"LBOV0" %in% colnames(tmp1))
{
cat("LBOV0 is not in there!\n");
tmp1$LBOV0 = 0
}
tmp1[,":="(LBOV0.perc=LBOV0/(LBOV0+LBOVFAT),LBOVFAT.perc=LBOV0/(LBOV0+LBOVFAT))]
tmp1[is.na(LBOV0.perc),LBOV0.perc:=0]
tmp1[is.na(LBOVFAT.perc),LBOVFAT.perc:=0]
# tmp2 = merge(tmp1,TABLE_AB.all[,.(ID,NUTS0,NUTS1,NUTS2,NUTS3,SYS02)],by="ID")
tmp2 = merge(tmp1,TABLE_J.all[,.(ID,NUTS0,NUTS1,NUTS2,NUTS3,SYS02)], by="ID",allow.cartesian=TRUE)
library(Hmisc)
tmp3=rbindlist(use.names=T,list(
tmp2[,.(value=median(LBOV0.perc),TYPE="ID"),by=.(GEO=ID)],
tmp2[,.(value=wtd.quantile(LBOV0.perc,SYS02,0.5),TYPE="NUTS0"),by=.(GEO=NUTS0)],
tmp2[,.(value=wtd.quantile(LBOV0.perc,SYS02,0.5),TYPE="NUTS1"),by=.(GEO=NUTS1)],
tmp2[,.(value=wtd.quantile(LBOV0.perc,SYS02,0.5),TYPE="NUTS2"),by=.(GEO=NUTS2)],
tmp2[,.(value=wtd.quantile(LBOV0.perc,SYS02,0.5),TYPE="NUTS3"),by=.(GEO=NUTS3)]
))
#Get from NUTS2 the share for each farm
tmp4 = unique(merge(
unique(TABLE_J.all[ANIM%in%c("LBOV1"),.(COUNTRY,ID,YEAR)]),
TABLE_J.all[,.(ID,YEAR,NUTS0,NUTS1,NUTS2,NUTS3)],
by=c("ID","YEAR")
))[,.(ID,NUTS0,NUTS1,NUTS2,NUTS3)]
tmp5 = merge(
tmp4,
tmp3[TYPE=="ID",.(ID=as.numeric(GEO),LBOV0.perc=value)],
all.x=T,by="ID"
)
tmp6 = rbindlist(use.names=T,list(
tmp5[!is.na(LBOV0.perc)],
merge(
tmp5[is.na(LBOV0.perc),-"LBOV0.perc"],
tmp3[TYPE=="NUTS3",.(NUTS3=GEO,LBOV0.perc=value)],
all.x=T,by="NUTS3"
)
))
tmp6 = rbindlist(use.names=T,list(
tmp6[!is.na(LBOV0.perc)],
merge(
tmp6[is.na(LBOV0.perc),-"LBOV0.perc"],
tmp3[TYPE=="NUTS2",.(NUTS2=GEO,LBOV0.perc=value)],
all.x=T,by="NUTS2"
)
))
tmp6 = rbindlist(use.names=T,list(
tmp5[!is.na(LBOV0.perc)],
merge(
tmp5[is.na(LBOV0.perc),-"LBOV0.perc"],
tmp3[TYPE=="NUTS1",.(NUTS1=GEO,LBOV0.perc=value)],
all.x=T,by="NUTS1"
)
))
tmp6 = rbindlist(use.names=T,list(
tmp6[!is.na(LBOV0.perc)],
merge(
tmp6[is.na(LBOV0.perc),-"LBOV0.perc"],
tmp3[TYPE=="NUTS0",.(NUTS0=GEO,LBOV0.perc=value)],
all.x=T,by="NUTS0"
)
))
BOV1_PERC = tmp6[,.(LBOV0.perc=mean(LBOV0.perc)),by=ID]