Xinxin Yang's avatar
Xinxin Yang committed
# livestocks

source("D:/data/fadn/lieferung_20210414/yang/FadntoCapri/raw_data_codes_livestock.R")

# raw table J: livestock
# col.codes.livestock.dt
# raw table K: production
# col.codes.anim_products.dt


commonnames.Check = FALSE
if( commonnames.Check){
  # load raw data ----
  system.time (raw_data <- readRDS(file=paste0(get.data.dir(),"/rds","/fadn.raw.all.rds"))) #82.42s
  setnames(raw_data,c("load.YEAR","load.COUNTRY"),c("YEAR","COUNTRY"))
  # read common names in excel, add common name if not in raw data ----

  #read thw variables of a worksheet into R df
  FADN_data_request_file <- "D:/data/fadn/lieferung_20210414/yang/FADN data request forms_2020_02_update - 20201212.xlsx"

  FADN_data_request <- readxl::read_excel(FADN_data_request_file, sheet = "Selection of variables", skip = 2)
  common.names <- FADN_data_request$`COMMON name` %>% as.list() %>% unlist()

  raw_data_test <- copy(raw_data)
  cols.names.fadn <- colnames(raw_data_test)
#  count = 0
  missing.names <- c()
  for (common.name in common.names){
    if (!common.name %in% cols.names.fadn){

      missing.names <- append(missing.names, common.name)
    }


  }
  missing.names <- unique(missing.names)


  ## add missing variables with 0 value
  system.time({
    raw_data_test[,c(missing.names) := 0]

  saveRDS(raw_data_test,paste0(get.data.dir(),"/rds/fadn.raw.all_commomnname.rds") )

  })
}else {

  raw_data <- readRDS(paste0(get.data.dir(),"/rds/fadn.raw.all_commomnname.rds")) #
}

# load livestocks from raw data ----
cols.to.load = c("ID",col.codes.livestock.dt[!code.num=="",common.name])

#  only average number of animals
fadn.info <- c("NUTS1","NUTS2","NUTS3","SYS02")

raw_data_test <- copy(raw_data)

tmp.LOAD.raw <- copy(raw_data_test[,c("YEAR","COUNTRY",fadn.info,cols.to.load),with=FALSE])
# system.time({tmp.LOAD.raw  %>% pivot_longer(!c("ID","YEAR","COUNTRY","NUTS1","NUTS2","NUTS3"),
#                                names_to = "variable",
#                                values_to = "value") %>% filter(value != 0)}) #88.76s

# setnames(tmp.LOAD.raw,c("load.YEAR","load.COUNTRY"),c("YEAR","COUNTRY"))
system.time({
  tmp.LOAD.raw.long <- melt.data.table(tmp.LOAD.raw,
                                       id.vars = c("ID","YEAR","COUNTRY","NUTS1","NUTS2","NUTS3"))[value!=0]
})#65.46 s

rm(tmp.LOAD.raw)

# for depreciated variables (2013 and before that are not found in 2014 and after; end at _X)
system.time({
  tmp.LOAD.raw.long[
    grep("_X$",variable),
    ":="(
      ANIM=gsub("^(.+?)_(.+?)(_.*)?$","\\1",variable),
      VAR=gsub("^(.+?)_(.+?)(_.*)?$","\\2",variable)
    )]

  tmp.LOAD.raw.long[
    !grep("_X$",variable),
    ":="(
      ANIM=gsub("^(.+)_(.+)$","\\1",variable),
      VAR=gsub("^(.+)_(.+)$","\\2",variable)
    )]
})# 41.81
vars.to.keep = c("AN","ALU","ON","OV","CN","CV","PV",
                 "PN","SV","SN","FCV","FUV","TO","SRV","SRN",
                 "SSN","SSV","SUN","SUV","SYS02")


TABLE_J_RAW = data.table::dcast(tmp.LOAD.raw.long[VAR%in%vars.to.keep]
                                ,COUNTRY+ID+YEAR+NUTS1+NUTS2+NUTS3+ANIM~VAR,
                                value.var = "value",fill=0)


TABLE_J.all <- TABLE_J_RAW
TABLE_J.all$NUTS0 <- TABLE_J.all$COUNTRY
rm(TABLE_J_RAW)



tmp1 = dcast(TABLE_J.all[ANIM%in%c("LBOV0","LBOVFAT"),
                         .(COUNTRY,ID,ANIM,YEAR,value=AN)],COUNTRY+ID+YEAR~ANIM,
             fill=0)

tmp1[,":="(LBOV0.perc=LBOV0/(LBOV0+LBOVFAT),LBOVFAT.perc=LBOV0/(LBOV0+LBOVFAT))]
# tmp1[is.na(LBOV0.perc),LBOV0.perc:=0]
# tmp1[is.na(LBOVFAT.perc),LBOVFAT.perc:=0]


# tmp2 = merge(tmp1,TABLE_AB.all[,.(ID,NUTS0,NUTS1,NUTS2,NUTS3,SYS02)],by="ID")
tmp2 = merge(tmp1,TABLE_J.all[,.(ID,NUTS0,NUTS1,NUTS2,NUTS3,SYS02)], by="ID",allow.cartesian=TRUE)

library(Hmisc)
tmp3=rbindlist(use.names=T,list(
  tmp2[,.(value=median(LBOV0.perc),TYPE="ID"),by=.(GEO=ID)],
  tmp2[,.(value=wtd.quantile(LBOV0.perc,SYS02,0.5),TYPE="NUTS0"),by=.(GEO=NUTS0)],
  tmp2[,.(value=wtd.quantile(LBOV0.perc,SYS02,0.5),TYPE="NUTS1"),by=.(GEO=NUTS1)],
  tmp2[,.(value=wtd.quantile(LBOV0.perc,SYS02,0.5),TYPE="NUTS2"),by=.(GEO=NUTS2)],
  tmp2[,.(value=wtd.quantile(LBOV0.perc,SYS02,0.5),TYPE="NUTS3"),by=.(GEO=NUTS3)]
))

#Get from NUTS2 the share for each farm
tmp4 = unique(merge(
  unique(TABLE_J.all[ANIM%in%c("LBOV1"),.(COUNTRY,ID,YEAR)]),
  TABLE_J.all[,.(ID,YEAR,NUTS0,NUTS1,NUTS2,NUTS3)],
  by=c("ID","YEAR")
))[,.(ID,NUTS0,NUTS1,NUTS2,NUTS3)]

tmp5 = merge(
  tmp4,
  tmp3[TYPE=="ID",.(ID=as.numeric(GEO),LBOV0.perc=value)],
  all.x=T,by="ID"
)

tmp6 = rbindlist(use.names=T,list(
  tmp5[!is.na(LBOV0.perc)],
  merge(
    tmp5[is.na(LBOV0.perc),-"LBOV0.perc"],
    tmp3[TYPE=="NUTS3",.(NUTS3=GEO,LBOV0.perc=value)],
    all.x=T,by="NUTS3"
  )
))

tmp6 = rbindlist(use.names=T,list(
  tmp6[!is.na(LBOV0.perc)],
  merge(
    tmp6[is.na(LBOV0.perc),-"LBOV0.perc"],
    tmp3[TYPE=="NUTS2",.(NUTS2=GEO,LBOV0.perc=value)],
    all.x=T,by="NUTS2"
  )
))

tmp6 = rbindlist(use.names=T,list(
  tmp5[!is.na(LBOV0.perc)],
  merge(
    tmp5[is.na(LBOV0.perc),-"LBOV0.perc"],
    tmp3[TYPE=="NUTS1",.(NUTS1=GEO,LBOV0.perc=value)],
    all.x=T,by="NUTS1"
  )
))


tmp6 = rbindlist(use.names=T,list(
  tmp6[!is.na(LBOV0.perc)],
  merge(
    tmp6[is.na(LBOV0.perc),-"LBOV0.perc"],
    tmp3[TYPE=="NUTS0",.(NUTS0=GEO,LBOV0.perc=value)],
    all.x=T,by="NUTS0"
  )
))

BOV1_PERC = tmp6[,.(LBOV0.perc=mean(LBOV0.perc)),by=ID]


# The definition of activities

ANIMALS_IFM_CAP.all = rbindlist(use.names = T,list(

  #2014 and after
  get.ifm_cap.animals("AN",2004:2018),
  get.ifm_cap.animals("SN",2004:2018),
  get.ifm_cap.animals("SV",2004:2018),
  # get.ifm_cap.animals("SSN",2004:2018),# SSN not found
  # get.ifm_cap.animals("SSV",2004:2018),
  get.ifm_cap.animals("SRN",2004:2018),
  # get.ifm_cap.animals("SRV",2004:2018),
  # get.ifm_cap.animals("SUN",2004:2018),
  # get.ifm_cap.animals("SUV",2004:2018),
  get.ifm_cap.animals("PN",2004:2018),
  get.ifm_cap.animals("PV",2004:2018),
  get.ifm_cap.animals("ON",2004:2018),
  get.ifm_cap.animals("OV",2004:2018),
  get.ifm_cap.animals("CN",2004:2018),
  get.ifm_cap.animals("CV",2004:2018)

))