GitLab at IIASA

animals.R 7.42 KiB
Newer Older
Xinxin Yang's avatar
Xinxin Yang committed
# livestocks

source("D:/data/fadn/lieferung_20210414/yang/FadntoCapri/raw_data_codes_livestock.R")

# raw table J: livestock
# col.codes.livestock.dt
# raw table K: production
# col.codes.anim_products.dt

Xinxin Yang's avatar
Xinxin Yang committed
# load raw data ----
raw.file <- paste0(get.data.dir(),"/rds","/fadn.raw.all.rds")
raw_data_test <- readRDS(file=raw.file) #82.42s
Xinxin Yang's avatar
Xinxin Yang committed


Xinxin Yang's avatar
Xinxin Yang committed
#read thw variables of a worksheet into R df
FADN_data_request_file <- "D:/data/fadn/lieferung_20210414/yang/FADN data request forms_2020_02_update - 20201212.xlsx"
Xinxin Yang's avatar
Xinxin Yang committed

Xinxin Yang's avatar
Xinxin Yang committed
FADN_data_request <- readxl::read_excel(FADN_data_request_file,
                                        sheet = "Selection of variables",
                                        skip = 2)
common.names.inExcel <- FADN_data_request$`COMMON name`
# raw_data_test <- copy(raw_data)
cols.names.fadn <- colnames(raw_data_test)
Xinxin Yang's avatar
Xinxin Yang committed
#  count = 0
Xinxin Yang's avatar
Xinxin Yang committed
missing.names <- c()
for (common.name in common.names.inExcel){
  if (!common.name %in% cols.names.fadn){
    
    missing.names <- append(missing.names, common.name)
Xinxin Yang's avatar
Xinxin Yang committed
  }
Xinxin Yang's avatar
Xinxin Yang committed
  
  
Xinxin Yang's avatar
Xinxin Yang committed
}
Xinxin Yang's avatar
Xinxin Yang committed
rm(common.names.inExcel,FADN_data_request_file, FADN_data_request)
missing.names <- unique(missing.names)

## add missing variables with 0 value
raw_data_test[,c(missing.names) := 0]
Xinxin Yang's avatar
Xinxin Yang committed

# load livestocks from raw data ----
cols.to.load = c("ID",col.codes.livestock.dt[!code.num=="",common.name])

#  only average number of animals
fadn.info <- c("NUTS1","NUTS2","NUTS3","SYS02")

tmp.LOAD.raw <- copy(raw_data_test[,c("YEAR","COUNTRY",fadn.info,cols.to.load),with=FALSE])
# system.time({tmp.LOAD.raw  %>% pivot_longer(!c("ID","YEAR","COUNTRY","NUTS1","NUTS2","NUTS3"),
#                                names_to = "variable",
#                                values_to = "value") %>% filter(value != 0)}) #88.76s

# setnames(tmp.LOAD.raw,c("load.YEAR","load.COUNTRY"),c("YEAR","COUNTRY"))

Xinxin Yang's avatar
Xinxin Yang committed

tmp.LOAD.raw.long <- melt.data.table(tmp.LOAD.raw,
                                     id.vars = c("ID","YEAR","COUNTRY","NUTS1","NUTS2","NUTS3"))[value!=0]
# system.time({
#   melt(tmp.LOAD.raw,
#                   id.vars = c("ID","YEAR","COUNTRY","NUTS1","NUTS2","NUTS3"))[value!=0]
# })#26.40
# system.time({
# melt(tmp.LOAD.raw,
#                 id.vars = c("ID","YEAR","COUNTRY","NUTS1","NUTS2","NUTS3"))[value!=0]
# })
Xinxin Yang's avatar
Xinxin Yang committed
rm(tmp.LOAD.raw)

# for depreciated variables (2013 and before that are not found in 2014 and after; end at _X)
system.time({
  tmp.LOAD.raw.long[
    grep("_X$",variable),
    ":="(
      ANIM=gsub("^(.+?)_(.+?)(_.*)?$","\\1",variable),
      VAR=gsub("^(.+?)_(.+?)(_.*)?$","\\2",variable)
    )]

  tmp.LOAD.raw.long[
    !grep("_X$",variable),
    ":="(
      ANIM=gsub("^(.+)_(.+)$","\\1",variable),
      VAR=gsub("^(.+)_(.+)$","\\2",variable)
    )]
})# 41.81
Xinxin Yang's avatar
Xinxin Yang committed


Xinxin Yang's avatar
Xinxin Yang committed
vars.to.keep = c("AN","ALU","ON","OV","CN","CV","PV",
                 "PN","SV","SN","FCV","FUV","TO","SRV","SRN",
                 "SSN","SSV","SUN","SUV","SYS02")


TABLE_J_RAW = data.table::dcast(tmp.LOAD.raw.long[VAR%in%vars.to.keep]
                                ,COUNTRY+ID+YEAR+NUTS1+NUTS2+NUTS3+ANIM~VAR,
                                value.var = "value",fill=0)


TABLE_J.all <- TABLE_J_RAW
TABLE_J.all$NUTS0 <- TABLE_J.all$COUNTRY
rm(TABLE_J_RAW)



tmp1 = dcast(TABLE_J.all[ANIM%in%c("LBOV0","LBOVFAT"),
                         .(COUNTRY,ID,ANIM,YEAR,value=AN)],COUNTRY+ID+YEAR~ANIM,
             fill=0)

tmp1[,":="(LBOV0.perc=LBOV0/(LBOV0+LBOVFAT),LBOVFAT.perc=LBOV0/(LBOV0+LBOVFAT))]
# tmp1[is.na(LBOV0.perc),LBOV0.perc:=0]
# tmp1[is.na(LBOVFAT.perc),LBOVFAT.perc:=0]


# tmp2 = merge(tmp1,TABLE_AB.all[,.(ID,NUTS0,NUTS1,NUTS2,NUTS3,SYS02)],by="ID")
tmp2 = merge(tmp1,TABLE_J.all[,.(ID,NUTS0,NUTS1,NUTS2,NUTS3,SYS02)], by="ID",allow.cartesian=TRUE)

library(Hmisc)
tmp3=rbindlist(use.names=T,list(
  tmp2[,.(value=median(LBOV0.perc),TYPE="ID"),by=.(GEO=ID)],
  tmp2[,.(value=wtd.quantile(LBOV0.perc,SYS02,0.5),TYPE="NUTS0"),by=.(GEO=NUTS0)],
  tmp2[,.(value=wtd.quantile(LBOV0.perc,SYS02,0.5),TYPE="NUTS1"),by=.(GEO=NUTS1)],
  tmp2[,.(value=wtd.quantile(LBOV0.perc,SYS02,0.5),TYPE="NUTS2"),by=.(GEO=NUTS2)],
  tmp2[,.(value=wtd.quantile(LBOV0.perc,SYS02,0.5),TYPE="NUTS3"),by=.(GEO=NUTS3)]
))

#Get from NUTS2 the share for each farm
tmp4 = unique(merge(
  unique(TABLE_J.all[ANIM%in%c("LBOV1"),.(COUNTRY,ID,YEAR)]),
  TABLE_J.all[,.(ID,YEAR,NUTS0,NUTS1,NUTS2,NUTS3)],
  by=c("ID","YEAR")
))[,.(ID,NUTS0,NUTS1,NUTS2,NUTS3)]

tmp5 = merge(
  tmp4,
  tmp3[TYPE=="ID",.(ID=as.numeric(GEO),LBOV0.perc=value)],
  all.x=T,by="ID"
)

tmp6 = rbindlist(use.names=T,list(
  tmp5[!is.na(LBOV0.perc)],
  merge(
    tmp5[is.na(LBOV0.perc),-"LBOV0.perc"],
    tmp3[TYPE=="NUTS3",.(NUTS3=GEO,LBOV0.perc=value)],
    all.x=T,by="NUTS3"
  )
))

tmp6 = rbindlist(use.names=T,list(
  tmp6[!is.na(LBOV0.perc)],
  merge(
    tmp6[is.na(LBOV0.perc),-"LBOV0.perc"],
    tmp3[TYPE=="NUTS2",.(NUTS2=GEO,LBOV0.perc=value)],
    all.x=T,by="NUTS2"
  )
))

tmp6 = rbindlist(use.names=T,list(
  tmp5[!is.na(LBOV0.perc)],
  merge(
    tmp5[is.na(LBOV0.perc),-"LBOV0.perc"],
    tmp3[TYPE=="NUTS1",.(NUTS1=GEO,LBOV0.perc=value)],
    all.x=T,by="NUTS1"
  )
))


tmp6 = rbindlist(use.names=T,list(
  tmp6[!is.na(LBOV0.perc)],
  merge(
    tmp6[is.na(LBOV0.perc),-"LBOV0.perc"],
    tmp3[TYPE=="NUTS0",.(NUTS0=GEO,LBOV0.perc=value)],
    all.x=T,by="NUTS0"
  )
))

BOV1_PERC = tmp6[,.(LBOV0.perc=mean(LBOV0.perc)),by=ID]


# The definition of activities

ANIMALS_IFM_CAP.all = rbindlist(use.names = T,list(

  #2014 and after
  get.ifm_cap.animals("AN",2004:2018),
  get.ifm_cap.animals("SN",2004:2018),
  get.ifm_cap.animals("SV",2004:2018),
  # get.ifm_cap.animals("SSN",2004:2018),# SSN not found
Xinxin Yang's avatar
Xinxin Yang committed
  # get.ifm_cap.animals("SSV",2004:2018),# not found 
Xinxin Yang's avatar
Xinxin Yang committed
  get.ifm_cap.animals("SRN",2004:2018),
Xinxin Yang's avatar
Xinxin Yang committed
  # get.ifm_cap.animals("SRV",2004:2018),# not found
  # get.ifm_cap.animals("SUN",2004:2018),# not found
  # get.ifm_cap.animals("SUV",2004:2018),# not found
Xinxin Yang's avatar
Xinxin Yang committed
  get.ifm_cap.animals("PN",2004:2018),
  get.ifm_cap.animals("PV",2004:2018),
  get.ifm_cap.animals("ON",2004:2018),
  get.ifm_cap.animals("OV",2004:2018),
  get.ifm_cap.animals("CN",2004:2018),
  get.ifm_cap.animals("CV",2004:2018)

))

rm(list=c("tmp1","tmp2","tmp3","tmp4","tmp5","tmp6"))

# setnames(ANIMALS_IFM_CAP.all,"FD","ID")
# str(ANIMALS_IFM_CAP.all)

# str(str_data$info)
# vars = c("ID","WEIGHT", "COUNTRY","YEAR","TF8","TF14","SIZ6","REGION","NUTS1","NUTS2","NUTS3", "ORGANIC")
Xinxin Yang's avatar
Xinxin Yang committed
ANIMALS_FADN <- ANIMALS_IFM_CAP.all %>% as.data.table() %>% mutate(ID=as.character(ID))

rm(list=c("ANIMALS_IFM_CAP.all", "raw_data_test"))

str_data <- readRDS(paste0(get.data.dir(),"/rds/str_dir/fadn.str.all.rds"))
str_data_info <- str_data$info
rm(list="str_data")
Xinxin Yang's avatar
Xinxin Yang committed


ANIMALS_FADN_ID_YEAR <- expand_grid(ANIMALS_FADN %>% select(variable,ANIM) %>% unique(),
                                    str_data_info %>% select(ID,YEAR) %>% unique())
Xinxin Yang's avatar
Xinxin Yang committed
ANIMALS_FADN_ID_YEAR <- ANIMALS_FADN_ID_YEAR %>% as.data.table()

Xinxin Yang's avatar
Xinxin Yang committed

# ANIMALS_FADN_ID_YEAR is a subset of ANIMALS_FADN without column "value"????
# identical(ANIMALS_FADN %>% select(-value) %>% mutate(ID = as.factor(ID)), ANIMALS_FADN_ID_YEAR)

Xinxin Yang's avatar
Xinxin Yang committed
select_str_info <-  str_data_info[,c("ID","WEIGHT", "COUNTRY","YEAR",
                                     "TF8","TF14","SIZ6","REGION","NUTS1","NUTS2","NUTS3", "ORGANIC")]
# system.time({
# 
# animal.dt <- right_join(ANIMALS_FADN_ID_YEAR, 
#                         select_str_info)
# 
# animal.dt <- left_join(animal.dt, ANIMALS_FADN)
# 
# }) #216s

# system.time({
  
animal.dt <- ANIMALS_FADN_ID_YEAR[select_str_info, on = c("ID", "YEAR")]
animal.dt <- ANIMALS_FADN[animal.dt , on = c("variable", "ANIM", "ID", "YEAR")]
  
# })#78s


# all.equal(animal.dt1, animal.dt)