...@@ -4,3 +4,5 @@ vignettes/ ...@@ -4,3 +4,5 @@ vignettes/
inst/examples/use_case.Rmd inst/examples/use_case.Rmd
README.html README.html
.Rproj.user .Rproj.user
.svn
README_backup.md
12
12
Version: 1.0
RestoreWorkspace: Default
SaveWorkspace: Default
AlwaysSaveHistory: Default
EnableCodeIndexing: Yes
UseSpacesForTab: Yes
NumSpacesForTab: 2
Encoding: UTF-8
RnwWeave: Sweave
LaTeX: pdfLaTeX
AutoAppendNewline: Yes
StripTrailingWhitespace: Yes
BuildType: Package
PackageUseDevtools: Yes
PackageInstallArgs: --no-multiarch --with-keep.source
PackageRoxygenize: rd,collate,namespace
# Calculate Standard Results SEs --------------------------------------
#' Calculate the FADN Standard Results (SE variables) for every farm
#'
#' Starts from the farm identifier and weight found in `data$tableAI` and
#' adds one column per Standard Result. Every column is obtained by calling
#' `getFormulaResult()` with a formula string; formulas may reference SE
#' columns computed earlier in this function, so the order of the
#' assignments matters.
#'
#' @param data A fadn.container holding all tables. This function reads
#'   `data$tableAI$FID` and `data$tableAI$WEIGHT` directly and passes the
#'   whole object on to `getFormulaResult()`.
#' @return A data.table with the columns `FID`, `SYS02` (the farm weight)
#'   and one column per calculated SE variable.
fadn.calculateSE <- function(data) {
  seData <- data.table(FID = data$tableAI$FID, SYS02 = data$tableAI$WEIGHT)

  # SE010 Total labour input of holding ----
  # Expressed in annual work units = full-time person equivalents.
  # For casual unpaid labour:
  #   AVEHRS1 = { [C13(4)..17(4)] } / { [C13(3)..17(3)] }
  # For casual paid labour:
  #   AVEHRS2 = { C19(4) + C20(4) } / { C19(3) + C20(3) }
  # IF AVEHRS1 > 0 then A = #77 / AVEHRS1 for casual unpaid labour
  # IF AVEHRS2 > 0 then C = #83 / AVEHRS2 for casual paid labour
  # TODO: not implemented yet
  # seData$SE010 <- getFormulaResult(data,
  #                                  seData,
  #                                  "%3+#57+#61+#65+#69+#72+#75+#79+#81+"
  # )

  # SE011 Unpaid labour input ----
  # NOTE(review): #80, #82 and #83 are also the whole of SE021 (paid labour);
  # confirm they really belong to the unpaid total.
  seData$SE011 <- getFormulaResult(data, seData, "#54+#58+#62+#66+#70+#73+#76+#77+#80+#82+#83")

  # SE021 Paid labour input ----
  # Time worked in hours by paid labour input on holding.
  seData$SE021 <- getFormulaResult(data, seData, "#80+#82+#83")

  # SE025 Total Utilised Agricultural Area ----
  seData$SE025 <- getFormulaResult(data, seData, "#48+#49+#50")

  # SE030 Rented U.A.A. ----
  seData$SE030 <- getFormulaResult(data, seData, "#49")

  # SE035 Cereals ----
  # Fixed: this result used to be written to SE030, overwriting Rented U.A.A.
  # NOTE(review): SE140 (cereals output) uses K120..128; confirm whether the
  # area range here should also stop at 128 instead of 148.
  seData$SE035 <- getFormulaResult(data, seData, "K120..148(4)")

  # SE041 Other field crops ----
  seData$SE041 <- getFormulaResult(data, seData, "K129..135(4)+K142(4)+K143(4)")

  # SE042 Energy crops ----
  seData$SE042 <- getFormulaResult(data, seData, "K129..133(4:2=10)+K135(4:2=10)+K144..145(4:2=10)+
K147..148(4:2=10)+K150(4:2=10)+
K158(4:2=10)+K160..161(4:2=10)+
K284(4:2=10)+K304(4:2=10)+K330..334(4:2=10)+
K345..348(4:2=10)+K360..364(4:2=10)")

  # SE046 Vegetables and flowers ----
  seData$SE046 <- getFormulaResult(data, seData, "K136..138(4)+K140(4)+K141(4)")

  # SE050 Vineyards ----
  seData$SE050 <- getFormulaResult(data, seData, "K155(4)")

  # SE054 Permanent crops ----
  seData$SE054 <- getFormulaResult(data, seData, "K152..154(4)+K156..158(4)")

  # SE055 Orchards ----
  seData$SE055 <- getFormulaResult(data, seData, "K152..153(4)")

  # SE060 Olive groves ----
  seData$SE060 <- getFormulaResult(data, seData, "K154(4)")

  # SE065 Other permanent crops ----
  seData$SE065 <- getFormulaResult(data, seData, "K156..158(4)")

  # SE071 Forage crops ----
  seData$SE071 <- getFormulaResult(data, seData, "K144..145(4)+K147(4)+K150..151(4)")

  # SE072 Agricultural fallows ----
  seData$SE072 <- getFormulaResult(data, seData, "K146(4:2=1&3=(0,1,2,3,4,9,10))")

  # SE073 Set aside ----
  seData$SE073 <- getFormulaResult(data, seData, "K146(4:2=1&3=(5,6,7,8))")

  # SE074 Total agricultural area out of production ----
  seData$SE074 <- getFormulaResult(data, seData, "SE072+SE073+K314(4)")

  # SE075 Woodland area ----
  # (A second, identical SE075 assignment further down was removed.)
  seData$SE075 <- getFormulaResult(data, seData, "K173(4)")

  # SE080 to SE105 TODO Livestock ----
  # SE110 to SE125 TODO Yields ----

  # SE135 Total crop output ----
  seData$SE135 <- getFormulaResult(data, seData, "K120..148(7..10)-K120..148(6)+
K150..161(7..10)-K150..161(6)")

  # SE206 Total livestock output ----
  seData$SE206 <- getFormulaResult(data, seData, "#231+#232+#234+#235+#237+#238+#240+#241+#243+#244+#246+#247
+#249+#250+#252+#253
-#233-#236-#239-#242-#245-#248-#251-#254
+K162..171(7..10)-K162..171(6)+K313(7..10)-K313(6)")

  # SE256 Other output ----
  seData$SE256 <- getFormulaResult(data, seData, "K149(7..10)+K172..181(7..10)")

  # SE131 Total output ----
  # Fixed: the formula referenced "S256", which is not a column computed
  # here; the intended term is SE256 (other output).
  seData$SE131 <- getFormulaResult(data, seData, "SE135+SE206+SE256")

  # SE275 Total intermediate consumption ----
  seData$SE275 <- getFormulaResult(data, seData, "#260..282+#284+#287")

  # SE611 Compensatory payments ----
  seData$SE611 <- getFormulaResult(data, seData, "M602..614(5)+M618(5)+M622..629(5)+M632..634(5)+M638(5)+M655(5)")

  # SE612 Set aside premiums ----
  seData$SE612 <- getFormulaResult(data, seData, "M650(5)")

  # SE613 Other crop subsidies ----
  seData$SE613 <- getFormulaResult(data, seData, "J120..145(2)+J147..161(2)+J185(2)+J281..284(2)+J296..301(2)+J326..357(2)+J360..374(2)+J952(2)")

  # SE610 Total subsidies on crops ----
  seData$SE610 <- getFormulaResult(data, seData, "SE611+SE612+SE613")

  # SE616 Subsidies on dairying ----
  seData$SE616 <- getFormulaResult(data, seData, "J30(2)+J162(2)+J163(2)+M770(5)-L401(10)")

  # SE617 Subsidies other cattle ----
  seData$SE617 <- getFormulaResult(data, seData, "J23..29(2)+J31..32(2)+J52(2)+J307(2)+M700(5)")

  # SE618 Subsidies sheep & goats ----
  # NOTE(review): "K38..41(2)" is the only K-table term among the subsidy
  # formulas; it looks like a typo for "J38..41(2)" - confirm before changing.
  seData$SE618 <- getFormulaResult(data, seData, "K38..41(2)+J54..55(2)+J164..168(2)+J308(2)")

  # SE619 Other livestock subsidies ----
  # NOTE(review): "j43..51(2)" uses a lower-case table letter; confirm that
  # getFormulaResult() is case-insensitive or change it to "J43..51(2)".
  seData$SE619 <- getFormulaResult(data, seData, "J22(2)+J33..34(2)+j43..51(2)+J56..58(2)+J169..171(2)+J309..311(2)+J313(2)+J951(2)")

  # SE615 Total subsidies on livestock ----
  seData$SE615 <- getFormulaResult(data, seData, "SE616+SE617+SE618+SE619")

  # SE621 Environmental subsidies ----
  seData$SE621 <- getFormulaResult(data, seData, "J800(2)+J810(2)")

  # SE622 LFA subsidies ----
  seData$SE622 <- getFormulaResult(data, seData, "J820(2)")

  # SE623 Other rural development subsidies ----
  seData$SE623 <- getFormulaResult(data, seData, "J830(2)+J835(2)+J840(2)+J900(2)+J910(2)+J953(2)")

  # SE624 Total support on rural development ----
  seData$SE624 <- getFormulaResult(data, seData, "SE621+SE622+SE623+J173..176(2)+J179(2)")

  # SE625 Subsidies on intermediate consumption ----
  seData$SE625 <- getFormulaResult(data, seData, "J59(2)+J85(2)+J89(2)")

  # SE626 Subsidies on external factors ----
  # NOTE(review): identical formula to SE625 - possibly a copy/paste
  # leftover; verify against the SE definitions.
  seData$SE626 <- getFormulaResult(data, seData, "J59(2)+J85(2)+J89(2)")

  # SE631 Single Farm Payment ----
  seData$SE631 <- getFormulaResult(data, seData, "J670(2)")

  # SE632 Single Area Payment ----
  seData$SE632 <- getFormulaResult(data, seData, "J680(2)")

  # SE640 Additional aid ----
  seData$SE640 <- getFormulaResult(data, seData, "J955(2)")

  # SE630 Decoupled payments ----
  seData$SE630 <- getFormulaResult(data, seData, "SE631+SE632+SE640")

  # SE650 Aid for article 68 ----
  seData$SE650 <- getFormulaResult(data, seData, "J956(2)")

  # SE699 Other subsidies ----
  seData$SE699 <- getFormulaResult(data, seData, "J172(2)+J177(2)+J178(2)+J180(2)+J181(2)+J182(2)+J950(2)+J998(2)+J999(2)")

  # SE605 Total subsidies excluding on investments ----
  seData$SE605 <- getFormulaResult(data, seData, "SE610+SE615+SE624+SE625+SE626+SE630+SE650+SE699")

  # SE395 VAT balance excluding on investments ----
  seData$SE395 <- getFormulaResult(data, seData, "#402+#405-#403")

  # SE390 Taxes ----
  seData$SE390 <- getFormulaResult(data, seData, "#283+#288-J83(2)-J88(2)")

  # SE600 Balance current subsidies & taxes ----
  seData$SE600 <- getFormulaResult(data, seData, "SE605+SE395-SE390")

  # SE410 Gross Farm Income ----
  seData$SE410 <- getFormulaResult(data, seData, "SE131-SE275+SE600")

  # SE360 Depreciation ----
  seData$SE360 <- getFormulaResult(data, seData, "#300+#348+#356")

  # SE415 Farm Net Value Added ----
  seData$SE415 <- getFormulaResult(data, seData, "SE410-SE360")

  # SE370 Wages paid ----
  seData$SE370 <- getFormulaResult(data, seData, "#259")

  # SE375 Rent paid ----
  seData$SE375 <- getFormulaResult(data, seData, "#285")

  # SE380 Interest paid ----
  seData$SE380 <- getFormulaResult(data, seData, "#289")

  # SE365 Total external factors ----
  seData$SE365 <- getFormulaResult(data, seData, "SE370+SE375+SE380")

  # SE407 Payments to dairy outgoers ----
  seData$SE407 <- getFormulaResult(data, seData, "J1052(2)+J2052(2)")

  # SE405 Balance subsidies & taxes on investments ----
  seData$SE405 <- getFormulaResult(data, seData, "#370+SE407-#404")

  # SE420 Family Farm Income ----
  seData$SE420 <- getFormulaResult(data, seData, "SE415-SE365+SE405")

  # SE140 Cereals output ----
  seData$SE140 <- getFormulaResult(data, seData, "K120..128(7..10)-K120..128(6)")

  # SE145 TODO Protein crops output ----
  # SE146 TODO Energy crops output ----
  # SE150 TODO Potatoes output ----
  # SE155 TODO Sugar beet output ----
  # SE160 TODO Oil-seed crops output ----
  # SE165 TODO Industrial crops output ----
  # SE170 TODO Vegetables & flowers output ----
  # SE175 TODO Fruits output ----
  # SE180 TODO Citrus fruit output ----
  # SE185 TODO Wine and grapes output ----
  # SE190 TODO Olives & olive oil output ----
  # SE195 TODO Forage crops output ----
  # SE200 TODO Other crop output ----

  seData
}
File deleted
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/standardResults.R
\name{getFormulaResult}
\alias{getFormulaResult}
\title{Aggregates columns for each farm using a formula}
\usage{
getFormulaResult(data, SEdata, formulaString, aggregator = sum, onlyValue = T)
}
\arguments{
\item{data}{a fadn.container, containing all tables}
\item{SEdata}{a data.table of already calculated SE}
\item{formulaString}{The formula String to use for aggregation}
}
\value{
[FID VALUE]
}
\description{
Aggregates columns for each farm using a formula
}
\examples{
#definition of formula SE610+SE615+SE624-SE626
formula=list(add=c("SE610","J830(2)","#289","#267..270"),substract=c("SE626","M632..634(2)"))
list(add=c("#48","#49","#50"),substract=list())
}
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/convert_data.R
\name{convert.to.fadn.raw.rds}
\alias{convert.to.fadn.raw.rds}
\title{Gets a fadn.raw.csv (csv file from DG-AGRI) and transforms it accordingly to fadn.raw.rds}
\usage{
convert.to.fadn.raw.rds(
file.path = "",
sepS = ",",
fadn.year = NA,
fadn.country = NA,
keep.csv = F,
col.id = "ID"
)
}
\arguments{
\item{file.path}{the full path of the csv file (the filename must be included)}
\item{sepS}{the separator of the csv files (by default ",")}
\item{fadn.year}{the year the csv files refers to (e.g. 2001)}
\item{fadn.country}{the three letter country code the csv files refers to (e.g. "ELL")}
\item{keep.csv}{if TRUE, copy the csv files to the CSV directory; else do not copy}
}
\value{
Saves the fadn.raw.rds file and returns TRUE if everything goes well
}
\description{
It saves two files:
- One that contains a wide format of the data, i.e. in tabular format that is identical to the csv data. This is uncompressed data.
- One that holds the same information in compressed data. It is a list that contains $data.char and $data.num data.tables in long format. 0 values are removed and only the col.id is the index on both data.tables
}
# This script provides some sample commands of fadnUtils
#
# The functions of the package are deliberately written as "fadnUtils::<FUNCTION>", in order to be
# easily identified as such. However one can write them skipping the "fadnUtils::" part
#
# For any of the functions of the package, you can see its documentation
# by either writing ?<function name> or by hovering the mouse on its name and pressing F1 (in rstudio)
#
#nununununununununununununununununununununununununununununununnunununununununununununununununununununununununununununununun#
#
# LOAD THE PACKAGE -----------------------------------------------------------------------------------------------------
#
#nununununununununununununununununununununununununununununununnunununununununununununununununununununununununununununununun#
library(data.table) #required
library(fadnUtils)
#nununununununununununununununununununununununununununununununnunununununununununununununununununununununununununununununun#
#
# IMPORT FADN CSV FILES INTO raw.rds -----------------------------------------------------------------------------------
#
#nununununununununununununununununununununununununununununununnunununununununununununununununununununununununununununununun#
# Step 1: create a data.dir.
# Replace the path with the one you want your data.dir to reside in; after
# running the command, go to this directory to see its structure.
fadnUtils::create.data.dir("E:/test_data_dir")

# Step 2: set the data.dir.
# The package needs to know the location of the data.dir it works with; if it
# is not defined, any following fadnUtils command will fail with an error.
fadnUtils::set.data.dir("E:/test_data_dir")

# Step 3: import the csv files.
countries <- c("ELL", "ITA", "ESP") # countries to import data for
years <- c(2013, 2014, 2015) # years to import data for

# The CSV files are assumed to be located in an external directory.
path.to.csv.files <- "<PUT THE FOLDER WHERE CSV FILE RESIDE>"
path.to.csv.files <- "U:/SCIENTIFIC/IFM CAP/70-Data/FADN/10_Requests/IFM-CAP/2019/Baseyear/Data/SO"

# Loop over countries and years and import the files one by one. After the
# loop, check the data.dir/rds directory to see the rds files there.
for (c in countries) {
  for (y in years) {
    file.path.cur <- paste0(path.to.csv.files, "/", c, y, ".csv")

    # Report and skip country-year pairs that have no csv file.
    if (!file.exists(file.path.cur)) {
      print(paste0("DOES NOT EXIST for year:", y, " and country:", c, "/ ", file.path.cur))
      next
    }

    print(paste0("doing year:", y, " and country:", c))
    fadnUtils::convert.to.fadn.raw.rds(
      file.path = file.path.cur,
      fadn.year = y,
      fadn.country = c
    )
  }
}

# Show the country-year pairs that are now available.
fadnUtils::show.data.dir.contents()
#nununununununununununununununununununununununununununununununnunununununununununununununununununununununununununununununun#
#
# WORK WITH raw.rds ----------------------------------------------------------------------------------------------------
#
#nununununununununununununununununununununununununununununununnunununununununununununununununununununununununununununununun#
# Step 1: set the data.dir (not strictly necessary here, since the data.dir
# was already set by a previous command).
fadnUtils::set.data.dir("E:/test_data_dir")

# Step 2: load data.

# Load raw r-data only for Spain (ESP) for 2015.
my.data <- load.fadn.raw.rds(countries = "ESP", years = 2015)

# my.data is a single large data.table with the original csv columns and rows.
nrow(my.data) # number of rows
names(my.data) # column names
length(names(my.data)) # number of columns
str(my.data) # overall structure

# Combinations of COUNTRY-YEAR can be loaded as well.
my.data <- load.fadn.raw.rds(countries = c("ELL", "ESP"), years = c(2014, 2015))

# Without country and year, all data of the data.dir is loaded.
my.data <- load.fadn.raw.rds()

# Load a subset of the columns: id, nuts0, nuts2, weight, tf8 and the
# electricity expenses, for the whole data.dir.
my.data <- load.fadn.raw.rds(
  col.filter = c("ID", "NUTS0", "NUTS2", "SYS02", "TF8", "IELE_V")
)

# Load a subset of the rows: only farms that belong to TF8-16, for all years
# and countries.
my.data <- load.fadn.raw.rds(row.filter = "TF8==16")

# Have a look at the structure of the loaded data. Two columns are always
# present: load.YEAR and load.COUNTRY. They denote the loading point of the row.
str(my.data)

# Step 3: work with the data.table structure as you like.
# Example: expenditure on electricity per country per year (grouped by the
# load.COUNTRY / load.YEAR columns mentioned above).
my.data[
  ,
  .(ELECTRICITY.th = sum(IELE_V * SYS02 * 0.001)),
  by = .(load.COUNTRY, load.YEAR)
]
#nununununununununununununununununununununununununununununununnunununununununununununununununununununununununununununununun#
#
# AGGREGATE raw.rds into str.rds------------------------------------------------------------------------------------------
#
#nununununununununununununununununununununununununununununununnunununununununununununununununununununununununununununununun#
# Step 1: set the data.dir (not strictly necessary here, since the data.dir
# was already set by a previous command).
fadnUtils::set.data.dir("E:/test_data_dir")

# Step 2: create the raw_str_map file(s).
# They have to be created inside the "<data.dir>/raw_str_maps" folder; name
# them as you like. Let's suppose that two such files exist:
#   "2014_after.json":  a full representation of the calculations for FADN
#                       data from 2014 and after
#   "2013_before.json": only the fields that must be calculated differently
#                       for the period of 2013 and before
raw_str_maps.dir <- file.path(fadnUtils::get.data.dir(), "raw_str_maps")

# Step 3: aggregate the data.
# The map is passed through the documented `raw_str_map.file` argument of
# convert.to.fadn.str.rds(); the previous `raw.f` argument is not part of its
# signature. The documentation asks for the full path of the map file.
# NOTE(review): convert.to.fadn.str.rds() also accepts `str.name` (the short
# name of the extraction); confirm whether it has to be supplied here.
countries <- c("ELL", "ITA", "ESP") # countries to aggregate data for

for (country in countries) {
  for (year in 2014:2015) {
    print(paste0("doing year:", year, " and country:", country))
    # one json for 2014 and after
    convert.to.fadn.str.rds(
      fadn.year = year,
      fadn.country = country,
      raw_str_map.file = file.path(raw_str_maps.dir, "2014_after.json")
    )
  }
}

# 2013 differs from 2014, so the 2013 entries are merged over the 2014 map and
# the result is copied to a new json inside the raw_str_maps folder.
merge_raw_f.path <- fadnUtils::raw_str_map.merge(
  source.raw_str_map.file = "2014_after.json",
  new.raw_str_map.file = "2013_before.json",
  return.file = TRUE
)
file.copy(merge_raw_f.path, file.path(raw_str_maps.dir, "merged.json"))

for (country in countries) {
  for (year in 2013) {
    print(paste0("doing year:", year, " and country:", country))
    # the merged json is used for 2013 (projection of 2013 on 2014)
    convert.to.fadn.str.rds(
      fadn.year = year,
      fadn.country = country,
      raw_str_map.file = file.path(raw_str_maps.dir, "merged.json")
    )
  }
}

# Show the country-year pairs that are now available.
fadnUtils::show.data.dir.contents()
#nununununununununununununununununununununununununununununununnunununununununununununununununununununununununununununununun#
#
# WORK WITH str.rds ----------------------------------------------------------------------------------------------------
#
#nununununununununununununununununununununununununununununununnunununununununununununununununununununununununununununununun#
# Step 1: set the data.dir (not strictly necessary here, since the data.dir
# was already set by a previous command).
fadnUtils::set.data.dir("E:/test_data_dir")

# Step 2: load str.rds data.
# The calculated variables are consistent for 2013 and 2014/15.
my.data <- fadnUtils::load.fadn.str.rds(countries = "ELL", years = 2013:2015)

str(my.data) # the loaded data is a list that contains data.tables
str(my.data$crops) # crops are in long format

# Step 3: work with this data as usual in R scripting.
# Example: sum 0.001 * VALUE * WEIGHT of the GROF variable per
# year-country-crop, then spread the years over the columns.
dcast(
  my.data$crops[
    VARIABLE == "GROF",
    .(GROF.mil = sum(0.001 * VALUE * WEIGHT)),
    by = .(YEAR, COUNTRY, CROP)
  ],
  COUNTRY + CROP ~ YEAR,
  value.var = "GROF.mil"
)
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/handle_rds_data.R
\name{grep.columns.in.raw.rds}
\alias{grep.columns.in.raw.rds}
\title{Grep a pattern into a raw.rds column names}
\usage{
grep.columns.in.raw.rds(pattern, countries = c("all"), years = c("all"))
}
\arguments{
\item{pattern}{a grep-like character pattern. This parameter is passed as is to the grep function}
\item{countries}{a character vector with all the 3-letter codes of the selected countries, e.g. c("ELL", "ESP").
If "all" is included, all available countries are loaded}
\item{years}{a numeric vector with the years selected. If "all" is included, all available years are loaded}
\item{show}{if TRUE, the columns are printed}
}
\value{
Prints the columns and returns them invisibly
}
\description{
Useful for the case where one want to look if there are certain columns present or missing
}
#+eval=FALSE
#...................................................................................#
#
# USE CASE 2 example
#
# Load r-data from a data.dir
#
#...................................................................................#
# Always load fadnUtils and data.table
library(fadnUtils)
library(data.table)
# First, set the current data.dir
set.data.dir("H:/IFM-CAP/sample.fadnutils.dir")

# List the countries and years that have been imported in the current data.dir
show.data.dir.contents()

# .............. LOAD RAW R-DATA ..................................................#

# Two kinds of data can be loaded: raw r-data files (the original FADN csv in
# r-friendly format) or structured r-data files (the original data transformed
# into meaningful information).

# Raw r-data, only for Spain (ESP) for 2015
my.data <- load.fadn.raw.rds(countries = "ESP", years = 2015)

# my.data is a single large data.table, with the original csv columns and rows
nrow(my.data) # number of rows
names(my.data) # column names
length(names(my.data)) # number of columns
str(my.data) # overall structure

# .............. LOAD STRUCTURED R-DATA ...........................................#

# Structured data, for Spain (ESP) for 2015
my.data <- load.fadn.str.rds(countries = "ESP", years = 2015)

# my.data is a list, with three elements: info, costs, crops
str(my.data)

# Each individual element can be accessed like this
str(my.data$info)
str(my.data$costs)
str(my.data$crops)

# The first columns of each of the above elements (info, costs, crops)
# are created according to the ID section of the raw_str_map
names(my.data$info)
names(my.data$costs)
names(my.data$crops)

# The info and costs data.tables are in wide format (each observation in a
# single row, all attributes of a single observation in different columns).
# The crops element is in long format (one observation is spread over many
# rows).
#
# See https://seananderson.ca/2013/10/19/reshape/ for a
# discussion of the two types of data formats
head(my.data$info)
head(my.data$costs)
head(my.data$crops)

# The attributes of each of the above elements give access to the column
# formulas and descriptions, as defined in the raw_str_map file.
View(attr(my.data$info, "column.descriptions"))
View(attr(my.data$costs, "column.descriptions"))
View(attr(my.data$crops, "column.descriptions"))

# For the crops element there is also a description of the CROP column
View(attr(my.data$crops, "crops.descriptions"))

# .............. HOW TO LOAD COUNTRIES-YEARS COMBINATIONS .........................#

# The previous examples loaded data for one country and one year.
# The following examples show more combinations.

# Spain (ESP) and Greece (ELL) for year 2015
my.data <- load.fadn.str.rds(countries = c("ESP", "ELL"), years = 2015)

# Spain (ESP) and Greece (ELL) for all years
my.data <- load.fadn.str.rds(countries = c("ESP", "ELL"))

# All available countries for year 2015
my.data <- load.fadn.str.rds(years = 2015)

# All available data
my.data <- load.fadn.str.rds()

# .............. HOW TO STORE THE LOAD .........................#

# Loading data sometimes takes time and creates big datasets, so fadnUtils
# offers a way to save the dataset created by a load call.

# Store the loaded data: provide the object to save, a name and a description
store.rds.data(my.data, "everything", "all countries and years are here")

# You can then close R. The next time you set.data.dir, the stored objects are
# available; the contents of the data.dir show which stored files are there.
show.data.dir.contents()

# Load the stored object back
my.data.restored <- restore.rds.stored.data("everything")
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/handle_rds_data.R
\name{delete.fadn.str}
\alias{delete.fadn.str}
\title{Title}
\usage{
delete.fadn.str(countries = c(), years = c())
}
\arguments{
\item{years}{}
}
\value{
}
\description{
Title
}
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/utilities.R
\name{update_elements.DT}
\alias{update_elements.DT}
\title{Updates selected elements of data stored in one DT with new one given in melted format}
\usage{
update_elements.DT(data.old, data.new)
}
\arguments{
\item{data.old}{The DT to update}
\item{data.new}{The data to insert. It must have three columns: {id,variable,new value}. E.g. data.new=data.table("id"=c(810001100105),"variable"=c("AASBIO_CV"),value=c(999999))}
}
\value{
a DT with the updated values
}
\description{
The user provides the data.new: {id,variable,new value}. The function overwrites all existing id-column with the new values
}
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/manage_data_dir.R
\name{get.data.dir}
\alias{get.data.dir}
\title{Gets the data.dir}
\usage{
get.data.dir()
}
\value{
the value of option("fadnUtils.data.dir")
}
\description{
data.dir is the folder where data is stored
r package will create two subfolders:
csv = location to store the csv files of the DG-AGRI (fadn.raw.csv)
rds = location to store rds files (fadn.raw.rds, fadn.str.rds, etc.)
}
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/raw_str_map.R
\name{raw_str_map.merge}
\alias{raw_str_map.merge}
\title{Merges two raw_str_map files and returns either a list or a file}
\usage{
raw_str_map.merge(
source.raw_str_map.file = NULL,
new.raw_str_map.file = NULL,
return.file = F
)
}
\arguments{
\item{source.raw_str_map.file}{the filename of the source raw_str_map. It must be relative to the raw_str_maps folder of the current data.dir}
\item{new.raw_str_map.file}{the filename of the mask raw_str_map. Its entries replace the matching entries of the source file. It must be relative to the raw_str_maps folder of the current data.dir}
\item{return.file}{If set to T, a temporary full file path that contains the merge is returned. Otherwise a list with the contents of the merge is returned}
}
\value{
FALSE in case of problem / if return.file=T, the temporary full path of a file that contains the merged result in json / A list with the contents of the merge if return.file=F
}
\description{
All entries in the new.raw_str_map file replace those on the source.raw_str_map file
}
\details{
Both files must be relative to the current data.dir/raw_str_maps
}
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/manage_data_dir.R
\name{get.available.fadn.str.rds}
\alias{get.available.fadn.str.rds}
\title{Returns the available YEAR-COUNTRY fadn.str.rds, for each str.folder}
\usage{
get.available.fadn.str.rds(data.dir = NULL, extract_dir)
}
\arguments{
\item{extract_dir}{The name of the extraction dir}
}
\value{
DT of the available YEAR-COUNTRY fadn.str.rds
}
\description{
Returns the available YEAR-COUNTRY fadn.str.rds, for each str.folder
}
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/handle_rds_data.R
\name{delete.fadn.raw}
\alias{delete.fadn.raw}
\title{Title}
\usage{
delete.fadn.raw(countries = NULL, years = NULL)
}
\arguments{
\item{years}{}
}
\value{
}
\description{
Title
}
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/manage_data_dir.R
\name{check.data.dir.structure}
\alias{check.data.dir.structure}
\title{Checks if the structure of the fadnUtils.data.dir is ok}
\usage{
check.data.dir.structure(data.dir = NULL, silent = T)
}
\arguments{
\item{data.dir}{a specific directory to show contents, otherwise it will read the fadnUtils.data.dir}
\item{silent}{if TRUE, do not print any message}
}
\value{
TRUE if everything is ok; FALSE otherwise
}
\description{
Checks if the structure of the fadnUtils.data.dir is ok
}
#+eval=FALSE
#...................................................................................#
#
# USE CASE 3 example
#
# Perform analysis
#
#...................................................................................#
# Always load fadnUtils and data.table
library(fadnUtils)
library(data.table)
# The first step is to set the current data.dir
set.data.dir("H:/IFM-CAP/sample.fadnutils.dir")

# Let's see what countries and years are available for loading
show.data.dir.contents()

# We load structured data for all available countries and years
my.data <- load.fadn.str.rds()

# .............. HOW MANY FARMS FOR EACH COUNTRY AND EACH YEAR ....................#

# We use the info DT, and group by YEAR-COUNTRY
my.data$info[, .N, by = list(YEAR, COUNTRY)]

# We can also use dcast, to show a more tabular format.
# value.var is now given explicitly instead of being left empty, so dcast no
# longer has to guess the value column. With fun.aggregate = length the
# result is the number of rows per cell, whichever column is counted.
# NOTE(review): assumes the info table has an "ID" column, as the crops table
# used later in this script does - confirm against your raw_str_map.
dcast(
  my.data$info,
  YEAR ~ COUNTRY,
  fun.aggregate = length,
  value.var = "ID"
)

# We can also export to clipboard, using the write.excel utility function.
# After running the following command, open excel and paste. The result will
# appear.
write.excel(
  dcast(
    my.data$info,
    YEAR ~ COUNTRY,
    fun.aggregate = length,
    value.var = "ID"
  )
)
# .............. ALL CROP AREAS PER COUNTRY-YEAR ...................................#
# Each example below works in two steps:
#   1. add the weighted value (VALUE.w) by reference to the selected rows
#   2. dcast only those rows
# The two steps must stay separate: `DT[i, col := value]` returns the whole
# table rather than the rows selected by `i`, so passing that expression
# straight to dcast() would also aggregate rows of other variables and crops
# that already carry a VALUE.w from an earlier step.

# First, calculate the weighted area
my.data$crops[
  VARIABLE == "LEVL",
  VALUE.w := WEIGHT * VALUE / 1000
]

# Then dcast that variable
dcast(
  my.data$crops[VARIABLE == "LEVL"],
  COUNTRY + CROP ~ YEAR,
  value.var = "VALUE.w",
  fun.aggregate = sum,
  na.rm = TRUE
)

# .............. ALL CROP PRODUCTION PER COUNTRY-YEAR..............................#

# Weighted production for the GROF rows
my.data$crops[
  VARIABLE == "GROF",
  VALUE.w := WEIGHT * VALUE / 1000
]

# Aggregate the GROF rows only
dcast(
  my.data$crops[VARIABLE == "GROF"],
  COUNTRY + CROP ~ YEAR,
  value.var = "VALUE.w",
  fun.aggregate = sum,
  na.rm = TRUE
)

# .............. BARLEY PRODUCTION PER COUNTRY-YEAR................................#

# Weighted production for the barley GROF rows
my.data$crops[
  VARIABLE == "GROF" & CROP == "BARL",
  VALUE.w := WEIGHT * VALUE / 1000
]

# Aggregate the barley GROF rows only
dcast(
  my.data$crops[VARIABLE == "GROF" & CROP == "BARL"],
  COUNTRY ~ YEAR,
  value.var = "VALUE.w",
  fun.aggregate = sum,
  na.rm = TRUE
)
# .............. DISTRIBUTION OF NUMBER OF CROPS PER COUNTRY-YEAR ..................#
# Shorter handle on the crops table for the next steps
crops.data <- my.data$crops

# Number of crops for each farm-country-year.
# Be careful: only the LEVL variable is counted, hence the filter.
crops.data.Ncrops <- crops.data[
  VARIABLE == "LEVL",
  .N,
  by = .(COUNTRY, YEAR, ID)
]

# Quantiles of the number of crops per year-country, ordered by country
crops.data.Ncrops[
  ,
  as.list(quantile(N)),
  by = .(YEAR, COUNTRY)
][order(COUNTRY)]

# R excels at graphic representation of results: one boxplot of the number of
# crops per YEAR (rows) and COUNTRY (columns), with the dummy x axis hidden.
library(ggplot2)
ggplot(crops.data.Ncrops, aes(y = N, x = 1)) +
  geom_boxplot() +
  facet_grid(YEAR ~ COUNTRY) +
  theme(
    axis.title.x = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  ) +
  ylab("Number of Crops")
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/convert_data.R
\name{convert.to.fadn.str.rds}
\alias{convert.to.fadn.str.rds}
\title{Converts an fadn.raw.rds file to fadn.str.rds file using a raw_str_map.json file}
\usage{
convert.to.fadn.str.rds(
fadn.country = NA,
fadn.year = NA,
raw_str_map.file = NULL,
force_external_raw_str_map = FALSE,
str.name = NULL,
DEBUG = F
)
}
\arguments{
\item{fadn.country}{string with the country to extract the str data}
\item{fadn.year}{the year to extract the structured data}
\item{raw_str_map.file}{the full path to the raw_str_map file.}
\item{DEBUG}{if TRUE, prints more details on the conversion process}
\item{str.name}{the short name of the str data. No spaces and text up to 20 characters}
}
\value{
Saves the rds.str.fadn and returns TRUE if everything goes well
}
\description{
The raw_str_map.json specification is as follows:
}
\details{
{
"id": { "COLUMN in every list member in RDS": "COLUMN IN CSV", ....},
"info": { "COLUMN in info RDS": "COLUMN IN CSV", ....},
"livestock": {},
"crops": {
"CROP NAME 1": {"description": "description of crop name", "columns": {"VARIABLE NAME": "COLUMN IN CSV", ....} },
"CROP NAME 2": {"description": "description of crop name", "columns": {"VARIABLE NAME": "COLUMN IN CSV", ....} },
....
}
}
The structure of the str.dir:
- A data.dir can hold more than one extractions.
- Each extraction has a short name (20 or less characters, whitespace is not allowed)
- Each extraction is stored in the data.dir/rds/<extraction_name>
- That folder contains the following files:
+ raw_str_map.json: the raw_str_map
+ fadn.str.<4-digit YEAR>.<3-letter COUNTRY>.rds: the extracted data
Notes:
1) The computed RDS file contains a list structure with the following keys: info, costs, livestock-animals and crops
All are data.tables. For all of them, the first columns are those that are contained in the "id" object
"info" and "costs" are in table format, i.e. each farm is one row and data is on columns, as defined in the
related raw_str_map.json file.
"crops" and "livestock-animals" are in long data format (https://tidyr.tidyverse.org/), where one farm lies across many rows, and each
row is a farm-crop-variableName-value combination
2) In $id, $info and $costs, "COLUMN IN CSV" can have two forms
i) a single column name in the fadn.raw csv file or a combination, e.g. "K120SA+K120FC+K120FU+K120CV-K120BV"
ii) the form of an object {"source": "the column in the csv", "description": "a description of what this column is about"}
3) We attach certain attributes that are useful for identifying informations:
i) In $info and $costs, the attribute "column description" provide information of the formula and the description of each column
ii) In $crops and $livestock-animals, the attribute "$crops.descriptions" and "$livestock.descriptions", provide the description of each CROP contained there
iii) In $crops and $livestock-animals, the attribute "$column.formulas" provides the formulas used in order to derive the VALUE
}