"0","file_names %>% filter(country==""POL"")"
{"classes":["tbl_df","tbl","data.frame"],"ncol":3,"nrow":12}
\ No newline at end of file
File deleted
"0","file_names <- file_names %>% mutate(provided=""y"")"
{"chunk_definitions":[{"chunk_id":"csetup_chunk","chunk_label":"setup","document_id":"D4546146","expansion_state":0,"options":{"engine":"r","include":false,"label":"setup"},"row":7,"row_count":1,"visible":true},{"chunk_id":"cpfnehx1vy2pk","chunk_label":"unnamed-chunk-1","document_id":"D4546146","expansion_state":0,"options":{"engine":"r","label":"unnamed-chunk-1"},"row":15,"row_count":1,"visible":true},{"chunk_id":"cdy3kgtvnui1o","chunk_label":"unnamed-chunk-2","document_id":"D4546146","expansion_state":0,"options":{"engine":"r","label":"unnamed-chunk-2"},"row":32,"row_count":1,"visible":true},{"chunk_id":"c53msamlrci0n","chunk_label":"unnamed-chunk-3","document_id":"D4546146","expansion_state":0,"options":{"engine":"r","label":"unnamed-chunk-3"},"row":45,"row_count":1,"visible":true}],"default_chunk_options":{"echo":true,"error":false},"doc_write_time":1618920319,"working_dir":null}
\ No newline at end of file
"0","library(tidyverse)"
"2","Registered S3 methods overwritten by 'dbplyr':
method from
print.tbl_lazy
print.tbl_sql
"
"2","-- Attaching packages ---------------------------------------------------------------------- tidyverse 1.3.1 --
"
"2","v ggplot2 3.3.3 v purrr  0.3.4
v tibble  3.1.0 v dplyr  1.0.5
v tidyr  1.1.3 v stringr 1.4.0
v readr  1.4.0 v forcats 0.5.1
"
"2","-- Conflicts ------------------------------------------------------------------------- tidyverse_conflicts() --
x dplyr::between() masks fadnUtils::between()
x dplyr::filter() masks stats::filter()
x dplyr::first() masks fadnUtils::first()
x dplyr::lag() masks stats::lag()
x dplyr::last() masks fadnUtils::last()
x purrr::transpose() masks fadnUtils::transpose()
"
"0","library(readxl)"
"0","library(data.table)"
"2","data.table 1.14.0 using 32 threads (see ?getDTthreads). Latest news: r-datatable.com
"
"2","
Attache Paket: data.table
"
"2","The following objects are masked from package:dplyr:
between, first, last
"
"2","The following object is masked from package:purrr:
transpose
"
"0","library(skimr)"
"2","Registered S3 methods overwritten by 'htmltools':
method from
print.html tools:rstudio
print.shiny.tag tools:rstudio
print.shiny.tag.list tools:rstudio
"
"0","knitr::opts_chunk$set(echo = TRUE)"
"0","data_requested_general <- read_xlsx(path = ""D:/public/data/fadn/lieferung_20210414/FADN data request forms_March2021_MM210326.xlsx"",sheet = 2,range = ""A365:P393"")"
"2","New names:
* `` -> ...1
"
"0","colnames(data_requested_general)[1] <- ""country"""
"0","data_requested_general <- data_requested_general %>% mutate(country=str_sub(country,2,4))"
"0","data_requested_general_long <- data_requested_general %>% pivot_longer(-country)"
"0","data_requested_general_long <- data_requested_general_long %>% filter(value==""y"") %>% rename(year=name,requested=value)"
"0","requested_and_provided <- data_requested_general_long %>% full_join(file_names %>% select(-name),by=c(""country"",""year"")) %>% filter(is.na(requested ) | is.na(provided)) "
"0","## if empty, than everything is fine"
"0","requested_and_provided"
{"classes":["tbl_df","tbl","data.frame"],"ncol":4,"nrow":0}
\ No newline at end of file
File deleted
"0","# writexl::write_xlsx(x=requested_and_provided,path=""D:/public/data/fadn/lieferung_20210414/missing_countries_years.xlsx"")"
C:/Users/yang_x/Desktop/new-Version/DESCRIPTION="F2776F07"
C:/Users/yang_x/Desktop/new-Version/NAMESPACE="934332C4"
C:/Users/yang_x/Desktop/new-Version/R/calculate_standard_results.R="6FC819DC"
C:/Users/yang_x/Desktop/new-Version/R/check_column_names.R="942047BE"
C:/Users/yang_x/Desktop/new-Version/R/common_id.R="CA494911"
C:/Users/yang_x/Desktop/new-Version/R/convert_data.R="1BEFABA1"
C:/Users/yang_x/Desktop/new-Version/R/handle_rds_data.R="35DDE8FC"
C:/Users/yang_x/Desktop/new-Version/R/manage_data_dir.R="602B4761"
C:/Users/yang_x/Desktop/new-Version/R/raw_str_map.R="62BBF8A2"
C:/Users/yang_x/Desktop/new-Version/R/utilities.R="F78E8815"
C:/Users/yang_x/Desktop/new-Version/inst/examples/use_case.3.1.R="4669FF51"
C:/Users/yang_x/Desktop/new-Version/inst/examples/use_case.3.2.R="ED58001"
C:/Users/yang_x/Desktop/new-Version/inst/examples/use_case.3.3.R="B1DC70B4"
C:/Users/yang_x/Desktop/new-Version/man/check.column.Rd="9D94E916"
C:/Users/yang_x/Desktop/new-Version/vignettes/Intro.R="EF4E9F58"
C:/Users/yang_x/Desktop/new-Version/vignettes/Intro.Rmd="9E238DFE"
C:/Users/yang_x/Desktop/test_provided_files.Rmd="FD0AC2A5"
D:/public/data/fadn/test_provided_files.R="122A95E4"
D:/public/yang/MIND_STEP/FADN_DATA_14042021.R="F74EADE7"
D:/public/yang/MIND_STEP/Fadn_data_.R="1053F691"
D:/public/yang/MIND_STEP/Rcrawler.R="56CF3422"
D:/public/yang/MIND_STEP/SearchTables.R="F76F1A31"
D:/public/yang/MIND_STEP/USE_CASE.R="94F52D50"
D:/public/yang/MIND_STEP/USE_CASE_2.R="281FB48A"
D:/public/yang/MIND_STEP/USE_CASE_3.R="739AA3A5"
D:/public/yang/MIND_STEP/USE_CASE_4.R="F68FA521"
D:/public/yang/MIND_STEP/check_method.R="B4B201CA"
D:/public/yang/MIND_STEP/fadn_setting.r="BD53633C"
D:/public/yang/MIND_STEP/ouput_common_id.R="C7B0F230"
D:/public/yang/MIND_STEP/plots.R="1F9B5F60"
D:/public/yang/MIND_STEP/raw_data_codes.inc.R="9E6A224D"
D:/public/yang/MIND_STEP/summaries.R="A5D2F2A"
D:/public/yang/MIND_STEP/test.R="99FCBEFF"
D:/public/yang/MIND_STEP/test_provided_files.R="5F352FE4"
D:/public/yang/MIND_STEP/test_provided_files.Rmd="4B232FC6"
D:/public/yang/MIND_STEP/use_case.Rmd="BB5043BA"
R/figure/unnamed-chunk-13-1.png

5.29 KiB

README.Rmd 0 → 100644
---
output: rmarkdown::github_document
---
```{r setup, include=FALSE}
# Set knitr's root.dir option for this document.
# NOTE(review): this is a machine-specific absolute path; confirm or adjust it
# before knitting on another machine.
knitr::opts_knit$set(root.dir = "C:/Users/yang_x/Desktop/new-Version/")
```
<!-- README.md is generated from README.Rmd. Please edit that file -->
# fadnUtils
Developed by Dimitrios Kremmydas (JRC) and Xinxin Yang (THÜNEN)
The fadnUtils package facilitates the efficient handling of FADN data within the R language framework. Furthermore, the package is targeted for use within the JRC D.4 context. This means that there is a specific temporal pattern of how a user interacts with the package (see Figure plot).
![plot](inst/examples/pic/workflow.png)
More specifically, after a request for FADN data from DG-AGRI, this data is delivered to JRC D.4 in csv format.
# Installation
You can install the development version from Thuenen or IIASA Gitlab with:
```{r , eval =FALSE}
# Install from the Thünen GitLab
devtools::install_git(
  "https://git-dmz.thuenen.de/mindstep/fadnutilspackages",
  force = TRUE
)
# Install from the IIASA GitLab
devtools::install_git(
  "https://gitlab.iiasa.ac.at/mind-step/fadnutilspackage"
)
```
Then the related R packages can be installed.
```{r, results='hide'}
# Packages used by the examples in this README.
requiredPackages <- c(
  "fadnUtils", "data.table", "devtools", "jsonlite", "ggplot2"
)
for (pkg in requiredPackages) {
  # requireNamespace() only tests availability; unlike require() it does not
  # attach the package, so attaching happens exactly once via library() below.
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
  # library() raises an error if the package still cannot be loaded, so a
  # failed installation is not silently ignored.
  library(pkg, character.only = TRUE)
}
```
# Usage in Brief
After loading the packages, you will have a functional R package on your computer. Next, we will talk about how to use the package:
1. Create a working directory
- a user-defined data directory
1. Import CSV FADN data
- convert the csv data into raw r-data
- convert raw r-data into str r-data
1. Load r-data and structured r-data
1. Perform analysis
## 1. Create a working directory
First, the user sets a working directory. Make sure the relative path stays within `CurrentProjectDirectory`.
```{r}
# Use a local folder as the fadnUtils data directory
CurrentProjectDirectory <- "D:/public/yang/MIND_STEP/New_test_fadnUtils"

# Create the data dir, make it the active one, and print the active data dir
create.data.dir(folder.path = CurrentProjectDirectory)
set.data.dir(CurrentProjectDirectory)
get.data.dir()
```
### Required files
We request FADN data from DG-AGRI, which is delivered to us in csv format. In order to work efficiently with R, we should convert the csv data to an R-friendly format; this step is done with the help of a human-readable file called `raw_str_map.file`. Both files are necessary. `inst/examples` is the folder for use cases; it contains fadnUtils package examples and json files.
1. FADN data in csv format: the data for loading
2. A json file for extracting the variables
### Folder Structure
A working directory is specified arbitrarily by the user. This structure helps data management and maintenance. The directory looks like this:
```base
CurrentProjectDirectory/
+-- csv
+-- fadnUtils.metadata.json
+-- rds
\-- spool
\-- readme.txt
```
* csv: CSV files are stored here
* fadnUtils.metadata.json: containing the mapping from the fadn.raw.rds to the fadn.str.rds data
* rds: placing r-data in the "rds" directory
* spool: keeping related files
## 2. Import CSV FADN data
First, we will import the data into an R-friendly format using the fadnUtils package.
### Convert the csv data into raw r-data
The raw data will be added to a `rds` directory. We use a convenient function from this package to convert the csv file into raw r-data.
```{r}
# Folder that holds the delivered FADN csv files
fadn.data.dir <- "D:/public/data/fadn/lieferung_20210414/csv/"

# Convert the csv file of country BEL and year 2009 into raw r-data
convert.to.fadn.raw.rds(
  file.path    = paste0(fadn.data.dir, "BEL2009.csv"),
  sepS         = ",",
  fadn.country = "BEL",
  fadn.year    = 2009
  # keep.csv = T # copy csv file in csv.dir
)
```
At any time, we can check for the current data dir, what csv files (countries, year) are loaded.
```{r, eval=FALSE}
# Show the contents of the current data dir
show.data.dir.contents()
```
### Convert raw r-data into structured r-data
Then, we convert the raw data into structured data. Broadly, there are 3 steps to including data in an R package:
1. setting a structured data in the `structured` directory,
2. checking the `raw_str_map.file` that all variables can be converted.
3. converting the structured data successfully into `structured` directory.
#### Set a `structured` directory for saving the structured data
We set up a `test` folder for storing the structured data.
```{r, warning=FALSE}
# "rds" folder inside the current data dir (trailing slash kept on purpose:
# file names are appended to it with paste0())
rds.dir <- paste0(get.data.dir(), "/rds/")

# Name of the sub-folder that will hold the structured r-data
new.str.name <- "test"

# Build the extraction_dir path once, then create that folder
new.extraction.dir <- paste0(rds.dir, new.str.name)
dir.create(new.extraction.dir)
```
#### Check the variables in the `raw_str_map.file`
Before conversion, it is recommended to use the `check.column()` method to ensure that all variables in the `raw_str_map.file` can be converted.
```{r results='hide', message=FALSE, warning=FALSE}
# Check the variables of the json file against the imported data before
# converting; the result is kept in list_vars.
list_vars <- check.column(
  # file to check: an rds file or a csv file
  importfilepath = paste0(rds.dir, "fadn.raw.2009.BEL.rds"),
  # a json file
  jsonfile = "D:/public/yang/MIND_STEP/2014_after_copy.json",
  # write a new json file without unmatched variables
  # (TRUE rather than T: T is an ordinary variable that can be reassigned)
  rewrite_json = TRUE,
  # save the new json in extraction_dir
  extraction_dir = new.extraction.dir
)
```
#### Convert the raw data into structured r-data using the checked json file
Finally, we can convert raw r-data to str r-data using an external json file. For more details on converting with the fadnUtils package, see `USE_CASE.R`.
```{r, echo=FALSE}
# Convert the raw r-data of BEL/2009 into structured r-data without naming a
# mapping file (presumably the default json of the extraction dir is used;
# verify against convert.to.fadn.str.rds).
convert.to.fadn.str.rds(
  fadn.country = "BEL",
  fadn.year = 2009,
  str.name = new.str.name # extraction_dir
)

# Convert again, this time forcing an external json mapping file.
# TRUE/FALSE are spelled out because T and F are reassignable variables.
convert.to.fadn.str.rds(
  fadn.country = "BEL",
  fadn.year = 2009,
  # an external json file
  raw_str_map.file = "D:/public/yang/MIND_STEP/new_sample/test01/raw_str_map.json",
  str.name = new.str.name, # extraction_dir
  force_external_raw_str_map = TRUE,
  DEBUG = FALSE
)
```
#### Files Structure in `rds` folder
After conversion, we can see the `rds` folder:
* `fadn.raw.2009.BEL.rds`: raw r-data for country "BEL" and year "2009"
* `test`: extraction_dir for saving the structured r-data and extracting json file
* `fadn.str.2009.BEL.rds`: structured r-data for country "BEL" and year "2009"
* `raw_str_map.json`: default json file
* `rewrite_2014_after_copy.json`: modified json file after checking the variables
```base
rds
+-- fadn.raw.2009.BEL.compressed.rds
+-- fadn.raw.2009.BEL.rds
+-- fadn.raw.2010.BEL.compressed.rds
+-- fadn.raw.2010.BEL.rds
+-- fadn.raw.2011.BEL.compressed.rds
+-- fadn.raw.2011.BEL.rds
+-- fadn.raw.2012.BEL.compressed.rds
+-- fadn.raw.2012.BEL.rds
\-- test
+-- fadn.str.2009.BEL.rds
+-- raw_str_map.json
\-- rewrite_2014_after_copy.json
```
## 3. Load raw r-data and structured r-data
In order to initiate any analysis with `fadnUtils`, we first need to load r-data. We can only load data for countries and years that have already been imported into a data.dir folder.
### Load raw r-data for the country `BEL` and year `2009`
```{r results='hide', message=FALSE, warning=FALSE}
# Load the raw r-data of country BEL for the year 2009
my.data.2009.raw <- load.fadn.raw.rds(countries = "BEL", years = 2009)
```
### Load structured data for the country `BEL` and year `2009`
We can load structured data for country `BEL` and year `2009`.
```{r results='hide', message=FALSE, warning=FALSE}
# Load the structured r-data of country BEL for the year 2009
my.data.2009.str <- load.fadn.str.rds(
  countries      = "BEL",
  years          = 2009,
  extraction_dir = "test" # location of the str r-data
)
```
### Load structured data from all available countries and years.
The following is an example of loading structured data for all available countries and years.
```{r results='hide', message=FALSE, warning=FALSE}
# No countries/years are passed, only the extraction dir that holds the
# structured r-data
my.str.data <- load.fadn.str.rds(extraction_dir = "test")
```
## 4. Perform analysis
Here are some examples of how to analyse the loaded data.
### Collecting the common id
We can collect the common id from the loaded r-data using the `collect.common.id()` function of `fadnUtils`.
```{r, message=FALSE}
# Collect the common ids from the loaded structured r-data
collected.common.id_str <- collect.common.id(my.str.data)
```
### Plotting
To build a basic plot, we will use the `ggplot` function using the plotting package
`ggplot2`.
```{r results='hide', message=FALSE, warning=FALSE}
# Shortcut to the crops table for easier access in the next steps
crops.data <- my.str.data$crops

# Number of crops for each farm-country-year.
# Be careful: we have to filter so that only the LEVL variable is counted.
crops.data.Ncrops <- crops.data[
  VARIABLE == "LEVL",
  .N,
  by = .(COUNTRY, YEAR, ID)
]

# Display the quantiles of the number of crops, ordered by country
crops.data.Ncrops[
  ,
  as.list(quantile(N)),
  by = .(YEAR, COUNTRY)
][order(COUNTRY)]

# Boxplot of the number of crops, one panel per year and country; x is a
# constant, so its title, labels and ticks are blanked out
ggplot(crops.data.Ncrops, aes(y = N, x = 1)) +
  geom_boxplot() +
  facet_grid(YEAR ~ COUNTRY) +
  theme(
    axis.title.x = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  ) +
  ylab("Number of Crops")
```
### Some other examples
```{r}
# Sampled and represented number of farms per country and year
my.str.data$info[
  ,
  .(Nobs_sample = .N, Nobs_represented = sum(WEIGHT)),
  by = .(COUNTRY, YEAR)
]

# Same summary, restricted to the full sample (common id over years in the
# selected data)
my.str.data$info[
  ID %in% collected.common.id_str[[1]],
  .(Nobs_sample = .N, Nobs_represented = sum(WEIGHT)),
  by = .(COUNTRY, YEAR)
]
```
**Note:** Please read `inst/examples/FADN_USE_CASE.R` and `use_case.docx` for more details on using fadnUtils.
README.docx 0 → 100644
File added
File moved
README_files/figure-markdown_github/unnamed-chunk-12-1.png

8.69 KiB

README_files/figure-markdown_github/unnamed-chunk-13-1.png

12 KiB

...@@ -489,7 +489,7 @@ my.data = load.fadn.str.rds(extraction_dir = new.str.name) ...@@ -489,7 +489,7 @@ my.data = load.fadn.str.rds(extraction_dir = new.str.name)
############################################################################ ############################################################################
#We load structured data for all available countries and years #We load structured data for all available countries and years
my.str.data = load.fadn.str.rds(extraction_dir = "a") my.str.data = load.fadn.str.rds(extraction_dir = "test")
##---------------------------------------------------------------- ##----------------------------------------------------------------
## HOW MANY FARMS FOR EACH COUNTY AND EACH YEAR -- ## HOW MANY FARMS FOR EACH COUNTY AND EACH YEAR --
...@@ -580,7 +580,7 @@ crops.data.Ncrops[,as.list(quantile(N)),by=list(YEAR,COUNTRY)][order(COUNTRY)] ...@@ -580,7 +580,7 @@ crops.data.Ncrops[,as.list(quantile(N)),by=list(YEAR,COUNTRY)][order(COUNTRY)]
# R excels on graphic representation of results # R excels on graphic representation of results
library(ggplot2) library(ggplot2)
ggplot(crops.data.Ncrops,aes(y=N,x=1)) + ggplot(crops.data.Ncrops,aes(y=1,x=N)) +
geom_boxplot() + geom_boxplot() +
facet_grid(YEAR~COUNTRY) + facet_grid(YEAR~COUNTRY) +
theme(axis.title.x=element_blank(), theme(axis.title.x=element_blank(),
...@@ -777,7 +777,7 @@ output_common_id <- function(countries_list, saveExcel = TRUE, excelname , saveP ...@@ -777,7 +777,7 @@ output_common_id <- function(countries_list, saveExcel = TRUE, excelname , saveP
cat(excelname," is saved in ",xlsx_file_dir, "\n")} cat(excelname," is saved in ",xlsx_file_dir, "\n")}
if (savePlots == TRUE) cat("plots are saved in", plots.dir, "\n") if (savePlots == TRUE) cat("plots are saved in", plots.dir, "\n")
return(outlist) return(list(outlist,p))
} }
# get all countires in fadn str data # get all countires in fadn str data
...@@ -787,3 +787,4 @@ ID_list <- output_common_id(countries_list = countries) ...@@ -787,3 +787,4 @@ ID_list <- output_common_id(countries_list = countries)
# get Germany: DEU and Kroatien: HRV # get Germany: DEU and Kroatien: HRV
DEU_list <- output_common_id(c("HRV", "DEU"), saveExcel = TRUE, excelname = "HRV_DEU.xlsx", savePlots = TRUE) DEU_list <- output_common_id(c("HRV", "DEU"), saveExcel = TRUE, excelname = "HRV_DEU.xlsx", savePlots = TRUE)
DEU_list[[1]]