Difference between revisions of "Diversity index"

From Testiwiki
Jump to: navigation, search
(Rationale)
(Calculations)
Line 215: Line 215:
 
{{hidden|
 
{{hidden|
 
<pre>
 
<pre>
 +
library(reshape2)
 +
library(ggplot2)
 +
library(OpasnetUtils)
 +
library(vegan)
 +
 
# Sp accumulation ichno simple
 
# Sp accumulation ichno simple
 
# version 2014-11-09 by Hanna Tuomisto
 
# version 2014-11-09 by Hanna Tuomisto
Line 262: Line 267:
 
trapraw <- read.csv(path, header=TRUE, stringsAsFactors = FALSE)
 
trapraw <- read.csv(path, header=TRUE, stringsAsFactors = FALSE)
 
trapname <- gsub(".csv", "", basename(path), ignore.case=TRUE)
 
trapname <- gsub(".csv", "", basename(path), ignore.case=TRUE)
 +
 +
###### ALLA OLEVA HOITUISI MERGELLÄ. JOS HALUTAAN, VOI LISÄTÄ SKIRPTIN JOKA KERTOO MITÄ TIPUTETTIIN.
 +
a <- merge(spraw, trapraw, by = "TrapID")
  
 
# eliminate traps that are only found in one of the data files
 
# eliminate traps that are only found in one of the data files
Line 301: Line 309:
  
 
#### Calculate catch for different sampling periods and traps
 
#### Calculate catch for different sampling periods and traps
 +
#### ALLA OLEVAN VOISI KORVATA POSIXct:llä
  
 
# convert start and end date to sequential day
 
# convert start and end date to sequential day
Line 321: Line 330:
 
data_tr <- merge(traptable[,-4], data_tr)   
 
data_tr <- merge(traptable[,-4], data_tr)   
 
# fix start and end days
 
# fix start and end days
data_tr$StartDay <- aggregate(data$StartDay, by=list(data$TrapID), FUN=min, simplify=TRUE)[,2]
+
data_tr$StartDay <- aggregate(data$StartDay, by=data["TrapID"], FUN=min, simplify=TRUE)[,2]
 
data_tr$EndDay <- aggregate(data$EndDay, by=list(data$TrapID), FUN=max, simplify=TRUE)[,2]
 
data_tr$EndDay <- aggregate(data$EndDay, by=list(data$TrapID), FUN=max, simplify=TRUE)[,2]
 
data_tr$SamplingGap <- data_tr$EndDay - data_tr$StartDay - data_tr$NumDays
 
data_tr$SamplingGap <- data_tr$EndDay - data_tr$StartDay - data_tr$NumDays
 +
 +
a <- data.frame(Trap = rep(c("a", "b"), each=10),
 +
                Time = rep(1:10, 2),
 +
                Duration = rep(2, 20),
 +
                SP1 = rep(c(0,0,0,1), 5),
 +
                SP2 = rep(c(0,0,1,0), 5),
 +
                SP3 = rep(c(0,2,0,1), 5)
 +
)
 +
 +
###### FIRST FUNCTION: TAKE DATA FILES AND MERGE THEM INTO A DATA.FRAME IN LONG FORMAT
 +
 +
longfo <- function(
 +
spraw, # Species data.frame in wide format
 +
trapraw, # Trap data.frame
 +
id.vars = character() # Column names used as identifiers (not species mearurements)
 +
) {
 +
a <- merge(spraw, trapraw) # Assumed that the only common column is the Trap identifier.
 +
 +
a$StartDay <- as.POSIXct(paste(a$StartYear, a$StartMonth, a$StartDay, sep = "-"), tz = "UTC")
 +
a$EndDay <- as.POSIXct(paste(a$EndYear, a$EndMonth, a$EndDay, sep = "-"), tz = "UTC")
 +
a$NumDays <- a$EndDay - a$StartDay
 +
a <- a[ , !colnames(a) %in% c("StartYear", "StratMonth", "StartDay", "EndYear", "EndMonth", "EndDay")]
 +
 +
id.vars <- union(id.vars, c("Trap", "StartDay", "EndDay", "NumDays", "TrapID", "SeqInTrap", "SampleCode",
 +
"SoilType", "Region", "Latitude")) # The typical id.vars used.
 +
 +
a <- melt(a,
 +
  id.vars = id.vars,
 +
  variable.name = "Species",
 +
  value.name = "Individuals"
 +
)
 +
 +
## Create new variable Spec which is the first occurrence of a species in a trap.
 +
a <- a[order(a$StartDay) , ] # Order by start day.
 +
a$Spec <- 0
 +
a$Spec[as.numeric(rownames(unique(subset(a, Individuals > 0, c("Trap", "Species")))))] <- 1
 +
 +
return(a)
 +
}
 +
 +
##### ACCUMULATION FUNCTION: ACCUMULATE A VARIABLE WITHIN GROUPS DEFINED BY OTHER VARIABLES
 +
 +
accum <- function(
 +
a, # a data.frame in long format
 +
cumvar, # The variables to be cumulated
 +
sortvar = NULL, # The variable to use in sorting the data (optional)
 +
groupvar = NULL, # The variables used in grouping the data (optional)
 +
long = TRUE # Do you want to accumulate over the whole long format data.frame? You don't if you accumulate sampling time.
 +
) {
 +
out <- a
 +
# Pick only the first species if you want to accumulate sampling time.
 +
if(!long) out <- out[out$Species == unique(out$Species)[1] , ]
 +
 +
if(!is.null(sortvar)) out <- out[do.call(order, args = out[sortvar]) , ] # Orders rows by sortvar. do.call is used because
 +
#order does not accept data.frame inputs.
 +
 +
for(i in cumvar) { # Cumulate the values of cumvar within the groups defined by groupvar.
 +
out[[paste(cumvar[i], "cum", sep = "_")]] <- unlist(tapply(out[[cumvar[i]]], INDEX = out[groupvar], FUN = cumsum))
 +
}
 +
out <- merge(a, out)
 +
 +
return(out)
 +
}
 +
 +
########## THIS FUNCTION CREATES A SAMPLE OF INDIVIDUAL SAMPLING PERIODS
 +
 +
periods <- function(
 +
a, # data.frame with the data
 +
samp = TRUE, # do we sample or take rows in order?
 +
reps = 2, # how many repetitions?
 +
cols = c("Trap", "Species") # Column names used for grouping
 +
) {
 +
groups <- aggregate(a$Individuals, by = a[cols], FUN = length)
 +
 +
if(!samp) reps <- 1
 +
 +
out <- data.frame()
 +
for(i in 1:nrow(groups)) {
 +
for(j in 1:groups$x[i]) {
 +
for(k in 1:reps) {
 +
 +
if(samp) ro <- sample(groups$x[i], j, replace = FALSE) else ro <- 1:j
 +
 +
out <- rbind(out, data.frame(
 +
Len = j,
 +
Rep = k,
 +
merge(a, groups[i , colnames(groups) != "x"])[ro , ]
 +
))
 +
}
 +
}
 +
}
 +
return(out)
 +
}
 +
 +
##### AGGREGATION FUNCTION FROM THE SAMPLED PERIODS
 +
 +
aggr <- function( # Function that takes in period-sampled data and aggregates that
 +
a, # Period-sampled data in long format
 +
groups = character() # Variable names to be used in grouping the aggregation
 +
)  {
 +
groups <- union(groups, c("Len", "Reps"))
 +
out <- aggregate(a[colnames(a) %in% c("Individuals", "Individuals_cum", "Spec", "Spec_cum", "NumDays")], by = a[groups], FUN = sum)
 +
out <- cbind(out, aggregate(a[colnames(a) %in% c("Region")], by = a[groups], FUN = function(x) x[1])
 +
out <- cbind(out, aggregate(a[colnames(a) %in% c("SoilType", "TrapID")], by = a[groups], FUN = function(x) paste(x, collapse = " "))
 +
out <- cbind(out, aggregate(a[colnames(a) %in% c("StartDay")], by = a[groups], FUN = min)
 +
out <- cbind(out, aggregate(a[colnames(a) %in% c("EndDay")], by = a[groups], FUN = max)
 +
 +
out$SamplingGap <- out$EndDay - out$StartDay - out$NumDays
 +
 +
return(out)
 +
}
 +
 +
temp <- aggregate(a$Indiv_cum, INDEX = a[c("Time_cum", "Trap")], FUN = sum)
 +
temp2 <- aggregate(a$Spec_cum, INDEX = a[c("Time_cum", "Trap")], FUN = sum)
 +
 +
ggplot(subset(temp, Trap == "a"), aes(x = Time_cum, y = Freq, colour = Trap))+geom_step()
 +
 +
ggplot(a,aes(x=Cum,color=Trap, group = Trap)) +
 +
  stat_bin(data=a, aes(y=cumsum(..count..)),geom="step")+
 +
  stat_bin(data=subset(a,Trap=="b"),aes(y=cumsum(..count..)),geom="step")
 +
 +
ggplot(a, aes(x=Cum, y = cumsum(Individuals))) + geom_line()
  
 
# run data aggregation within and across traps
 
# run data aggregation within and across traps
Line 391: Line 522:
 
renyiD <- renyi(to_est[,8:ncol(to_est)], scales=c(0,1,2), hill=TRUE)
 
renyiD <- renyi(to_est[,8:ncol(to_est)], scales=c(0,1,2), hill=TRUE)
 
colnames(renyiD) <- paste("Div.q", colnames(renyiD), sep="")
 
colnames(renyiD) <- paste("Div.q", colnames(renyiD), sep="")
estSdiv <- cbind("Region"=to_est$Region, "MTM"=to_est$NumDays/30, "StartDay"=to_est$StartDay,  
+
estSdiv <- cbind(
                "Individuals"=rowSums(to_est[,8:ncol(to_est)]), estS[,-(4:5)],  
+
Region = to_est$Region,
                estSadd, renyiD, "TrapID"=to_est$TrapID, "SoilType"=to_est$SoilType)
+
MTM = to_est$NumDays/30,
 +
StartDay = to_est$StartDay,  
 +
Individuals = rowSums(to_est[,8:ncol(to_est)]),
 +
estS[,-(4:5)],  
 +
estSadd,  
 +
renyiD,
 +
TrapID = to_est$TrapID,
 +
SoilType = to_est$SoilType
 +
)
 
colnames(estSdiv)[which(colnames(estSdiv)=="S.obs")] <- "Species"
 
colnames(estSdiv)[which(colnames(estSdiv)=="S.obs")] <- "Species"
 
write.csv(estSdiv, file=paste(to_estname, "_S_est_D.csv", sep=""))
 
write.csv(estSdiv, file=paste(to_estname, "_S_est_D.csv", sep=""))

Revision as of 08:32, 9 February 2015



Question

How to calculate diversity indices?

Answer

Use the function diversity to calculate the most common indices. These parameters are used:

  • amount: the number (or proportion) of individuals of a species in a transect.
  • species: an identifier for a species
  • transect: an identifier for a transect
  • q: exponent for the diversity calculation


Amount, species, and transect are vectors (i.e. ordered sets of values). The parameters can be given to the function in several different ways. This is hopefully practical for the user.

  • Amount, species, and transect must be of same length.
  • If parameter names are not used (e.g. diversity(param1, param2, param3)), it is assumed that they are given in this order: amount, species, transect, q.
  • If individual data is given using species identifiers, it the parameter name must be given: diversity(species = param1).
  • The following default values are used for parameters:
    • Amount: 1 for each species, i.e. each species is equally abundant.
    • Species: 1, 2, 3, ... n, where n = number of rows in data, i.e. each row is a different species.
    • Transect: 1, i.e. there is only one transect.
    • q: q.wiki. You MUST define an object q.wiki before using diversity, otherwise alpha diversities will be calculated wrong. q.wiki can be a single number or a vector of numbers.


There are several ways to upload your data so that you can use the diversity function.

  • Upload your data to Opasnet Base and call for the data using op_baseGetData function.
  • Upload a CSV file to Opasnet using Special:Upload and call for the data using opasnet.data function.
  • Use Example 1 code below to enter your data. The data must be in format c(4,6,2,5,7,4,3,9) where the values are either
    • identifiers of the species 1,2,3... in which the individuals belong (one entry per individual), or
    • abundancies of species, i.e. proportions or amounts of individuals belonging to each species among the whole population (one entry per species).

Rationale

Actual function diversity

Initiate functions

+ Show code

The function diversity produces a list where the first, second, and third element are the gamma, the alpha, and transect-specific gamma diversities, respectively.

Function diversity.table produces a data.frame of several diversity indices.

Examples

Example 1 to use function

Give your data in R format or leave empty for example data:

Is your data individual data or group abundancies?:

+ Show code

Example 2

Select your data:

Which q values you want to calculate.:
0
0.5
1
2
3
6

+ Show code

The data should be given in R format as a list of values in parenthesis, beginning with c:

c(3,5,3,5,2,1,3,3,4,2) or equivalently c(0.1,0.2,0.4,0.1,0.2)

where the values are either

  • identifiers of the species 1,2,3... in which the individuals belong (one entry per individual), or
  • abundancies of species, i.e. proportions of individuals belonging to each species among the whole population (one entry per species).

Calculations


Diversity indices are thoroughly described in Wikipedia.

See also

References


Related files

<mfanonymousfilelist></mfanonymousfilelist>