Difference between revisions of "OpasnetUtils/Interpret"

From Testiwiki
Jump to: navigation, search
m
(New version moved here)
Line 5: Line 5:
  
 
==Description==
 
==Description==
interpret takes a vector and makes a data.frame out of it (to be used in e.g. make.ovariable).
 
  
It also changes abbreviations into probability samples.
+
Interpret takes a vector or data.frame as argument. And returns a data.frame with certain textual inputs interpreted as probability distributions. More info on usage [[input.interp|here]](could be moved here).  
  
 
==Code==
 
==Code==
Line 16: Line 15:
 
showcode="1"
 
showcode="1"
 
>
 
>
# INTERPRET ################### interpret takes a vector and makes a data.frame out of it (to be used in e.g. make.ovariable).
+
# Lognormal distribution parametrization functions
### It also changes abbreviations into probability samples.
+
lmean <- function(parmean, parsd) {return(log(parmean)-log(1+(parsd^2)/(parmean^2))/2)}
interpret <- function(data) {
+
lsd <- function(parmean, parsd) {return(log(1+(parsd^2)/(parmean^2)))}
sample <- NULL
+
 
if(is.vector(data)) {data <- data.frame(Result = data)}
+
# Actual interpretation function. Takes already pre-processed information and returns a distribution.
if("Iter" %in% colnames(data)) {
+
interpf <- function(
out <- data}
+
n,
else {
+
res.char,
if(!"Result" %in% colnames(data)) {cat("There MUST be an observation column named 'Result'.\n")}
+
brackets.pos,
test <- !is.na(as.numeric(as.character(data$Result)))
+
brackets.length,
+
minus,
for(i in 1:nrow(data)) {
+
minus.length,
if(test[i]) {
+
minus.exists,
sample <- c(sample, rep(as.numeric(as.character(data[i, "Result"])), n))
+
plusminus,
 +
plusminus.length,
 +
plusminus.exists,
 +
doublePoint
 +
) {
 +
 
 +
if(doublePoint[1] > 0) {
 +
tempArgs <- sort(as.numeric(unlist(strsplit(res.char, "\\:"))))
 +
return(rtriangle(n,tempArgs[1],tempArgs[3],tempArgs[2]))
 +
}
 +
if(brackets.pos >= 0) {
 +
minus.relevant <- unlist(minus)[(cumsum(c(0, minus.length)) + 1):cumsum(minus.length)]
 +
n.minus.inside.brackets <- sum(minus.relevant > brackets.pos & minus.relevant < brackets.pos + brackets.length)
 +
imean <- as.numeric(substr(res.char, 1, brackets.pos - 1))
 +
if(n.minus.inside.brackets == 1) {
 +
ici <- c(as.numeric(substr(res.char, brackets.pos + 1, minus.relevant[minus.relevant > brackets.pos] - 1)), as.numeric(substr(res.char,
 +
minus.relevant[minus.relevant > brackets.pos] + 1, brackets.pos + brackets.length - 2)))
 +
isd <- sum(abs(ici - imean) / 2) / qnorm(0.975)
 +
if((ici[2] - imean) / (ici[1] - imean) < 1.5) {
 +
return(rnorm(n, imean, isd))
 +
} else {
 +
return(out[[i]] <- rlnorm(n, lmean(imean, isd), lsd(imean, isd))) # menee vaarin koska isd on laskettu normaalijakaumalle
 +
}
 +
} else  
 +
if(n.minus.inside.brackets %in% c(2,3)) {
 +
ici <- c(as.numeric(substr(res.char, brackets.pos + 1, minus.relevant[minus.relevant > brackets.pos][2] - 1)), as.numeric(substr(res.char,
 +
minus.relevant[minus.relevant > brackets.pos][2] + 1, brackets.pos + brackets.length - 2)))
 +
isd <- sum(abs(ici - imean) / 2) / qnorm(0.975)
 +
return(rnorm(n, imean, isd))
 +
}
 +
warning(paste("Unable to interpret \"", res.char, "\"", sep = ""))
 +
return(NA)
 +
}
 +
if(minus.exists) {
 +
minus.relevant <- unlist(minus)[(cumsum(c(0, minus.length)) + 1):cumsum(minus.length)]
 +
if(length(minus.relevant) == 1) {
 +
if(as.numeric(substr(res.char, 1, minus.relevant - 1)) / as.numeric(substr(res.char, minus.relevant + 1, nchar(res.char))) >= 1/100) {
 +
return(runif(n, as.numeric(substr(res.char, 1, minus.relevant - 1)), as.numeric(substr(res.char, minus.relevant + 1, nchar(res.char[i])))))
 
} else {
 
} else {
samplingguide <- as.numeric(strsplit(gsub(" ", "", data[i, "Result"]), "-")[[1]])
+
return(exp(runif(n, log(as.numeric(substr(res.char, 1, minus.relevant - 1))), log(as.numeric(substr(res.char, minus.relevant + 1, nchar(res.char)))))))
if(is.na(samplingguide[1]) | is.na(samplingguide[2])) {
 
sample <- c(sample, rep(data[i, "Result"], n))
 
} else {
 
sample <- c(sample, runif(n, samplingguide[1], samplingguide[2]))
 
}
 
 
}
 
}
 
}
 
}
+
if(length(minus.relevant) %in% c(2,3)) {
out <- as.data.frame(array(1:(n*nrow(data)*(ncol(data)+1)), dim = c(n*nrow(data), ncol(data) + 1)))
+
return(runif(n, as.numeric(substr(res.char, 1, minus.relevant[2] - 1)), as.numeric(substr(res.char, minus.relevant[2] + 1, nchar(res.char)))))
colnames(out) <- c("Iter", colnames(data))
 
 
for(i in colnames(data)) {
 
out[i] <- rep(data[, i], each = n)
 
 
}
 
}
out$Iter <- 1:n
 
out$Result <- sample
 
 
 
}
 
}
return(out)
+
if(plusminus.exists) {
 +
return(rnorm(n, as.numeric(substr(res.char, 1, plusminus[1] - 1)), as.numeric(substr(res.char, plusminus[1] + 1, nchar(res.char)))))
 +
}
 +
if(sum(unlist(strsplit(res.char, ""))==";") > 0) {
 +
return(sample(sapply(strsplit(res.char, ";"), as.numeric), N, replace = TRUE))
 +
}
 +
warning(paste("Unable to interpret \"", res.char, "\"", sep = ""))
 +
return(NA)
 +
}
 +
 
 +
# The next function processes character strings and loops the interpretation function.
 +
input.interp <- function(res.char, n = 1000) {
 +
res.char <- gsub(" ", "", res.char)
 +
res.char <- gsub(",", ".", res.char)
 +
plusminus <- gregexpr(paste("\\+-|", rawToChar(as.raw(177)), sep = ""), res.char) # saattaa osoittautua ongelmaksi enkoodauksen vuoksi
 +
plusminus.length <- sapply(plusminus, length)
 +
plusminus.exists <- unlist(plusminus)[cumsum(c(0, plusminus.length[-length(plusminus.length)])) + 1] > 0
 +
minus <- gregexpr("-", res.char)
 +
minus.length <- sapply(minus, length)
 +
minus.exists <- unlist(minus)[cumsum(c(0, minus.length[-length(minus.length)])) + 1] > 0
 +
brackets <- gregexpr("\\(.*\\)", res.char) # matches for brackets "(...)"
 +
brackets.length <- as.numeric(unlist(sapply(brackets, attributes)[1,]))
 +
brackets.pos <- unlist(brackets)
 +
doublePoint <- gregexpr(":", res.char)
 +
out <- list()
 +
for(i in 1:length(res.char)) {
 +
out[[i]] <- interpf(n, res.char[i], brackets.pos[i], brackets.length[i], minus[i], minus.length[i], minus.exists[i], plusminus[[i]],
 +
plusminus.length[i], plusminus.exists[i],doublePoint[[i]])
 +
}
 +
out
 +
}
 +
 
 +
# Assisting function for data.frame wrapper.
 +
iter.f <- function(x) {
 +
1:x
 +
}
 +
 
 +
# Data.frame wrapper for the functions.
 +
interpret <- function(idata, rescol = "Result", N = 1000) {
 +
 
 +
temp <- input.interp(idata[, rescol], N)
 +
temp.lengths <- sapply(temp, length)
 +
out <- idata[rep(1:nrow(idata), times = temp.lengths),]
 +
out$Interp.Result <- unlist(temp)
 +
dim(temp.lengths) <- length(temp.lengths)
 +
out$Iter<- c(apply(temp.lengths, 1, iter.f))
 +
out
 
}
 
}
  
 +
setGeneric("interpret")
 +
 +
setMethod(
 +
f = "interpret",
 +
signature = signature(idata = "character"),
 +
definition = function(idata) {
 +
if(!is.data.frame){
 +
callGeneric(data.frame(Result = idata))
 +
}
 +
callGeneric(idata)
 +
}
 +
)
 
</rcode>
 
</rcode>
  
 
==See also==
 
==See also==
  
* [[OpasnetBaseUtils]]
 
 
* [[Object-oriented programming in Opasnet]]
 
* [[Object-oriented programming in Opasnet]]
 
* [[Opasnet (R library)]]
 
* [[Opasnet (R library)]]

Revision as of 10:47, 20 June 2012



Description

Interpret takes a vector or data.frame as argument. And returns a data.frame with certain textual inputs interpreted as probability distributions. More info on usage here(could be moved here).

Code

- Hide code

# Lognormal distribution parametrization functions
lmean <- function(parmean, parsd) {return(log(parmean)-log(1+(parsd^2)/(parmean^2))/2)}
lsd <- function(parmean, parsd) {return(log(1+(parsd^2)/(parmean^2)))}

# Actual interpretation function. Takes already pre-processed information and returns a distribution.
interpf <- function(
	n, 
	res.char, 
	brackets.pos, 
	brackets.length, 
	minus, 
	minus.length, 
	minus.exists, 
	plusminus, 
	plusminus.length, 
	plusminus.exists,
	doublePoint
	) {

	if(doublePoint[1] > 0) {
		tempArgs <- sort(as.numeric(unlist(strsplit(res.char, "\\:"))))
		return(rtriangle(n,tempArgs[1],tempArgs[3],tempArgs[2]))
	}
	if(brackets.pos >= 0) {
		minus.relevant <- unlist(minus)[(cumsum(c(0, minus.length)) + 1):cumsum(minus.length)]
		n.minus.inside.brackets <- sum(minus.relevant > brackets.pos & minus.relevant < brackets.pos + brackets.length)
		imean <- as.numeric(substr(res.char, 1, brackets.pos - 1))
		if(n.minus.inside.brackets == 1) {
			ici <- c(as.numeric(substr(res.char, brackets.pos + 1, minus.relevant[minus.relevant > brackets.pos] - 1)), as.numeric(substr(res.char, 
				minus.relevant[minus.relevant > brackets.pos] + 1, brackets.pos + brackets.length - 2)))
			isd <- sum(abs(ici - imean) / 2) / qnorm(0.975)
			if((ici[2] - imean) / (ici[1] - imean) < 1.5) {
				return(rnorm(n, imean, isd))
			} else {
				return(out[[i]] <- rlnorm(n, lmean(imean, isd), lsd(imean, isd))) # menee vaarin koska isd on laskettu normaalijakaumalle
			}
		} else 
		if(n.minus.inside.brackets %in% c(2,3)) {
			ici <- c(as.numeric(substr(res.char, brackets.pos + 1, minus.relevant[minus.relevant > brackets.pos][2] - 1)), as.numeric(substr(res.char, 
				minus.relevant[minus.relevant > brackets.pos][2] + 1, brackets.pos + brackets.length - 2)))
			isd <- sum(abs(ici - imean) / 2) / qnorm(0.975)
			return(rnorm(n, imean, isd))
		}
		warning(paste("Unable to interpret \"", res.char, "\"", sep = ""))
		return(NA)
	}
	if(minus.exists) {
		minus.relevant <- unlist(minus)[(cumsum(c(0, minus.length)) + 1):cumsum(minus.length)]
		if(length(minus.relevant) == 1) {
			if(as.numeric(substr(res.char, 1, minus.relevant - 1)) / as.numeric(substr(res.char, minus.relevant + 1, nchar(res.char))) >= 1/100) {
				return(runif(n, as.numeric(substr(res.char, 1, minus.relevant - 1)), as.numeric(substr(res.char, minus.relevant + 1, nchar(res.char[i])))))
			} else {
				return(exp(runif(n, log(as.numeric(substr(res.char, 1, minus.relevant - 1))), log(as.numeric(substr(res.char, minus.relevant + 1, nchar(res.char)))))))
			}
		}
		if(length(minus.relevant) %in% c(2,3)) {
			return(runif(n, as.numeric(substr(res.char, 1, minus.relevant[2] - 1)), as.numeric(substr(res.char, minus.relevant[2] + 1, nchar(res.char)))))
		}
	}
	if(plusminus.exists) {
		return(rnorm(n, as.numeric(substr(res.char, 1, plusminus[1] - 1)), as.numeric(substr(res.char, plusminus[1] + 1, nchar(res.char)))))
	}
	if(sum(unlist(strsplit(res.char, ""))==";") > 0) {
		return(sample(sapply(strsplit(res.char, ";"), as.numeric), N, replace = TRUE))
	}
	warning(paste("Unable to interpret \"", res.char, "\"", sep = ""))
	return(NA)
}

# The next function processes character strings and loops the interpretation function.
input.interp <- function(res.char, n = 1000) {
	res.char <- gsub(" ", "", res.char)
	res.char <- gsub(",", ".", res.char)
	plusminus <- gregexpr(paste("\\+-|", rawToChar(as.raw(177)), sep = ""), res.char) # saattaa osoittautua ongelmaksi enkoodauksen vuoksi
	plusminus.length <- sapply(plusminus, length)
	plusminus.exists <- unlist(plusminus)[cumsum(c(0, plusminus.length[-length(plusminus.length)])) + 1] > 0
	minus <- gregexpr("-", res.char)
	minus.length <- sapply(minus, length)
	minus.exists <- unlist(minus)[cumsum(c(0, minus.length[-length(minus.length)])) + 1] > 0
	brackets <- gregexpr("\\(.*\\)", res.char) # matches for brackets "(...)"
	brackets.length <- as.numeric(unlist(sapply(brackets, attributes)[1,]))
	brackets.pos <- unlist(brackets)
	doublePoint <- gregexpr(":", res.char)
	out <- list()
	for(i in 1:length(res.char)) {
		out[[i]] <- interpf(n, res.char[i], brackets.pos[i], brackets.length[i], minus[i], minus.length[i], minus.exists[i], plusminus[[i]], 
	plusminus.length[i], plusminus.exists[i],doublePoint[[i]])
	}
	out
}

# Assisting function for data.frame wrapper.
iter.f <- function(x) {
	1:x
}

# Data.frame wrapper for the functions.
interpret <- function(idata, rescol = "Result", N = 1000) {

	temp <- input.interp(idata[, rescol], N)
	temp.lengths <- sapply(temp, length)
	out <- idata[rep(1:nrow(idata), times = temp.lengths),]
	out$Interp.Result <- unlist(temp)
	dim(temp.lengths) <- length(temp.lengths)
	out$Iter<- c(apply(temp.lengths, 1, iter.f))
	out
}

setGeneric("interpret")

setMethod(
	f = "interpret",
	signature = signature(idata = "character"),
	definition = function(idata) {
		if(!is.data.frame){
			callGeneric(data.frame(Result = idata))
			}
			callGeneric(idata)
	}
)

See also