Difference between revisions of "OpasnetUtils/Interpret"

From Testiwiki
Jump to: navigation, search
(Description: table of instructions copied here)
m
 
(2 intermediate revisions by 2 users not shown)
Line 15: Line 15:
 
| -14,23 || -# || -14.23. Minus in the beginning of entry is interpreted as minus, not a sign for a range.
 
| -14,23 || -# || -14.23. Minus in the beginning of entry is interpreted as minus, not a sign for a range.
 
|----
 
|----
| 50 - 125 ||# - # ||Uniform distribution between 50 and 125
+
| 50 - 125 ||# - # ||Uniform distribution between 50 and 125.
 +
|----
 +
| < 4 || < # || Uniform distribution between 0 and 4.
 
|----
 
|----
 
| -12 345 - -23.56 || -# - -#|| Uniform distribution between -12345 and -23.56.
 
| -12 345 - -23.56 || -# - -#|| Uniform distribution between -12345 and -23.56.
 
|----
 
|----
| 1 - 50 ||# - # || Loguniform distribution between 1 and 50 (Loguniformity is assumed if the ratio of upper to lower is > 100)
+
| 1 - 150 ||# - # || Loguniform distribution between 1 and 150 (Loguniformity is assumed if the ratio of upper to lower is > 100)
 
|----
 
|----
 
| 3.1 ± 1.2 or 3.1 +- 1.2||# ± # or # +- # ||Normal distribution with mean 3.1 and SD 1.2
 
| 3.1 ± 1.2 or 3.1 +- 1.2||# ± # or # +- # ||Normal distribution with mean 3.1 and SD 1.2
Line 29: Line 31:
 
| 0:0.5:1 || #:#:# ||Triangular distribution. Inputs are always sorted so order of arguments doesn't matter.
 
| 0:0.5:1 || #:#:# ||Triangular distribution. Inputs are always sorted so order of arguments doesn't matter.
 
|}
 
|}
 +
 +
{{comment|# |This would be nice as well: 2;4;7: Each entry (2, 4, and 7 in this case) are equally likely to occur. Entries can also be text.|--[[User:Jouni|Jouni]] 19:45, 24 January 2013 (EET)}}
  
 
==Code==
 
==Code==
<rcode
 
name="answer"
 
label="Initiate functions"
 
graphics="1"
 
showcode="1"
 
>
 
# Lognormal distribution parametrization functions
 
# Log-scale mean (meanlog) of a lognormal distribution, computed from its
# arithmetic mean `parmean` and arithmetic standard deviation `parsd`.
lmean <- function(parmean, parsd) {
  variance.ratio <- (parsd^2) / (parmean^2)
  log(parmean) - log(1 + variance.ratio) / 2
}
 
# Log-scale standard deviation (sdlog) of a lognormal distribution, computed
# from its arithmetic mean `parmean` and arithmetic standard deviation `parsd`.
lsd <- function(parmean, parsd) {
  # log(1 + sd^2 / mean^2) is the variance on the log scale. The result is
  # passed to rlnorm() as `sdlog`, so the square root must be taken.
  sqrt(log(1 + (parsd^2) / (parmean^2)))
}
 
 
# Actual interpretation function. Takes already pre-processed information and returns a distribution.
 
# Interprets one pre-processed text entry and draws from the distribution it
# describes.
#
# Arguments:
#   n                 number of random draws
#   res.char          a single entry (spaces removed, decimal commas already
#                     replaced by points)
#   brackets.pos      start position of a "(...)" group in res.char, or -1
#   brackets.length   length of that group, brackets included
#   minus             gregexpr() result for "-" in res.char (one-element list)
#   minus.length      number of elements in that result
#   minus.exists      TRUE if res.char contains at least one "-"
#   plusminus         gregexpr() match vector for "+-" or the plus-minus sign
#   plusminus.length  number of elements in plusminus (not used here)
#   plusminus.exists  TRUE if res.char contains "+-" or the plus-minus sign
#   doublePoint       gregexpr() match vector for ":" in res.char
#
# Returns a numeric vector of n draws, a single number when the entry is a
# plain number, or NA (with a warning) when the entry cannot be interpreted.
interpf <- function(
  n,
  res.char,
  brackets.pos,
  brackets.length,
  minus,
  minus.length,
  minus.exists,
  plusminus,
  plusminus.length,
  plusminus.exists,
  doublePoint
) {
  # Common exit for entries that match no supported pattern.
  fail <- function() {
    warning(paste("Unable to interpret \"", res.char, "\"", sep = ""))
    NA
  }
  # Numeric value of a substring; NA (without a coercion warning) if invalid.
  num <- function(from, to) {
    suppressWarnings(as.numeric(substr(res.char, from, to)))
  }

  if (is.na(res.char)) {
    return(NA)
  }

  # Plain numbers such as "12000" or "-14.23" are returned as they are. This
  # must come first: a leading minus is a sign, not a range separator.
  plain <- suppressWarnings(as.numeric(res.char))
  if (!is.na(plain)) {
    return(plain)
  }

  # "a:b:c" -> triangular distribution; sorting makes the argument order
  # irrelevant (min, mode, max after sorting).
  if (doublePoint[1] > 0) {
    corners <- sort(as.numeric(unlist(strsplit(res.char, "\\:"))))
    return(rtriangle(n, corners[1], corners[3], corners[2]))
  }

  minus.pos <- unlist(minus)[seq_len(minus.length)]

  # "mean (lower - upper)" -> mean with a 95 % confidence interval.
  if (brackets.pos >= 0) {
    inside <- minus.pos[
      minus.pos > brackets.pos & minus.pos < brackets.pos + brackets.length
    ]
    # One minus: it is the separator. Two or three: the first one is the sign
    # of the lower bound, so the second one is the separator.
    if (length(inside) == 1) {
      sep <- inside[1]
    } else if (length(inside) %in% c(2, 3)) {
      sep <- inside[2]
    } else {
      return(fail())
    }
    imean <- num(1, brackets.pos - 1)
    ici <- c(
      num(brackets.pos + 1, sep - 1),
      num(sep + 1, brackets.pos + brackets.length - 2)
    )
    if (anyNA(c(imean, ici))) {
      return(fail())
    }
    isd <- sum(abs(ici - imean) / 2) / qnorm(0.975)
    # Lognormal when the upper distance is at least 50 % larger than the
    # lower distance. Both distances are taken as positive numbers; dividing
    # by (lower - mean) would always give a negative ratio.
    skewed <- length(inside) == 1 && imean > 0 &&
      (ici[2] - imean) / (imean - ici[1]) >= 1.5
    if (skewed) {
      # NOTE: isd was derived assuming a normal distribution, so the
      # lognormal parameters below are only approximate.
      return(rlnorm(n, lmean(imean, isd), lsd(imean, isd)))
    }
    return(rnorm(n, imean, isd))
  }

  # "mean +- sd" -> normal distribution. Checked before the range pattern
  # because "+-" itself contains a minus sign.
  if (plusminus.exists) {
    pm.width <- attr(plusminus, "match.length")
    pm.width <- if (length(pm.width) > 0) pm.width[1] else 1
    pm.mean <- num(1, plusminus[1] - 1)
    # Skip the whole separator ("+-" is two characters wide).
    pm.sd <- num(plusminus[1] + pm.width, nchar(res.char))
    if (anyNA(c(pm.mean, pm.sd))) {
      return(fail())
    }
    return(rnorm(n, pm.mean, pm.sd))
  }

  # "lower - upper" -> uniform, or loguniform when upper / lower > 100.
  if (minus.exists && length(minus.pos) %in% c(1, 2, 3)) {
    single <- length(minus.pos) == 1
    sep <- if (single) minus.pos[1] else minus.pos[2]
    lower <- num(1, sep - 1)
    upper <- num(sep + 1, nchar(res.char))
    if (anyNA(c(lower, upper))) {
      return(fail())
    }
    # The loguniform case needs a strictly positive lower bound.
    if (single && lower > 0 && lower / upper < 1 / 100) {
      return(exp(runif(n, log(lower), log(upper))))
    }
    return(runif(n, lower, upper))
  }

  # "a;b;c" -> each listed value is equally likely.
  if (grepl(";", res.char, fixed = TRUE)) {
    choices <- as.numeric(unlist(strsplit(res.char, ";", fixed = TRUE)))
    # Index sampling avoids sample()'s special case for a single number.
    return(choices[sample.int(length(choices), n, replace = TRUE)])
  }

  fail()
}
 
 
# The next function processes character strings and loops the interpretation function.
 
# Pre-processes a character vector of entries and interprets each one with
# interpf().
#
# Arguments:
#   res.char  character vector of textual entries
#   n         number of random draws requested per entry
#
# Returns a list with one element per entry, each holding whatever interpf()
# returned for that entry. An empty input gives an empty list.
input.interp <- function(res.char, n = 1000) {
  # Normalise: drop spaces (thousands separators) and use "." for decimals.
  res.char <- gsub(" ", "", res.char)
  res.char <- gsub(",", ".", res.char)

  # First match position of each entry; gregexpr() gives -1 for "no match".
  first.pos <- function(m) m[1]
  has.match <- function(m) m[1] > 0

  # "+-" or the plus-minus sign. The sign is written as a Unicode escape
  # because building it from a raw byte (177) is not valid in UTF-8 locales.
  plusminus <- gregexpr("\\+-|\u00b1", res.char)
  plusminus.length <- vapply(plusminus, length, integer(1))
  plusminus.exists <- vapply(plusminus, has.match, logical(1))

  minus <- gregexpr("-", res.char)
  minus.length <- vapply(minus, length, integer(1))
  minus.exists <- vapply(minus, has.match, logical(1))

  # Matches for brackets "(...)"; the pattern is greedy, so each entry has at
  # most one match.
  brackets <- gregexpr("\\(.*\\)", res.char)
  brackets.pos <- vapply(brackets, first.pos, integer(1))
  brackets.length <- vapply(
    brackets,
    function(m) as.numeric(attr(m, "match.length")[1]),
    numeric(1)
  )

  doublePoint <- gregexpr(":", res.char)

  lapply(seq_along(res.char), function(i) {
    interpf(
      n, res.char[i], brackets.pos[i], brackets.length[i],
      minus[i], minus.length[i], minus.exists[i],
      plusminus[[i]], plusminus.length[i], plusminus.exists[i],
      doublePoint[[i]]
    )
  })
}
 
 
# Assisting function for data.frame wrapper.
 
# Assisting function for the data.frame wrapper: iteration numbers 1..x.
# seq_len() is used so that x == 0 gives an empty vector instead of c(1, 0).
iter.f <- function(x) {
  seq_len(x)
}
 
 
# Data.frame wrapper for the functions.
 
# Data.frame wrapper: interprets the textual column `rescol` of `idata` and
# expands every row into one row per sampled value.
#
# Arguments:
#   idata   data.frame containing the column to interpret
#   rescol  name of the column holding the textual entries
#   N       number of random draws requested per entry
#
# Returns `idata` with its rows repeated once per interpreted value, plus the
# columns `Interp.Result` (the value) and `Iter` (running number of the value
# within its original row).
interpret <- function(idata, rescol = "Result", N = 1000) {
  samples <- input.interp(idata[, rescol], N)
  sample.sizes <- vapply(samples, length, integer(1))

  # drop = FALSE keeps a one-column data.frame from collapsing into a vector.
  row.index <- rep(seq_len(nrow(idata)), times = sample.sizes)
  out <- idata[row.index, , drop = FALSE]
  out$Interp.Result <- unlist(samples)
  # sequence() restarts the counter for every row and also works when the
  # entries produced different numbers of values.
  out$Iter <- sequence(sample.sizes)
  out
}
 
 
# Code location given on the page:
# https://www.opasnet.org/svn/opasnet_utils/trunk/R/Interpret.r

# Make interpret() an S4 generic; the existing function becomes its default
# method.
setGeneric("interpret")

# Method for plain character vectors: wrap the vector into a one-column
# data.frame named after `rescol` and hand it back to the generic, which then
# dispatches on the data.frame rather than on character again.
setMethod(
  f = "interpret",
  signature = signature(idata = "character"),
  definition = function(idata, rescol = "Result", N = 1000) {
    wrapped <- data.frame(idata, stringsAsFactors = FALSE)
    names(wrapped) <- rescol
    callGeneric(wrapped, rescol = rescol, N = N)
  }
)
 
</rcode>
 
  
 
==See also==
 
==See also==
  
 +
* A previous version of this code was called [http://en.opasnet.org/en-opwiki/index.php?title=Input.interp&oldid=25631 Input.interp]
 
* [[Object-oriented programming in Opasnet]]
 
* [[Object-oriented programming in Opasnet]]
 
* [[Opasnet (R library)]]
 
* [[Opasnet (R library)]]

Latest revision as of 17:45, 24 January 2013



Description

Interpret takes a vector or data.frame as its argument and returns a data.frame in which certain textual inputs are interpreted as probability distributions.

Example Regular expression Interpretation
12 000 # # 12000. Text is interpreted as number if space removal makes it a number.
-14,23 -# -14.23. Minus in the beginning of entry is interpreted as minus, not a sign for a range.
50 - 125 # - # Uniform distribution between 50 and 125.
< 4 < # Uniform distribution between 0 and 4.
-12 345 - -23.56 -# - -# Uniform distribution between -12345 and -23.56.
1 - 150 # - # Loguniform distribution between 1 and 150 (Loguniformity is assumed if the ratio of upper to lower is > 100)
3.1 ± 1.2 or 3.1 +- 1.2 # ± # or # +- # Normal distribution with mean 3.1 and SD 1.2
2.4 (1.8 - 3.0) # (# - #) Normal distribution with mean 2.4 and 95 % confidence interval from 1.8 to 3.0
2.4 (2.0 - 3.2) # (# - #) Lognormal distribution with mean 2.4 and 95 % confidence interval from 2.0 to 3.2. Lognormality is assumed if the difference from mean to upper limit is at least 50 % greater than the difference from mean to lower limit.
0:0.5:1 #:#:# Triangular distribution. Inputs are always sorted so order of arguments doesn't matter.

--# : This would be nice as well: 2;4;7: Each entry (2, 4, and 7 in this case) is equally likely to occur. Entries can also be text. --Jouni 19:45, 24 January 2013 (EET)

Code

https://www.opasnet.org/svn/opasnet_utils/trunk/R/Interpret.r

See also