Difference between revisions of "OpasnetUtils/Drafts"

From Testiwiki
Jump to: navigation, search
(Answer: timing added)
(Calculations: showind works now)
 
(53 intermediate revisions by 3 users not shown)
Line 13: Line 13:
 
Call the objects stored by this code from another rode with this command:
 
Call the objects stored by this code from another rode with this command:
  
  objects.latest("Op_en6007", code_name = "answer")
+
  objects.latest("Op_en6007", code_name = "answer") # Old version that fetches all objects, depreciated and not updated.
 +
objects.latest("Op_en6007", code_name = "diagnostics") # Functions for ovariable and model diagnostics: ovashapetest, showLoctable, binoptest
 +
objects.latest("Op_en6007", code_name = "webropol") # Functions for operating with Webropol data
 +
objects.latest("Op_en6007", code_name = "miscellaneous") # Functions for various tasks
 +
objects.latest("Op_en6007", code_name = "gis") # Functions for ovariable, KML and Googl maps interactions
  
<rcode name="answer" embed=1>
+
== Rationale ==
 +
 
 +
=== Calculations ===
 +
 
 +
'''Functions for ovariable diagnostics
 +
showind has problems with get() but this version of code was acceptable [http://en.opasnet.org/en-opwiki/index.php?title=Special:RTools&id=QcfKMkCd2ewUtZqP].
 +
 
 +
<rcode name="diagnostics" embed=0>
 +
#This is code Op_en6007/diagnostics on page [OpasnetUtils/Drafts]]
  
 
library(OpasnetUtils)
 
library(OpasnetUtils)
  
ograph <- function( # Määritellään yleisfunktio peruskuvaajan piirtämiseen.
+
# Shows a table about ovariables and their index and location changes compared with parents.
ovariable,  
+
# showind has problems with get().
x,  
+
showind <- function(name = ".GlobalEnv", sources = FALSE, prevresults = FALSE) {
y = character(),  
+
  # i ovariable
type = character(),  
+
  # k parent ovariable
other = character(),
+
  # l index in (parent) ovariable
fill = NA,  
+
  deptable <- data.frame()
...
+
  for(i in ls(name = name)) {
 +
    d = list(get(i))[[1]]
 +
    if(class(d) == "ovariable") {
 +
      depind <- list()
 +
      if(nrow(d@dependencies)>0) {
 +
        dep <- paste(d@dependencies$Name, collapse = ", ")
 +
        for(k in d@dependencies$Name){
 +
          if(!exists(k)) cat(k, "does not exist.\n") else {
 +
            if(class(get(k)) != "ovariable") cat(k, "is not an ovariable.\n") else {
 +
              ko <- list(get(k)@output)[[1]]
 +
              if("Iter" %in% colnames(ko)) ko$Iter <- as.factor(max(as.numeric(as.character(ko$Iter))))
 +
              cols <- colnames(ko)
 +
              if(!sources) cols <- cols[!grepl("Source$", cols)]
 +
              if(!prevresults) cols <- cols[!grepl("Result$", cols)]
 +
              for(l in cols) {
 +
                if(l %in% names(depind)) {
 +
                  depind[[l]] <- union(depind[[l]], unique(ko[[l]]))
 +
                } else {
 +
                  newind <- list(unique(ko[[l]]))
 +
                  names(newind) <- l
 +
                  depind <- c(depind, newind)
 +
                }
 +
              }
 +
            }
 +
          }
 +
        }
 +
      } else {
 +
        dep <- "No dependencies"
 +
      }
 +
      curcols <- colnames(d@output)
 +
      if(!sources) curcols <- curcols[!grepl("Source$", curcols)]
 +
      if(!prevresults) curcols <- curcols[!grepl("Result$", curcols)]
 +
      droploc <- character()
 +
      for(m in curcols) {
 +
        if(!is.numeric(d@output[[m]])) {
 +
          drops <- setdiff(depind[[m]], unique(d@output[[m]]))
 +
          if(length(drops>0)) {
 +
            droploc <- paste(
 +
              droploc,
 +
              paste(
 +
                m,
 +
                paste(drops, collapse = ", "),
 +
                sep = ": "
 +
              ),
 +
              sep = " | "
 +
            )
 +
          }
 +
        }
 +
      }
 +
      if(length(droploc)==0) droploc <- NA
 +
      deptable <- rbind(
 +
        deptable,
 +
        data.frame(
 +
          Ovariable = i,
 +
          Size = nrow(d@output),
 +
          Dependencies = dep,
 +
          Current = paste(curcols, collapse = ", "),
 +
          Dropped = paste(setdiff(names(depind), curcols), collapse = ", "),
 +
          New = paste(setdiff(curcols, names(depind)), collapse = ", "),
 +
          Dropped_locations = droploc
 +
        )
 +
      )
 +
    }
 +
  }
 +
  return(deptable)
 +
}
 +
 
 +
ovashapetest <- function(ova) {
 +
  allr <- rownames(ova@output)
 +
  uniqr <- rownames(unique(ova@output[ova@marginal]))
 +
  cube <- sapply(ova@output[ova@marginal], FUN = function(x) length(unique(x)))
 +
  if(length(allr) == length(uniqr)) {
 +
    cat("All rows have unique marginals.\n")
 +
  } else {
 +
    cat("Warning. All rows do not have unique marginals. Make sure that this is what you want.\n")
 +
  }
 +
  cat("Number of all rows:", length(allr), "\n")
 +
  cat("Number of all rows without Iter: Iter==1", length(ova$Iter[ova$Iter=="1"]),
 +
    "nrow/N", length(allr)/openv$N, "\n")
 +
  cat("Number of unique rows:", length(uniqr), "\n")
 +
  cat("Number of rows in a full array:", prod(cube), "\n")
 +
  oprint(cube)
 +
  nonuniqr <- setdiff(allr, uniqr)
 +
#  cat("Non-unique rows:", nonuniqr, "\n")
 +
#  oprint(head(ova@output[rownames(ova@output) %in% nonuniqr , ]))
 +
  cubesm <- cube[cube>1 & cube<50]
 +
  cubn <- names(cubesm)
 +
  for(i in 2:(length(cubn))) {
 +
    for(j in 1:(i-1)){
 +
      oprint(c(cubn[i], cubn[j]))
 +
      oprint(table(ova@output[[cubn[i]]], ova@output[[cubn[j]]], useNA="ifany"))
 +
    }
 +
  }
 +
 
 +
  for(i in colnames(ova@output)[ova@marginal]) {
 +
    locs <- ova@output[[i]]
 +
    exper <- prod(cube[names(cube) != i])
 +
    oprint(c(i, exper))
 +
    for(j in unique(ova@output[[i]])) {
 +
      cat(j, length(locs[locs == j]), ",") 
 +
    }
 +
  }
 +
}
 +
 
 +
#####################################
 +
# This function can be used to quickly locate indices that do not match between
 +
# two ovariables and thus result in an output with 0 rows.
 +
binoptest <- function(x, y) {
 +
  if(nrow(x@output) == 0) cat(paste("Ovariable", x@name,"has 0 rows in output.\n"))
 +
  if(nrow(y@output) == 0) cat(paste("Ovariable", y@name,"has 0 rows in output.\n"))
 +
  commons <- intersect(colnames(x@output), colnames(y@output))
 +
  commons <- commons[!grepl("Result$", commons)]
 +
  cat("Ovariables have these common columns:\n")
 +
  xt <- x@output
 +
  yt <- y@output
 +
  for (i in commons) {
 +
    cat(i, "with shared locations\n")
 +
    locs <- intersect(x@output[[i[1]]], y@output[[i[1]]])
 +
    if(length(locs)>50) cat(">50 of them\n") else cat(locs, "\n")
 +
    xt <- xt[xt[[i]] %in% locs , ]
 +
    yt <- yt[yt[[i]] %in% locs , ]
 +
    cat("Rows remaining", x@name, nrow(xt), y@name, nrow(yt), "\n")
 +
  }
 +
}
 +
 
 +
#### showLoctable lists locations of each index in the evaluated ovariables in the global environment.
 +
 
 +
showLoctable <- function(name = ".GlobalEnv") {
 +
  loctable <- data.frame()
 +
 
 +
  for(i in ls(name = name)) {
 +
    if(class(get(i)) == "ovariable") {
 +
      for(j in colnames(get(i)@output)) {
 +
        if(!(grepl("Source", j) | grepl("Result", j))) {
 +
          loctable <- rbind(
 +
            loctable,
 +
            data.frame(
 +
              Ovariable = i,
 +
              Index = j,
 +
              Class = paste(class(get(i)@output[[j]]), collapse=" "),
 +
              Marginal = j %in% colnames(get(i)@output)[get(i)@marginal],
 +
              NumLoc = length(unique(get(i)@output[[j]])),
 +
              Locations = paste(head(unique(get(i)@output[[j]])), collapse = " ")
 +
            )
 +
          )
 +
        }
 +
      }
 +
    }
 +
  }
 +
  return(loctable)
 +
}
 +
 
 +
objects.store(showind, binoptest, showLoctable, ovashapetest)
 +
cat("Functions showind, binoptest, showLoctable, ovashapetest stored.\n")
 +
</rcode>
 +
 
 +
'''Functions for Webropol data
 +
 
 +
<rcode name="webropol" embed=1>
 +
#This is code Op_en6007/webropol on page [OpasnetUtils/Drafts]]
 +
 
 +
library(OpasnetUtils)
 +
 
 +
### webropol.convert converts a csv file from Webropol into a useful data.frame.
 +
 
 +
webropol.convert <- function(
 +
  data, # Data.frame created from a Webropol csv file. The first row should contain headings.
 +
  rowfact, # Row number where the factor levels start (in practice, last row + 3)
 +
  textmark = "Other open" # The text that is shown in the heading if there is an open sub-question.
 +
) {
 +
  out <- dropall(data[2:(rowfact - 3) , ])
 +
  subquestion <- t(data[1 , ])
 +
  subquestion <- gsub("\xa0", " ", subquestion)
 +
  subquestion <- gsub("\xb4", " ", subquestion)
 +
  subquestion <- gsub("\n", " ", subquestion)
 +
  #  subquestion <- gsub("\\(", " ", subquestion)
 +
  #  subquestion <- gsub("\\)", " ", subquestion)
 +
  textfield <- regexpr(textmark, subquestion) != -1
 +
  subquestion <- strsplit(subquestion, ":") # Divide the heading into a main question and a subquestion.
 +
  subqtest <- 0 # The previous question name.
 +
  for(i in 1:ncol(out)) {
 +
    #print(i)
 +
    if(subquestion[[i]][1] != subqtest) { # If part of previous question, use previous fact.
 +
      fact <- as.character(data[rowfact:nrow(data) , i]) # Create factor levels from the end of Webropol file.
 +
      fact <- fact[fact != ""] # Remove empty rows
 +
      fact <- gsub("\xa0", " ", fact)
 +
      fact <- gsub("\xb4", " ", fact)
 +
      fact <- gsub("\n", " ", fact)
 +
      fact <- strsplit(fact, " = ") # Separate value (level) and interpretation (label)
 +
    }
 +
    if(length(fact) != 0 & !textfield[i]) { # Do this only if the column is not a text type column.
 +
      out[[i]] <- factor(
 +
        out[[i]],
 +
        levels = unlist(lapply(fact, function(x) x[1])),
 +
        labels = unlist(lapply(fact, function(x) x[2])),  
 +
        ordered = TRUE
 +
      )
 +
    }
 +
    subqtest <- subquestion[[i]][1]
 +
  }
 +
  return(out)
 +
}
 +
 
 +
# merge.questions takes a multiple checkbox question and merges that into a single factor.
 +
# First levels in levs have priority over others, if several levels apply to a row.
 +
 
 +
merge.questions <- function(
 +
  dat, # data.frame with questionnaire data
 +
  cols, # list of vectors of column names or numbers to be merged into one level in the factor
 +
  levs, # vector (with the same length as cols) of levels of factors into which questions are merged.
 +
  name # text string for the name of the new factor column in the data.
 +
) {
 +
  for(i in length(cols):1) {
 +
    temp <- FALSE
 +
    for(j in rev(cols[[i]])) {
 +
      temp <- temp | !is.na(dat[[j]])
 +
    }
 +
    dat[[name]][temp] <- levs[i]
 +
  }
 +
  dat[[name]] <- factor(dat[[name]], levels = levs, ordered = TRUE)
 +
  return(dat)
 +
}
 +
 
 +
 
 +
objects.store(webropol.convert, merge.questions)
 +
cat("Functions webropol.convert, merge.questions stored.\n")
 +
</rcode>
 +
 
 +
'''Miscellaneous functions
 +
 
 +
<rcode name="miscellaneous" embed=1>
 +
#This is code Op_en6007/webropol on page [OpasnetUtils/Drafts]]
 +
 
 +
library(OpasnetUtils)
 +
 
 +
############ Shuffles columns of a data.frame so that they match a pre-defined correlation matrix
 +
 
 +
#### THIS SHOULD BE UPDATED FOR OVARIABLES AS WELL: SHUFFLING ACROSS Iter WITH
 +
#### CORRELATION MATRIX ACROSS DEFINED INDICES AND THEIR LOCATIONS. OTHER INDICES ARE
 +
#### KEPT UNCHANGED, SO THE SHUFFLING HAS TO HAPPEN WITHIN EACH UNIQUE LOCATION COMBINATION.
 +
 
 +
correlvar <- function(
 +
  vars, # multivariable object to be correlated.
 +
  Sigma # covariance matrix wanted.
 
) {
 
) {
if(class(ovariable) == "ovariable") {
+
 
if(nrow(ovariable@output) == 0) ovariable <- EvalOutput(ovariable)
+
  # Method from http://www.r-bloggers.com/easily-generate-correlated-variables-from-any-distribution-without-copulas/
data <- ovariable@output
+
  require(MASS)
title <- ovariable@name
+
  mu <- rep(0,ncol(vars))
if(length(y) == 0) y <- paste(title, "Result", sep = "")
+
  rawvars <- as.data.frame(mvrnorm(n = nrow(vars), mu = mu, Sigma = Sigma))
} else {
+
  out <- as.data.frame(
data <- ovariable
+
    lapply(
title <- character()
+
      1:ncol(vars),
if(length(y) == 0) y <- "Result"
+
      FUN = function(i, vars, rawvars) {
}
+
        pvars <- rank(rawvars[[i]], ties.method = "random")
if(length(type) == 0) {
+
        tmp <- sort(vars[[i]]) # Make sure you start with ordered data.
if("Iter" %in% colnames(data)) type <- geom_boxplot() else type <- geom_bar(stat = "identity")
+
        tmp <- tmp[pvars] # Order based on correlated ranks
 +
        return(tmp)
 +
      },
 +
      vars = vars,
 +
      rawvars = rawvars
 +
    )
 +
  )
 +
  colnames(out) <- colnames(vars)
 +
  return(out)
 +
}
 +
 
 +
##################### Forgets decisions so that decision indices will be recreated.
 +
 
 +
forgetDecisions <- function() {
 +
for(i in ls(envir = openv)) {
 +
if("dec_check" %in% names(openv[[i]])) openv[[i]]$dec_check <- FALSE
 
}
 
}
out <- ggplot(data, aes_string(x = x, y = y, fill = fill)) # Määritellään kuvan sarakkeet
+
return(cat("Decisions were forgotten.\n"))
out <- out + type
 
out <- out + theme_grey(base_size=24) # Fontin kokoa suurennetaan
 
out <- out + labs(
 
title = title,
 
y = paste(unique(data[[paste(title, "Yksikkö", sep = "")]]), sep = "", collapse = ", ")
 
)
 
out <- out + theme(axis.text.x = element_text(angle = 90, hjust = 1)) # X-akselin tekstit käännetään niin että mahtuvat
 
if(length(other) != 0) out <- out + other
 
return(out)
 
 
}
 
}
 +
 +
################## Sähkön hinta tunneittain
 +
 +
#price <- opbase.data(ident="op_en7353")
 +
#temperature <- opbase.data("op_en6315.2014_5_2015")
 +
#temperature$Date <- substr(temperature$Date, 0, 11)
 +
#price$Date <- substr(price$Date, 0, 11)
 +
#mon <- c("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")
 +
#for (i in mon) {
 +
# price$Date <- gsub(i, which(mon == i), as.character(price$Date))
 +
#}
 +
#for (i in mon) {
 +
# temperature$Date <- gsub(i, which(mon == i), as.character(temperature$Date))
 +
#}
 +
#price$Hours <- substr(price$Hours, 0, 2)
 +
#price$Hours <- paste(price$Hours, ":00:00", sep="")
 +
#temperature$Time <- paste(temperature$Time, ":00", sep="")
 +
#as.character(temperature$Result)
 +
#as.numeric(temperature$Result)
 +
#cut(temperature$Result, breaks = c(-21, -18, -15, -12, -9, -6, -3, 0, 3, 6, 9, 12, 15, 18, 21, 24, 27, 30), #include.lowest=TRUE)
 +
#DateTime <- as.POSIXct(paste(temperature$Date, temperature$Time), format="%Y-%m-%d %H:%M:%S")
 +
#DateHours <- as.POSIXct(paste(price$Date, price$Hours), format="%Y-%m-%d %H:%M:%S")
  
 
# fillna takes a data.frame and fills the cells with NA with each level in that column.
 
# fillna takes a data.frame and fills the cells with NA with each level in that column.
# object is the data.frame, marginals is a vector of columns (either column names or positions) that are to be filled.
+
# fillna was updated in OpasnetUtils and therefore removed from here.
# This version of fillna accepts column positions (as the previous version) and also column names in marginals.
 
  
fillna <- function (object, marginals) {
 
a <- dropall(object)
 
if(!is.numeric(marginals)) marginals <- match(marginals, colnames(object))
 
for (i in marginals) {
 
a[[i]] <- as.factor(a[[i]])
 
a1 <- a[!is.na(a[[i]]), ]
 
a2 <- a[is.na(a[[i]]), ][-i]
 
addition <- data.frame(A = levels(a[[i]]))
 
colnames(addition) <- colnames(a)[i]
 
a2 <- merge(addition, a2)
 
a <- rbind(a1, a2)
 
}
 
return(a)
 
}
 
  
 
## collapsemarg is a placeholder for a better functionality within CollapseMarginals.
 
## collapsemarg is a placeholder for a better functionality within CollapseMarginals.
Line 111: Line 379:
 
return(variable)
 
return(variable)
 
}
 
}
 +
 +
 +
# Merge all but show_bins largest bins of indices cols to 'Other'.
 +
 +
truncateIndex <- function( # Truncates indices to contain only the largest index bins.
 +
obj, # ovariable to use.
 +
cols, # names of the columns to truncate.
 +
bins = rep(10, length(cols)), # Number of bins to show, including Others. Smallest locations will be lumped to bin "Other".
 +
sum_others = TRUE # Should "Other" be summed to maintain marginal status
 +
) {
 +
if(nrow(obj@output) == 0) stop("Ovariable ", obj@name, " not evaluated.\n")
 +
test <- oapply(abs(obj), INDEX = cols, sum, na.rm = TRUE)
 +
if(length(cols) > 1 & length(bins) == 1) bins <- rep(bins, length(cols))
 +
 +
for(i in 1:length(cols))
 +
{
 +
test2 <- oapply(test, INDEX = cols[i], sum)
 +
test2@output <- test2@output[result(test2) > 0 , ]
 +
 +
temp <- as.factor(obj@output[[cols[i]]])
 +
location_weight_order <- order(result(test2), decreasing = TRUE)
 +
keeps <- test2@output[[cols[i]]][location_weight_order[0:min(bins[i] - 1, nrow(test2@output))]]
 +
levels(temp)[!levels(temp) %in% keeps] <- "Other"
 +
temp <- factor(temp, levels = c(levels(temp)[levels(temp) != "Other"], "Other"))
 +
obj@output[[cols[i]]] <- temp
 +
}
 +
# After changing some locations to "Other", sum along indices to avoid problems
 +
if(sum_others) {
 +
obj <- oapply(obj, cols = "", FUN = sum, na.rm = TRUE)
 +
}
 +
return(obj)
 +
}
 +
 +
findrest <- function (X, cols, total = 1)
 +
{
 +
# findrest input is an ovariable that can be integrated over indices cols to result in total.
 +
# Often it is used with uncertain fractions. One (often the largest) fraction is
 +
# omitted or NA, and it is replaced by whatever is missing from the total.
 +
if (nrow(X@output) == 0)
 +
X <- EvalOutput(X)
 +
rescol <- paste(X@name, "Result", sep = "")
 +
marginals <- colnames(X@output)[X@marginal]
 +
 +
# temp is the amount that is still missing from the total.
 +
temp <- total - oapply(X, cols = cols, FUN = function(x) sum(x, na.rm = TRUE))
 +
 +
# Remove old rescol because it would cause trouble later.
 +
if(rescol != paste(temp@name, "Result", sep = "")) temp <- unkeep(temp, cols = rescol)
 +
colnames(temp@output)[colnames(temp@output) == paste(temp@name, "Result", sep = "")] <- "tempResult"
 +
temp@name <- "temp" # This is to make sure that merge works.
 +
 +
out <- merge(X, temp)@output
 +
 +
# Replace missing values with values from temp.
 +
out[[rescol]] <- ifelse(is.na(out[[rescol]]), out$tempResult, out[[rescol]]) # Result comes from temp
 +
out$tempResult <- NULL
 +
X@output <- out
 +
X@marginal <- colnames(X@output) %in% marginals
 +
return(X)
 +
}
 +
 +
timing <- function(dat, timecol = NA, weeks = 6, tz = "EET")
 +
# timing converts character or numeric inputs into times using a few default formats
 +
{
 +
if(is.data.frame(dat)) # Turn dat into a data.frame in all cases.
 +
{
 +
timesall <- (dat[timecol])
 +
} else {
 +
timesall <- data.frame(Timecol = dat)
 +
timecol <- "Timecol"
 +
}
 +
 +
temprow <- character() # A whole row of timesall collapsed into a string.
 +
 +
for(i in 1:nrow(timesall))
 +
{
 +
temprow[i] <- tolower(paste(t(timesall)[ , i], collapse = ""))
 +
}
 +
 +
weekdays <- data.frame(
 +
Name = c("su", "ma", "ti", "ke", "to", "pe", "la",
 +
"sun", "mon", "tue", "wed", "thu", "fri", "sat",
 +
"sön", "mon", "tis", "ons", "tor", "fre", "lör"),
 +
Number = rep(0:6, times = 3)
 +
)
 +
 +
# day is the number of the weekday in case of repeating events.
 +
 +
day <- rep(NA, nrow(timesall))
 +
for(i in 1:nrow(weekdays))
 +
{
 +
temp1 <- grepl(weekdays$Name[i], temprow) # Is any weekday mentioned in temprow?
 +
day <- ifelse(is.na(day) & temp1, weekdays$Number[i], day) # Find the weekday number.
 +
}
 +
 +
# repall are repeating events, x are non-repeating events.
 +
 +
repall <- timesall[!is.na(day) , ]
 +
xall <- timesall[is.na(day) , ]
 +
starting <- NA
 +
 +
########### First, change non-repeating timedates.
 +
# Change timedate into POSIXct assuming formats 15.3.2013 or 2013-03-15 and 15:24 or 15.24.
 +
# Note! 13 and 2013 mean 1.1.2013 and 3.2013 means 1.3.2013
 +
 +
if(nrow(xall) > 0)
 +
{
 +
xout <- data.frame(Datrow = (1:nrow(timesall))[is.na(day)]) # Row numbers from dat.
 +
 +
for(j in colnames(xall))
 +
{
 +
x <- xall[[j]]
 +
if(is.factor(x)) x <- levels(x)[x]
 +
 +
x <- ifelse(grepl("^[0-9][0-9]$", x), paste("20", x, sep = ""), x)
 +
x <- ifelse(grepl("^[0-9][0-9][0-9][0-9]$", x), paste("1.1.", x, sep = ""), x)
 +
x <- ifelse(grepl("^[0-9].[0-9][0-9][0-9][0-9]$", x), paste("1.", x, sep = ""), x)
 +
x <- ifelse(grepl("^[0-9][0-9].[0-9][0-9][0-9][0-9]$", x), paste("1.", x, sep = ""), x)
 +
 +
temp <- x
 +
x <- as.POSIXct(temp, format = "%d.%m.%Y %H:%M", tz = tz)
 +
x <- ifelse(!is.na(x), x, as.POSIXct(temp, format = "%d.%m.%Y %H.%M", tz = tz))
 +
x <- ifelse(!is.na(x), x, as.POSIXct(temp, format = "%Y-%m-%d %H:%M", tz = tz))
 +
x <- ifelse(!is.na(x), x, as.POSIXct(temp, format = "%Y-%m-%d %H.%M", tz = tz))
 +
x <- ifelse(!is.na(x), x, as.POSIXct(temp, format = "%d.%m.%Y", tz = tz))
 +
x <- ifelse(!is.na(x), x, as.POSIXct(temp, format = "%d.%m.%y", tz = tz))
 +
 +
xout <- cbind(xout, X = as.POSIXct(x, origin = "1970-01-01", tz = tz))
 +
# Intermediate values turn into numeric, therefore turned back to POSIX.
 +
 +
starting <- min(c(starting, xout$X), na.rm = TRUE)
 +
 +
colnames(xout)[colnames(xout) == "X"] <- j
 +
}
 +
}
 +
 +
############## Then, change weekly repeating timedates.
 +
 +
# repall must have format ma 9:00 or TUESDAY 8.24. Weekday is case-insensitive and can be abbrevieated.
 +
# The start and end times are assumed to be on the same day. The name of day can be on either column.
 +
 +
if(nrow(repall) > 0)
 +
{
 +
 +
for(j in colnames(repall))
 +
{
 +
 +
reptime <- gsub("[[:alpha:] ]", "", repall[[j]]) # Remove alphabets and spaces.
 +
reptime <- gsub("\\.", ":", reptime)
 +
reptime <- strsplit(reptime, split = ":")
 +
 +
temp2 <- numeric()
 +
for(i in 1:length(reptime))
 +
{
 +
temp2[i] <- as.numeric(reptime[[i]][1]) * 3600 + as.numeric(reptime[[i]][2]) * 60
 +
}
 +
 +
reptime <- temp2
 +
 +
if(is.na(starting)) starting <- paste(format(Sys.Date(), format = "%Y"), "-01-01", sep = "")
 +
starting <-  as.POSIXlt(starting, origin = "1970-01-01") # First day of year.
 +
starting$mday <- starting$mday - as.numeric(format(starting, format = "%w")) + day[!is.na(day)] # Previous Sunday plus weekdaynumber.
 +
 +
reps <- data.frame()
 +
temp3 <- starting
 +
 +
for(i in 1:weeks)
 +
{
 +
reps <- rbind(reps, data.frame(
 +
Datrow = (1:nrow(timesall))[!is.na(day)],
 +
X = as.POSIXct(temp3, origin = "1970-01-01", tz = tz) + reptime
 +
))
 +
temp3$mday <- temp3$mday + 7 # Make a weekly event.
 +
}
 +
colnames(reps)[colnames(reps) == "X"] <- j
 +
 +
if(j == colnames(repall)[1]) repout <- reps else repout <- cbind(repout, reps[j])
 +
}
 +
}
 +
out <- rbind(xout, repout)
 +
 +
if(is.data.frame(dat))
 +
{
 +
dat$Datrow <- 1:nrow(dat)
 +
dat <- dat[!colnames(dat) %in% timecol]
 +
out <- merge(dat, out)
 +
out <- out[colnames(out) != "Datrow"]
 +
} else {
 +
out <- out[[timecol]]
 +
}
 +
 +
return(out)
 +
}
 +
 +
 +
# Funktio makeTimeline ottaa tapahtumalistauksen ja rakentaa siitä aikajanan. Parametrit:
 +
# event  =  data.framena tapahtumalistaus, joka sisältää ainakin alku- ja loppuajan (Alku, Loppu) ja
 +
# tapahtumatiedon sekä mahdollisesti toiston ja keston (Toisto = aikaväli päivinä,
 +
# Kesto = viimeinen tapahtuma-aika).
 +
# timeformat = jos TRUE, oletetaan POSIX-muotoiseksi ja muutetaan operointia varten sekunneiksi.
 +
# Jos FALSE, oletetaan reaaliluvuksi ja operoidaan suoraan luvuilla.
 +
 +
makeTimeline <- function(event, timeformat = TRUE) {
 +
 +
# eventiin luodaan Toisto ja Kesto, jos jompikumpi puuttuu.
 +
if(!all(c("Toisto", "Kesto") %in% colnames(event)))
 +
{
 +
event$Toisto <- NA
 +
event$Kesto <- NA
 +
}
 +
 +
if(timeformat)
 +
{
 +
for(m in c("Alku", "Loppu", "Kesto"))
 +
{
 +
event[[m]] <- as.double(as.POSIXct(event[[m]])) # Muutetaan aika sekunneiksi.
 +
}
 +
event$Toisto <- event$Toisto * 3600 * 24 # Muutetaan Toisto päivistä sekunneiksi.
 +
}
 +
 +
# Jos on puuttuvaa tietoa kestosta tai toistosta, korvataan inertillä datalla.
 +
test <- event$Toisto == 0 | event$Kesto == 0 | is.na(event$Toisto) | is.na(event$Kesto)
 +
event$Toisto <- ifelse(test, 1, event$Toisto)
 +
event$Kesto <- ifelse(test, event$Alku, event$Kesto)
 +
 +
# Luodaan aikajanan alkupiste. Eventrow otetaan aikanaan tyhjältä riviltä.
 +
 +
timeline <- data.frame(Time = min(event$Alku), Eventrow = nrow(event)+1)
 +
 +
for(i in 1:nrow(event)) { # Toista jokaiselle havaintoriville
 +
times <- 0:floor((event$Kesto[i] - event$Alku[i]) / event$Toisto[i]) # Toistojen määrä
 +
 +
# Toista tapahtumaa Kestoon asti.
 +
temp <- data.frame(
 +
Time = event$Alku[i] + times * event$Toisto[i],
 +
End = event$Loppu[i] + times * event$Toisto[i],
 +
EventrowStart = i
 +
)
 +
 +
timeline <- merge(timeline, temp[ , c("Time", "EventrowStart")], all = TRUE) # Lisätään tapahtumat aikajanaan.
 +
colnames(temp) <- c("Remove", "Time", "EventrowEnd") # Muutetaan otsikot, koska nyt halutaan mergata loppuhetket aikajanaan.
 +
timeline <- merge(timeline, temp[, c("Time", "EventrowEnd")], all = TRUE)
 +
for(j in 2:nrow(timeline)) {
 +
# Tämä luuppi käy aikajanan läpi ja täydentää tapahtumat.
 +
 +
# Ensin kaikkiin uusiin aikapisteisiin jatketaan sitä aiempaa toimintaa, joka oli menossa edellisessä pisteessä.
 +
if(is.na(timeline$Eventrow[j]))
 +
{
 +
timeline$Eventrow[j] <- timeline$Eventrow[j-1]
 +
}
 +
 +
# Sitten jatketaan uutta toimintaa niihin uusiin pisteisiin, jotka eivät ole loppupisteitä.
 +
if(is.na(timeline$EventrowStart[j]) & is.na(timeline$EventrowEnd[j]))
 +
{
 +
timeline$EventrowStart[j] <- timeline$EventrowStart[j-1]
 +
}
 +
}
 +
 +
# Jos uutta toimintaa on olemassa, statuksena käytetään sitä, muutoin statusta eli aiempaa toimintaa.
 +
timeline$Eventrow <- ifelse(!is.na(timeline$EventrowStart), timeline$EventrowStart, timeline$Eventrow)
 +
 +
# Leikataan turhat sarakkeet pois ja siirrytään seuraavalle event-riville
 +
timeline <- timeline[ , c("Time", "Eventrow")]
 +
}
 +
 +
event <- rbind(event, rep(NA, ncol(event))) # Lisää rivi loppuhetkeä varten
 +
event$Alku[nrow(event)] <- max(timeline$Time)
 +
event$Eventrow <- row(event)[ , 1]
 +
 +
timeline <- merge(timeline, event) # Yhdistetään Statuksen eli eventin rivinumeron avulla.
 +
timeline <- timeline[!colnames(timeline) %in% c("Eventrow", "Alku", "Loppu", "Toisto", "Kesto")]
 +
timeline <- timeline[order(timeline$Time) , ]
 +
 +
if(timeformat) timeline$Time <- as.POSIXct(timeline$Time, origin = "1970-01-01")
 +
 +
return(timeline)
 +
}
 +
 +
# Calculate the cumulative impact of the events on building stock to given years
 +
 +
timepoints <- function(
 +
# Function timepoints takes an event list and turns that into existing crosscutting situations at
 +
# timepoints defined by years. The output will have index Time.
 +
# In other words, this will integrate over obstime at specified timepoints.
 +
X, # X must be an ovariable with a column of the same name as obstime.
 +
obstime, # obstime must be a single-column data.frame of observation times.
 +
sumtimecol = TRUE # Should the timecol be summed up?
 +
# obstime and timecol may be numeric (by coercion) or POSIXt.
 +
) {
 +
timecol <- colnames(obstime)
 +
marginals <- colnames(X@output)[X@marginal]
 +
 +
# tapply (and therefore possibly oapply) changes continuous indices to factors! Must change back by hand.
 +
if("factor" %in% c(class(obstime[[timecol]]), class(X@output[[timecol]]))) {
 +
X@output[[timecol]] <- as.numeric(as.character(X@output[[timecol]]))
 +
obstime[[timecol]] <- as.numeric(as.character(obstime[[timecol]]))
 +
}
 +
 +
out <- data.frame()
 +
if(sumtimecol) by <- setdiff(marginals, timecol) else by <- marginals
 +
 +
for(i in obstime[[timecol]]) {
 +
temp <- X@output[X@output[[timecol]] <= i , ]
 +
if(nrow(temp) > 0) {
 +
temp <- aggregate(
 +
temp[paste(X@name, "Result", sep = "")],
 +
by = temp[colnames(temp) %in% by],
 +
FUN = sum
 +
)
 +
}
 +
if(nrow(temp) > 0) out <- rbind(out, data.frame(Time = i, temp))
 +
}
 +
X@output <- out
 +
X@marginal <- colnames(out) %in% c("Time", marginals) # Add Time to marginal
 +
 +
return(X)
 +
}
 +
 +
# ana2ova takes in variable tables from Analytica (produced with Copy Table). The output is a data.frame in long format.
 +
ana2ova <- function(dat)
 +
{
 +
i <- 1 # Number of indices
 +
cols <- character() # Names of indices
 +
locs <- list() # Vectors of locations for each index.
 +
out <- data.frame() # output
 +
repeat{ # Find out cols and locs from dat.
 +
temp <- strsplit(dat[i + 1], split = "\t")[[1]]
 +
cols[i] <- temp[1]
 +
if(length(temp) == 1) break
 +
locs[[i]] <- temp[2:length(temp)]
 +
i <- i + 1
 +
}
 +
locs[[i]] <- NA # The innermost index should have its place in locs even if the locations are not yet known.
 +
 +
inner <- (1:length(dat))[dat %in% cols[i]] # Places where the innermost index is mentioned (the data starts from the next row).
 +
if(!all(dat[inner - 1] == dat[i]) & i > 1) stop("Structure incorrect\n")
 +
if(length(inner) == 1) # len is the length of the innermost index.
 +
{
 +
len <- length(dat) - i - 1
 +
} else {
 +
len <- inner[2] - inner[1] - i
 +
}
 +
 +
for(k in inner) # Go through each table with the inner and second most inner indices.
 +
{
 +
ins <- strsplit(dat[(k + 1):(k + len)], split = "\t") # Make a data.frame.
 +
ins <- data.frame(matrix(unlist(ins), nrow = len, byrow = TRUE))
 +
 +
if(i == 1) cn <- "value" else cn <- locs[[i - 1]]
 +
colnames(ins) <- c(cols[i], cn) # Give location names to columns
 +
if(i > 1) ins <- melt(ins, id.vars = cols[i], variable.name = cols[i - 1]) # If several columns.
 +
if(i > 2) # If there are several tables.
 +
{
 +
uplocs <- data.frame(Removethis = 0)
 +
for(l in 1:(i - 2))
 +
{
 +
uplocs[[cols[l]]] <- strsplit(dat[k - i + l], split = "\t")[[1]][2]
 +
}
 +
out <- rbind(out, cbind(uplocs, ins)) # Collects all tables into a data.frame.
 +
} else {
 +
out <- ins
 +
}
 +
}
 +
out$Removethis <- NULL
 +
colnames(out)[colnames(out) == "value"] <- "Result"
 +
return(out)
 +
}
 +
 +
rm(wiki_username)
 +
objects.store(list = ls())
 +
cat("All objects in the global namespace were stored:", ls(), "\n")
 +
 +
</rcode>
 +
 +
'''Functions for GIS data
 +
 +
<rcode name="gis" embed=1>
 +
#This is code Op_en6007/gis on page [OpasnetUtils/Drafts]]
 +
 +
library(OpasnetUtils)
  
 
MyPointKML <- function( # The function creates a KML fille from a SpatialPointsDataFrame object.
 
MyPointKML <- function( # The function creates a KML fille from a SpatialPointsDataFrame object.
Line 199: Line 847:
 
}
 
}
  
ova2spat <- function( # This function converts an ovariable into a SpatialPointsDataFrame.
+
ova2spat <- function( # This function converts an ovariable or a data.frame into a SpatialPointsDataFrame.
ovariable, # An evaluated ovariable that has coordinate indices.
+
  dat, # An evaluated ovariable or data.frame that has coordinate indices.
coords, # The names of the coordinate indices as a character vector, first x then y.
+
  coords = c("LO", "LA"), # The names of the coordinate indices as a character vector, first x then y.
proj4string # Projection identifier or specification as character string. See http://spatialreference.org/
+
  proj4string = NULL # Projection identifier or specification as character string. See http://spatialreference.org/
 +
  # If proj4string is NULL, longitude-latitude system is assumed.
 
) {
 
) {
temp <- ovariable@output
+
  if(class(dat) == "ovariable") temp <- dat@output else
+
    if(is.data.frame(dat)) temp <- dat else
# Transform coordinates into numeric format.
+
      stop("object must be either evaluated ovariable or data.frame\n")
+
 
for(i in coords) {  
+
  # Transform coordinates into numeric format.
if(is(temp[[i]], "factor"))    temp[[i]] <- levels(temp[[i]])[temp[[i]]]
+
 
if(is(temp[[i]], "character")) temp[[i]] <- as.numeric(temp[[i]])
+
  for(i in coords) {  
}
+
    temp[[i]] <- as.numeric(as.character(temp[[i]]))
+
  }
# Define the coordinate points first, then add other ovariable output to it.
+
 
+
  # Define the coordinate points first, then add other ovariable output to it.
sp <- SpatialPoints(temp[coords], CRS(proj4string))
+
 
out <- SpatialPointsDataFrame(sp, temp[!colnames(temp) %in% coords])
+
  if(is.null(proj4string)) {
 
+
    sp <- SpatialPoints(temp[coords], CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs"))
#Transform the projection to longitude-latitude system.
+
    } else {
+
      sp <- SpatialPoints(temp[coords], CRS(proj4string))
epsg4326String <- CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs")
+
    }
out <- spTransform(out,epsg4326String)
+
  out <- SpatialPointsDataFrame(sp, temp[!colnames(temp) %in% coords])
 +
 
 +
  #Transform the projection to longitude-latitude system.
 +
  if(!is.null(proj4string)) {
 +
    epsg4326String <- CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs")
 +
    out <- spTransform(out,epsg4326String)
 +
  }
 +
 
 +
  return(out)
 +
}
  
return(out)
 
}
 
  
 
# MyRmap is a function for creating static Google maps as png.
 
# MyRmap is a function for creating static Google maps as png.
Line 379: Line 1,035:
 
}
 
}
  
 
+
objects.store(MyPointKML, ova2spat, MyRmap, MyPlotKML)
# Merge all but show_bins largest bins of indices cols to 'Other'.
+
cat("Functions MyPointKML, ova2spat, MyRmap, MyPlotKML stored.\n")
 
 
truncateIndex <- function( # Truncates an index to contain only the largest index bins.
 
obj, # ovariable to use.
 
cols, # names of the columns to truncate.
 
bins = 10# Number of bins to show. Other bins will be merged to "Other".
 
) {
 
for(i in 1:length(cols))
 
{
 
sums <- as.data.frame(as.table(tapply(result(obj), obj@output[cols[i]], sum)))
 
bins <- min(bins, length(levels(obj@output[[cols[i]]])))
 
limit <- sort(sums$Freq, decreasing = TRUE)[bins]
 
keeps <- sums[sums$Freq > limit , cols[i]]
 
keeps <- levels(keeps)[keeps]
 
 
 
levels(obj@output[[cols[i]]]) <- ifelse(
 
levels(obj@output[[cols[i]]]) %in% keeps,
 
levels(obj@output[[cols[i]]]),
 
"Other"
 
)
 
}
 
return(obj)
 
}
 
 
 
findrest <- function (X, cols, total = 1)
 
{
 
# findrest input is an ovariable that can be integrated over indices cols to result in total.
 
# Often it is used with uncertain fractions. The largest fraction is omitted or NA, and it is replaced by whatever is missing from the total.
 
if (nrow(X@output) == 0)
 
X <- EvalOutput(X)
 
rescol <- comment(result(X))
 
 
 
# temp is the amount that is still missing from the total.
 
temp <- total - oapply(X, cols = cols, FUN = function(x) sum(x, na.rm = TRUE))
 
temp@output[[rescol]] <- NULL # Remove old rescol because it would cause trouble later.
 
out <- merge(X, temp)@output
 
 
 
# Replace missing values with values from temp.
 
out[[rescol]] <- ifelse(is.na(out[[rescol]]), out$Result, out[[rescol]]) # Result comes from temp
 
if(rescol != "Result") out$Result <- NULL
 
X@output <- out
 
return(X)
 
}
 
findrest <- function (X, cols, total = 1)
 
{
 
# findrest input is an ovariable that can be integrated over indices cols to result in total.
 
# Often it is used with uncertain fractions. The largest fraction is omitted or NA, and it is replaced by whatever is missing from the total.
 
if (nrow(X@output) == 0)
 
X <- EvalOutput(X)
 
rescol <- comment(result(X))
 
marginals <- colnames(X@output)[X@marginal]
 
 
 
# temp is the amount that is still missing from the total.
 
temp <- total - oapply(X, cols = cols, FUN = function(x) sum(x, na.rm = TRUE))
 
temp <- unkeep(temp, cols = rescol) # Remove old rescol because it would cause trouble later.
 
colnames(temp@output)[colnames(temp@output) == "Result"] <- "tempResult"
 
temp@name <- "temp" # This is to make sure that merge works.
 
 
 
out <- merge(X, temp)@output
 
 
 
# Replace missing values with values from temp.
 
out[[rescol]] <- ifelse(is.na(out[[rescol]]), out$tempResult, out[[rescol]]) # Result comes from temp
 
out$tempResult <- NULL
 
X@output <- out
 
X@marginal <- colnames(X@output) %in% marginals
 
return(X)
 
}
 
 
 
unkeep <- function (X, cols = NA, sources = FALSE, prevresults = FALSE)
 
{
 
rescol <- paste(X@name, "Result", sep = "")
 
if (sources)
 
cols <- c(cols, colnames(X@output)[grepl("Source$", colnames(X@output))])
 
 
 
for(i in cols) { # Give warning about unkept indices with >1 locations.
 
if(
 
length(unique(X@output[[i]])) > 1 &
 
X@marginal[match(i, colnames(X@output))]
 
) {
 
warning(paste(
 
"There is >1 unique locations in column",
 
i, ":",
 
paste(as.character(unique(X@output[[i]])), collapse = ", ")
 
))
 
}
 
}
 
 
 
if (prevresults)
 
cols <- c(cols, colnames(X@output)[grepl("Result$", colnames(X@output)) &
 
colnames(X@output) != rescol])
 
 
 
marginals <- colnames(X@output)[X@marginal]
 
X@output <- X@output[!colnames(X@output) %in% cols]
 
X@marginal <- colnames(X@output) %in% marginals
 
 
 
return(X)
 
}
 
 
 
timing <- function(x, tz = "EET")
 
# timing converts character or numeric inputs into times using a few default formats
 
{
 
x <- as.character(x)
 
x <- ifelse(grepl("^[0-9][0-9]$", x), paste("20", x, sep = ""), x)
 
x <- ifelse(grepl("^[0-9][0-9][0-9][0-9]$", x), paste("1.1.", x, sep = ""), x)
 
x <- ifelse(grepl("^[0-9].[0-9][0-9][0-9][0-9]$", x), paste("1.", x, sep = ""), x)
 
x <- ifelse(grepl("^[0-9][0-9].[0-9][0-9][0-9][0-9]$", x), paste("1.", x, sep = ""), x)
 
 
 
temp <- x
 
x <- as.POSIXct(temp, format = "%d.%m.%Y %H:%M", tz = tz)
 
x <- ifelse(!is.na(x), x, as.POSIXct(temp, format = "%d.%m.%Y %H.%M", tz = tz))
 
x <- ifelse(!is.na(x), x, as.POSIXct(temp, format = "%Y-%m-%d %H:%M", tz = tz))
 
x <- ifelse(!is.na(x), x, as.POSIXct(temp, format = "%Y-%m-%d %H.%M", tz = tz))
 
x <- ifelse(!is.na(x), x, as.POSIXct(temp, format = "%d.%m.%Y", tz = tz))
 
x <- ifelse(!is.na(x), x, as.POSIXct(temp, format = "%d.%m.%y", tz = tz))
 
 
 
return(as.POSIXct(x, origin = "1970-01-01", tz = tz))
 
}
 
 
 
 
 
objects.store(ograph, fillna, collapsemarg, MyPointKML, ova2spat, MyRmap, MyPlotKML, truncateIndex, findrest, unkeep, timing)
 
 
 
cat("The following objects are stored: ograph, fillna, collapsemarg, MyPointKML, ova2spat, MyRmap, MyPlotKML, truncateIndex, findrest, unkeep, timing.\n")
 
 
 
 
</rcode>
 
</rcode>
  

Latest revision as of 19:33, 6 June 2017



Question

Which functions are so useful that they should be taken into OpasnetUtils package? This page contains draft function which will be included when they are good enough and found important.

Answer

Call the objects stored by this code from another rode with this command:

objects.latest("Op_en6007", code_name = "answer") # Old version that fetches all objects, depreciated and not updated.
objects.latest("Op_en6007", code_name = "diagnostics") # Functions for ovariable and model diagnostics: ovashapetest, showLoctable, binoptest
objects.latest("Op_en6007", code_name = "webropol") # Functions for operating with Webropol data
objects.latest("Op_en6007", code_name = "miscellaneous") # Functions for various tasks
objects.latest("Op_en6007", code_name = "gis") # Functions for ovariable, KML and Googl maps interactions

Rationale

Calculations

Functions for ovariable diagnostics showind has problems with get() but this version of code was acceptable [1].

+ Show code

Functions for Webropol data

+ Show code

Miscellaneous functions

+ Show code

Functions for GIS data

+ Show code

See also

References


Related files

<mfanonymousfilelist></mfanonymousfilelist>