+ Show code- Hide code
#This is code Op_en6007/webropol on page [OpasnetUtils/Drafts]]
library(OpasnetUtils)
############ Shuffles columns of a data.frame so that they match a pre-defined correlation matrix
#### THIS SHOULD BE UPDATED FOR OVARIABLES AS WELL: SHUFFLING ACROSS Iter WITH
#### CORRELATION MATRIX ACROSS DEFINED INDICES AND THEIR LOCATIONS. OTHER INDICES ARE
#### KEPT UNCHANGED, SO THE SHUFFLING HAS TO HAPPEN WITHIN EACH UNIQUE LOCATION COMBINATION.
correlvar <- function(
vars, # multivariable object to be correlated.
Sigma # covariance matrix wanted.
) {
# Method from http://www.r-bloggers.com/easily-generate-correlated-variables-from-any-distribution-without-copulas/
require(MASS)
mu <- rep(0,ncol(vars))
rawvars <- as.data.frame(mvrnorm(n = nrow(vars), mu = mu, Sigma = Sigma))
out <- as.data.frame(
lapply(
1:ncol(vars),
FUN = function(i, vars, rawvars) {
pvars <- rank(rawvars[[i]], ties.method = "random")
tmp <- sort(vars[[i]]) # Make sure you start with ordered data.
tmp <- tmp[pvars] # Order based on correlated ranks
return(tmp)
},
vars = vars,
rawvars = rawvars
)
)
colnames(out) <- colnames(vars)
return(out)
}
##################### Forgets decisions so that decision indices will be recreated.
forgetDecisions <- function() {
for(i in ls(envir = openv)) {
if("dec_check" %in% names(openv[[i]])) openv[[i]]$dec_check <- FALSE
}
return(cat("Decisions were forgotten.\n"))
}
################## Sähkön hinta tunneittain
#price <- opbase.data(ident="op_en7353")
#temperature <- opbase.data("op_en6315.2014_5_2015")
#temperature$Date <- substr(temperature$Date, 0, 11)
#price$Date <- substr(price$Date, 0, 11)
#mon <- c("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")
#for (i in mon) {
# price$Date <- gsub(i, which(mon == i), as.character(price$Date))
#}
#for (i in mon) {
# temperature$Date <- gsub(i, which(mon == i), as.character(temperature$Date))
#}
#price$Hours <- substr(price$Hours, 0, 2)
#price$Hours <- paste(price$Hours, ":00:00", sep="")
#temperature$Time <- paste(temperature$Time, ":00", sep="")
#as.character(temperature$Result)
#as.numeric(temperature$Result)
#cut(temperature$Result, breaks = c(-21, -18, -15, -12, -9, -6, -3, 0, 3, 6, 9, 12, 15, 18, 21, 24, 27, 30), #include.lowest=TRUE)
#DateTime <- as.POSIXct(paste(temperature$Date, temperature$Time), format="%Y-%m-%d %H:%M:%S")
#DateHours <- as.POSIXct(paste(price$Date, price$Hours), format="%Y-%m-%d %H:%M:%S")
# fillna takes a data.frame and fills the cells with NA with each level in that column.
# fillna was updated in OpasnetUtils and therefore removed from here.
## collapsemarg is a placeholder for a better functionality within CollapseMarginals.
## It takes an ovariable, and summarises all indices in cols using tapply and a user-defined function.
## However, you can also use function "pick" to select locations defined in a list picks found in indices cols.
## Function "unkeep" simply drops the unkept indices without any other operation.
## The output is an ovariable with the same name as the input.
## This was first created for [[:op_fi:Radonin terveysvaikutukset]]
collapsemarg <- function(variable, cols, fun = "sum", picks = list(), ...) { # cols is a character vector, while probs is a list
out <- dropall(variable@output)
marginals <- colnames(out)[variable@marginal]
if(tolower(fun) == "unkeep") { # The function must be a string, otherwise this row will fail.
out <- out[!colnames(out) %in% cols]
} else {
if(tolower(fun) == "pick") {
for(i in cols) {
out <- out[out[[i]] %in% picks[[match(i, cols)]] , ]
}
cols <- "" # Those locations that were picked are still marginals.
} else {
margtemp <- colnames(out)[colnames(out) %in% marginals & !colnames(out) %in% cols]
# You must leave at least one index.
out <- as.data.frame(as.table(tapply(result(variable), out[margtemp], fun)))
out <- out[!is.na(out$Freq) , ]
colnames(out)[colnames(out) == "Freq"] <- ifelse(
length(variable@name) == 0,
"Result",
paste(variable@name, "Result", sep = "")
)
}
}
variable@output <- out
variable@marginal <- colnames(out) %in% marginals & ! colnames(out) %in% cols
return(variable)
}
# Merge all but show_bins largest bins of indices cols to 'Other'.
truncateIndex <- function( # Truncates indices to contain only the largest index bins.
obj, # ovariable to use.
cols, # names of the columns to truncate.
bins = rep(10, length(cols)), # Number of bins to show, including Others. Smallest locations will be lumped to bin "Other".
sum_others = TRUE # Should "Other" be summed to maintain marginal status
) {
if(nrow(obj@output) == 0) stop("Ovariable ", obj@name, " not evaluated.\n")
test <- oapply(abs(obj), INDEX = cols, sum, na.rm = TRUE)
if(length(cols) > 1 & length(bins) == 1) bins <- rep(bins, length(cols))
for(i in 1:length(cols))
{
test2 <- oapply(test, INDEX = cols[i], sum)
test2@output <- test2@output[result(test2) > 0 , ]
temp <- as.factor(obj@output[[cols[i]]])
location_weight_order <- order(result(test2), decreasing = TRUE)
keeps <- test2@output[[cols[i]]][location_weight_order[0:min(bins[i] - 1, nrow(test2@output))]]
levels(temp)[!levels(temp) %in% keeps] <- "Other"
temp <- factor(temp, levels = c(levels(temp)[levels(temp) != "Other"], "Other"))
obj@output[[cols[i]]] <- temp
}
# After changing some locations to "Other", sum along indices to avoid problems
if(sum_others) {
obj <- oapply(obj, cols = "", FUN = sum, na.rm = TRUE)
}
return(obj)
}
findrest <- function (X, cols, total = 1)
{
# findrest input is an ovariable that can be integrated over indices cols to result in total.
# Often it is used with uncertain fractions. One (often the largest) fraction is
# omitted or NA, and it is replaced by whatever is missing from the total.
if (nrow(X@output) == 0)
X <- EvalOutput(X)
rescol <- paste(X@name, "Result", sep = "")
marginals <- colnames(X@output)[X@marginal]
# temp is the amount that is still missing from the total.
temp <- total - oapply(X, cols = cols, FUN = function(x) sum(x, na.rm = TRUE))
# Remove old rescol because it would cause trouble later.
if(rescol != paste(temp@name, "Result", sep = "")) temp <- unkeep(temp, cols = rescol)
colnames(temp@output)[colnames(temp@output) == paste(temp@name, "Result", sep = "")] <- "tempResult"
temp@name <- "temp" # This is to make sure that merge works.
out <- merge(X, temp)@output
# Replace missing values with values from temp.
out[[rescol]] <- ifelse(is.na(out[[rescol]]), out$tempResult, out[[rescol]]) # Result comes from temp
out$tempResult <- NULL
X@output <- out
X@marginal <- colnames(X@output) %in% marginals
return(X)
}
timing <- function(dat, timecol = NA, weeks = 6, tz = "EET")
# timing converts character or numeric inputs into times using a few default formats
{
if(is.data.frame(dat)) # Turn dat into a data.frame in all cases.
{
timesall <- (dat[timecol])
} else {
timesall <- data.frame(Timecol = dat)
timecol <- "Timecol"
}
temprow <- character() # A whole row of timesall collapsed into a string.
for(i in 1:nrow(timesall))
{
temprow[i] <- tolower(paste(t(timesall)[ , i], collapse = ""))
}
weekdays <- data.frame(
Name = c("su", "ma", "ti", "ke", "to", "pe", "la",
"sun", "mon", "tue", "wed", "thu", "fri", "sat",
"sön", "mon", "tis", "ons", "tor", "fre", "lör"),
Number = rep(0:6, times = 3)
)
# day is the number of the weekday in case of repeating events.
day <- rep(NA, nrow(timesall))
for(i in 1:nrow(weekdays))
{
temp1 <- grepl(weekdays$Name[i], temprow) # Is any weekday mentioned in temprow?
day <- ifelse(is.na(day) & temp1, weekdays$Number[i], day) # Find the weekday number.
}
# repall are repeating events, x are non-repeating events.
repall <- timesall[!is.na(day) , ]
xall <- timesall[is.na(day) , ]
starting <- NA
########### First, change non-repeating timedates.
# Change timedate into POSIXct assuming formats 15.3.2013 or 2013-03-15 and 15:24 or 15.24.
# Note! 13 and 2013 mean 1.1.2013 and 3.2013 means 1.3.2013
if(nrow(xall) > 0)
{
xout <- data.frame(Datrow = (1:nrow(timesall))[is.na(day)]) # Row numbers from dat.
for(j in colnames(xall))
{
x <- xall[[j]]
if(is.factor(x)) x <- levels(x)[x]
x <- ifelse(grepl("^[0-9][0-9]$", x), paste("20", x, sep = ""), x)
x <- ifelse(grepl("^[0-9][0-9][0-9][0-9]$", x), paste("1.1.", x, sep = ""), x)
x <- ifelse(grepl("^[0-9].[0-9][0-9][0-9][0-9]$", x), paste("1.", x, sep = ""), x)
x <- ifelse(grepl("^[0-9][0-9].[0-9][0-9][0-9][0-9]$", x), paste("1.", x, sep = ""), x)
temp <- x
x <- as.POSIXct(temp, format = "%d.%m.%Y %H:%M", tz = tz)
x <- ifelse(!is.na(x), x, as.POSIXct(temp, format = "%d.%m.%Y %H.%M", tz = tz))
x <- ifelse(!is.na(x), x, as.POSIXct(temp, format = "%Y-%m-%d %H:%M", tz = tz))
x <- ifelse(!is.na(x), x, as.POSIXct(temp, format = "%Y-%m-%d %H.%M", tz = tz))
x <- ifelse(!is.na(x), x, as.POSIXct(temp, format = "%d.%m.%Y", tz = tz))
x <- ifelse(!is.na(x), x, as.POSIXct(temp, format = "%d.%m.%y", tz = tz))
xout <- cbind(xout, X = as.POSIXct(x, origin = "1970-01-01", tz = tz))
# Intermediate values turn into numeric, therefore turned back to POSIX.
starting <- min(c(starting, xout$X), na.rm = TRUE)
colnames(xout)[colnames(xout) == "X"] <- j
}
}
############## Then, change weekly repeating timedates.
# repall must have format ma 9:00 or TUESDAY 8.24. Weekday is case-insensitive and can be abbrevieated.
# The start and end times are assumed to be on the same day. The name of day can be on either column.
if(nrow(repall) > 0)
{
for(j in colnames(repall))
{
reptime <- gsub("[[:alpha:] ]", "", repall[[j]]) # Remove alphabets and spaces.
reptime <- gsub("\\.", ":", reptime)
reptime <- strsplit(reptime, split = ":")
temp2 <- numeric()
for(i in 1:length(reptime))
{
temp2[i] <- as.numeric(reptime[[i]][1]) * 3600 + as.numeric(reptime[[i]][2]) * 60
}
reptime <- temp2
if(is.na(starting)) starting <- paste(format(Sys.Date(), format = "%Y"), "-01-01", sep = "")
starting <- as.POSIXlt(starting, origin = "1970-01-01") # First day of year.
starting$mday <- starting$mday - as.numeric(format(starting, format = "%w")) + day[!is.na(day)] # Previous Sunday plus weekdaynumber.
reps <- data.frame()
temp3 <- starting
for(i in 1:weeks)
{
reps <- rbind(reps, data.frame(
Datrow = (1:nrow(timesall))[!is.na(day)],
X = as.POSIXct(temp3, origin = "1970-01-01", tz = tz) + reptime
))
temp3$mday <- temp3$mday + 7 # Make a weekly event.
}
colnames(reps)[colnames(reps) == "X"] <- j
if(j == colnames(repall)[1]) repout <- reps else repout <- cbind(repout, reps[j])
}
}
out <- rbind(xout, repout)
if(is.data.frame(dat))
{
dat$Datrow <- 1:nrow(dat)
dat <- dat[!colnames(dat) %in% timecol]
out <- merge(dat, out)
out <- out[colnames(out) != "Datrow"]
} else {
out <- out[[timecol]]
}
return(out)
}
# Funktio makeTimeline ottaa tapahtumalistauksen ja rakentaa siitä aikajanan. Parametrit:
# event = data.framena tapahtumalistaus, joka sisältää ainakin alku- ja loppuajan (Alku, Loppu) ja
# tapahtumatiedon sekä mahdollisesti toiston ja keston (Toisto = aikaväli päivinä,
# Kesto = viimeinen tapahtuma-aika).
# timeformat = jos TRUE, oletetaan POSIX-muotoiseksi ja muutetaan operointia varten sekunneiksi.
# Jos FALSE, oletetaan reaaliluvuksi ja operoidaan suoraan luvuilla.
makeTimeline <- function(event, timeformat = TRUE) {
# eventiin luodaan Toisto ja Kesto, jos jompikumpi puuttuu.
if(!all(c("Toisto", "Kesto") %in% colnames(event)))
{
event$Toisto <- NA
event$Kesto <- NA
}
if(timeformat)
{
for(m in c("Alku", "Loppu", "Kesto"))
{
event[[m]] <- as.double(as.POSIXct(event[[m]])) # Muutetaan aika sekunneiksi.
}
event$Toisto <- event$Toisto * 3600 * 24 # Muutetaan Toisto päivistä sekunneiksi.
}
# Jos on puuttuvaa tietoa kestosta tai toistosta, korvataan inertillä datalla.
test <- event$Toisto == 0 | event$Kesto == 0 | is.na(event$Toisto) | is.na(event$Kesto)
event$Toisto <- ifelse(test, 1, event$Toisto)
event$Kesto <- ifelse(test, event$Alku, event$Kesto)
# Luodaan aikajanan alkupiste. Eventrow otetaan aikanaan tyhjältä riviltä.
timeline <- data.frame(Time = min(event$Alku), Eventrow = nrow(event)+1)
for(i in 1:nrow(event)) { # Toista jokaiselle havaintoriville
times <- 0:floor((event$Kesto[i] - event$Alku[i]) / event$Toisto[i]) # Toistojen määrä
# Toista tapahtumaa Kestoon asti.
temp <- data.frame(
Time = event$Alku[i] + times * event$Toisto[i],
End = event$Loppu[i] + times * event$Toisto[i],
EventrowStart = i
)
timeline <- merge(timeline, temp[ , c("Time", "EventrowStart")], all = TRUE) # Lisätään tapahtumat aikajanaan.
colnames(temp) <- c("Remove", "Time", "EventrowEnd") # Muutetaan otsikot, koska nyt halutaan mergata loppuhetket aikajanaan.
timeline <- merge(timeline, temp[, c("Time", "EventrowEnd")], all = TRUE)
for(j in 2:nrow(timeline)) {
# Tämä luuppi käy aikajanan läpi ja täydentää tapahtumat.
# Ensin kaikkiin uusiin aikapisteisiin jatketaan sitä aiempaa toimintaa, joka oli menossa edellisessä pisteessä.
if(is.na(timeline$Eventrow[j]))
{
timeline$Eventrow[j] <- timeline$Eventrow[j-1]
}
# Sitten jatketaan uutta toimintaa niihin uusiin pisteisiin, jotka eivät ole loppupisteitä.
if(is.na(timeline$EventrowStart[j]) & is.na(timeline$EventrowEnd[j]))
{
timeline$EventrowStart[j] <- timeline$EventrowStart[j-1]
}
}
# Jos uutta toimintaa on olemassa, statuksena käytetään sitä, muutoin statusta eli aiempaa toimintaa.
timeline$Eventrow <- ifelse(!is.na(timeline$EventrowStart), timeline$EventrowStart, timeline$Eventrow)
# Leikataan turhat sarakkeet pois ja siirrytään seuraavalle event-riville
timeline <- timeline[ , c("Time", "Eventrow")]
}
event <- rbind(event, rep(NA, ncol(event))) # Lisää rivi loppuhetkeä varten
event$Alku[nrow(event)] <- max(timeline$Time)
event$Eventrow <- row(event)[ , 1]
timeline <- merge(timeline, event) # Yhdistetään Statuksen eli eventin rivinumeron avulla.
timeline <- timeline[!colnames(timeline) %in% c("Eventrow", "Alku", "Loppu", "Toisto", "Kesto")]
timeline <- timeline[order(timeline$Time) , ]
if(timeformat) timeline$Time <- as.POSIXct(timeline$Time, origin = "1970-01-01")
return(timeline)
}
# Calculate the cumulative impact of the events on building stock to given years
timepoints <- function(
# Function timepoints takes an event list and turns that into existing crosscutting situations at
# timepoints defined by years. The output will have index Time.
# In other words, this will integrate over obstime at specified timepoints.
X, # X must be an ovariable with a column of the same name as obstime.
obstime, # obstime must be a single-column data.frame of observation times.
sumtimecol = TRUE # Should the timecol be summed up?
# obstime and timecol may be numeric (by coercion) or POSIXt.
) {
timecol <- colnames(obstime)
marginals <- colnames(X@output)[X@marginal]
# tapply (and therefore possibly oapply) changes continuous indices to factors! Must change back by hand.
if("factor" %in% c(class(obstime[[timecol]]), class(X@output[[timecol]]))) {
X@output[[timecol]] <- as.numeric(as.character(X@output[[timecol]]))
obstime[[timecol]] <- as.numeric(as.character(obstime[[timecol]]))
}
out <- data.frame()
if(sumtimecol) by <- setdiff(marginals, timecol) else by <- marginals
for(i in obstime[[timecol]]) {
temp <- X@output[X@output[[timecol]] <= i , ]
if(nrow(temp) > 0) {
temp <- aggregate(
temp[paste(X@name, "Result", sep = "")],
by = temp[colnames(temp) %in% by],
FUN = sum
)
}
if(nrow(temp) > 0) out <- rbind(out, data.frame(Time = i, temp))
}
X@output <- out
X@marginal <- colnames(out) %in% c("Time", marginals) # Add Time to marginal
return(X)
}
# ana2ova takes in variable tables from Analytica (produced with Copy Table). The output is a data.frame in long format.
ana2ova <- function(dat)
{
i <- 1 # Number of indices
cols <- character() # Names of indices
locs <- list() # Vectors of locations for each index.
out <- data.frame() # output
repeat{ # Find out cols and locs from dat.
temp <- strsplit(dat[i + 1], split = "\t")[[1]]
cols[i] <- temp[1]
if(length(temp) == 1) break
locs[[i]] <- temp[2:length(temp)]
i <- i + 1
}
locs[[i]] <- NA # The innermost index should have its place in locs even if the locations are not yet known.
inner <- (1:length(dat))[dat %in% cols[i]] # Places where the innermost index is mentioned (the data starts from the next row).
if(!all(dat[inner - 1] == dat[i]) & i > 1) stop("Structure incorrect\n")
if(length(inner) == 1) # len is the length of the innermost index.
{
len <- length(dat) - i - 1
} else {
len <- inner[2] - inner[1] - i
}
for(k in inner) # Go through each table with the inner and second most inner indices.
{
ins <- strsplit(dat[(k + 1):(k + len)], split = "\t") # Make a data.frame.
ins <- data.frame(matrix(unlist(ins), nrow = len, byrow = TRUE))
if(i == 1) cn <- "value" else cn <- locs[[i - 1]]
colnames(ins) <- c(cols[i], cn) # Give location names to columns
if(i > 1) ins <- melt(ins, id.vars = cols[i], variable.name = cols[i - 1]) # If several columns.
if(i > 2) # If there are several tables.
{
uplocs <- data.frame(Removethis = 0)
for(l in 1:(i - 2))
{
uplocs[[cols[l]]] <- strsplit(dat[k - i + l], split = "\t")[[1]][2]
}
out <- rbind(out, cbind(uplocs, ins)) # Collects all tables into a data.frame.
} else {
out <- ins
}
}
out$Removethis <- NULL
colnames(out)[colnames(out) == "value"] <- "Result"
return(out)
}
rm(wiki_username)
objects.store(list = ls())
cat("All objects in the global namespace were stored:", ls(), "\n")
| |