Difference between revisions of "OpasnetUtils/Drafts"

Revision as of 08:14, 6 June 2017

This page is a method. The page identifier is Op_en6007
Moderator:Jouni (see all)
Give your opinion to the peer rating of the content of this page. {{ #opasnet_rater: }}
Upload data

Question

Which functions are so useful that they should be taken into OpasnetUtils package? This page contains draft function which will be included when they are good enough and found important.

Answer

Call the objects stored by this code from another rode with this command:

objects.latest("Op_en6007", code_name = "answer") # Old version that fetches all objects, depreciated and not updated.
objects.latest("Op_en6007", code_name = "diagnostics") # Functions for ovariable and model diagnostics: ovashapetest, showLoctable, binoptest
objects.latest("Op_en6007", code_name = "webropol") # Functions for operating with Webropol data
objects.latest("Op_en6007", code_name = "miscellaneous") # Functions for various tasks
objects.latest("Op_en6007", code_name = "gis") # Functions for ovariable, KML and Googl maps interactions

Rationale

Calculations

Functions for ovariable diagnostics showind has problems with get() but this version of code was acceptable [1].

+ Show code - Hide code

#This is code Op_en6007/diagnostics on page [OpasnetUtils/Drafts]]

library(OpasnetUtils)

# Shows a table about ovariables and their index and location changes compared with parents.
# showind has problems with get().
showind <- function(name = ".GlobalEnv", sources = FALSE, prevresults = FALSE) {
  # i ovariable
  # k parent ovariable
  # l index in (parent) ovariable
  deptable <- data.frame()
  for(i in ls(name = name)) {
    #    d <- g#et(i)
    if(class(get(i)) == "ovariable") {
      depind <- list()
      if(nrow(get(i)@dependencies)>0) {
        dep <- paste(get(i)@dependencies$Name, collapse = ", ")
        for(k in get(i)@dependencies$Name){
          if(!exists(k)) cat(k, "does not exist.\n") else {
            if(class(get(k)) != "ovariable") cat(k, "is not an ovariable.\n") else {
              #              ko <- g#et(k)
              # We don't want to look at all Iters, but forbidden <-get() makes this tricky., so we just disable
              # this functionality at the moment. koIter should be replaced by ko$Iter where ko <- g#et(k)
              if("Iter" %in% colnames(get(k)@output)) koIter <- as.factor(max(as.numeric(as.character(get(k)$Iter))))
              cols <- colnames(get(k)@output)
              if(!sources) cols <- cols[!grepl("Source$", cols)]
              if(!prevresults) cols <- cols[!grepl("Result$", cols)]
              for(l in cols) {
                if(l %in% names(depind)) {
                  depind[[l]] <- union(depind[[l]], unique(get(k)@output[[l]]))
                } else {
                  newind <- list(unique(get(k)@output[[l]]))
                  names(newind) <- l
                  depind <- c(depind, newind)
                }
              }
            }
          }
        }
      } else {
        dep <- "No dependencies"
      }
      curcols <- colnames(get(i)@output)
      if(!sources) curcols <- curcols[!grepl("Source$", curcols)]
      if(!prevresults) curcols <- curcols[!grepl("Result$", curcols)]
      droploc <- character()
      for(m in curcols) {
        if(!is.numeric(get(i)@output[[m]])) {
          drops <- setdiff(depind[[m]], unique(get(i)@output[[m]]))
          if(length(drops>0)) {
            droploc <- paste(
              droploc, 
              paste(
                m, 
                paste(drops, collapse = ", "),
                sep = ": "
              ),
              sep = " | "
            )
          }
        }
      }
      if(length(droploc)==0) droploc <- NA
      deptable <- rbind(
        deptable,
        data.frame(
          Ovariable = i,
          Size = nrow(d@output),
          Dependencies = dep,
          Current = paste(curcols, collapse = ", "),
          Dropped = paste(setdiff(names(depind), curcols), collapse = ", "),
          New = paste(setdiff(curcols, names(depind)), collapse = ", "),
          Dropped_locations = droploc
        )
      )
    }
  }
  return(deptable)
}

ovashapetest <- function(ova) {
  allr <- rownames(ova@output)
  uniqr <- rownames(unique(ova@output[ova@marginal]))
  cube <- sapply(ova@output[ova@marginal], FUN = function(x) length(unique(x)))
  if(length(allr) == length(uniqr)) {
    cat("All rows have unique marginals.\n")
  } else {
    cat("Warning. All rows do not have unique marginals. Make sure that this is what you want.\n")
  }
  cat("Number of all rows:", length(allr), "\n")
  cat("Number of all rows without Iter: Iter==1", length(ova$Iter[ova$Iter=="1"]),
    "nrow/N", length(allr)/openv$N, "\n")
  cat("Number of unique rows:", length(uniqr), "\n")
  cat("Number of rows in a full array:", prod(cube), "\n")
  oprint(cube)
  nonuniqr <- setdiff(allr, uniqr)
#  cat("Non-unique rows:", nonuniqr, "\n")
#  oprint(head(ova@output[rownames(ova@output) %in% nonuniqr , ]))
  cubesm <- cube[cube>1 & cube<50]
  cubn <- names(cubesm)
  for(i in 2:(length(cubn))) {
    for(j in 1:(i-1)){
      oprint(c(cubn[i], cubn[j]))
      oprint(table(ova@output[[cubn[i]]], ova@output[[cubn[j]]], useNA="ifany"))
    }
  }

  for(i in colnames(ova@output)[ova@marginal]) {
    locs <- ova@output[[i]]
    exper <- prod(cube[names(cube) != i])
    oprint(c(i, exper))
    for(j in unique(ova@output[[i]])) {
      cat(j, length(locs[locs == j]), ",")  
    }
  }
}

#####################################
# This function can be used to quickly locate indices that do not match between
# two ovariables and thus result in an output with 0 rows.
binoptest <- function(x, y) {
  if(nrow(x@output) == 0) cat(paste("Ovariable", x@name,"has 0 rows in output.\n"))
  if(nrow(y@output) == 0) cat(paste("Ovariable", y@name,"has 0 rows in output.\n"))
  commons <- intersect(colnames(x@output), colnames(y@output))
  commons <- commons[!grepl("Result$", commons)]
  cat("Ovariables have these common columns:\n")
  xt <- x@output
  yt <- y@output
  for (i in commons) {
    cat(i, "with shared locations\n")
    locs <- intersect(x@output[[i[1]]], y@output[[i[1]]])
    if(length(locs)>50) cat(">50 of them\n") else cat(locs, "\n")
    xt <- xt[xt[[i]] %in% locs , ]
    yt <- yt[yt[[i]] %in% locs , ]
    cat("Rows remaining", x@name, nrow(xt), y@name, nrow(yt), "\n")
  }
}

#### showLoctable lists locations of each index in the evaluated ovariables in the global environment.

showLoctable <- function(name = ".GlobalEnv") {
  loctable <- data.frame()
  
  for(i in ls(name = name)) {
    if(class(get(i)) == "ovariable") {
      for(j in colnames(get(i)@output)) {
        if(!(grepl("Source", j) | grepl("Result", j))) {
          loctable <- rbind(
            loctable,
            data.frame(
              Ovariable = i,
              Index = j,
              Class = paste(class(get(i)@output[[j]]), collapse=" "),
              Marginal = j %in% colnames(get(i)@output)[get(i)@marginal],
              NumLoc = length(unique(get(i)@output[[j]])),
              Locations = paste(head(unique(get(i)@output[[j]])), collapse = " ")
            )
          )
        }
      }
    }
  }
  return(loctable)
}

objects.store(showind, binoptest, showLoctable, ovashapetest)
cat("Functions showind, binoptest, showLoctable, ovashapetest stored.\n")

Functions for Webropol data

+ Show code - Hide code

#This is code Op_en6007/webropol on page [OpasnetUtils/Drafts]]

library(OpasnetUtils)

### webropol.convert converts a csv file from Webropol into a useful data.frame.

webropol.convert <- function(
  data, # Data.frame created from a Webropol csv file. The first row should contain headings.
  rowfact, # Row number where the factor levels start (in practice, last row + 3)
  textmark = "Other open" # The text that is shown in the heading if there is an open sub-question.
) {
  out <- dropall(data[2:(rowfact - 3) , ])
  subquestion <- t(data[1 , ])
  subquestion <- gsub("\xa0", " ", subquestion)
  subquestion <- gsub("\xb4", " ", subquestion)
  subquestion <- gsub("\n", " ", subquestion)
  #  subquestion <- gsub("\\(", " ", subquestion)
  #  subquestion <- gsub("\\)", " ", subquestion)
  textfield <- regexpr(textmark, subquestion) != -1
  subquestion <- strsplit(subquestion, ":") # Divide the heading into a main question and a subquestion.
  subqtest <- 0 # The previous question name.
  for(i in 1:ncol(out)) {
    #print(i)
    if(subquestion[[i]][1] != subqtest) { # If part of previous question, use previous fact.
      fact <- as.character(data[rowfact:nrow(data) , i]) # Create factor levels from the end of Webropol file.
      fact <- fact[fact != ""] # Remove empty rows
      fact <- gsub("\xa0", " ", fact)
      fact <- gsub("\xb4", " ", fact)
      fact <- gsub("\n", " ", fact)
      fact <- strsplit(fact, " = ") # Separate value (level) and interpretation (label)
    }
    if(length(fact) != 0 & !textfield[i]) { # Do this only if the column is not a text type column.
      out[[i]] <- factor(
        out[[i]], 
        levels = unlist(lapply(fact, function(x) x[1])), 
        labels = unlist(lapply(fact, function(x) x[2])), 
        ordered = TRUE
      )
    }
    subqtest <- subquestion[[i]][1]
  }
  return(out)
}

# merge.questions takes a multiple checkbox question and merges that into a single factor.
# First levels in levs have priority over others, if several levels apply to a row.

merge.questions <- function(
  dat, # data.frame with questionnaire data
  cols, # list of vectors of column names or numbers to be merged into one level in the factor
  levs, # vector (with the same length as cols) of levels of factors into which questions are merged.
  name # text string for the name of the new factor column in the data.
) {
  for(i in length(cols):1) {
    temp <- FALSE
    for(j in rev(cols[[i]])) {
      temp <- temp | !is.na(dat[[j]])
    }
    dat[[name]][temp] <- levs[i]
  }
  dat[[name]] <- factor(dat[[name]], levels = levs, ordered = TRUE)
  return(dat)
}


objects.store(webropol.convert, merge.questions)
cat("Functions webropol.convert, merge.questions stored.\n")

Miscellaneous functions

+ Show code - Hide code

#This is code Op_en6007/webropol on page [OpasnetUtils/Drafts]]

library(OpasnetUtils)

############ Shuffles columns of a data.frame so that they match a pre-defined correlation matrix

#### THIS SHOULD BE UPDATED FOR OVARIABLES AS WELL: SHUFFLING ACROSS Iter WITH
#### CORRELATION MATRIX ACROSS DEFINED INDICES AND THEIR LOCATIONS. OTHER INDICES ARE
#### KEPT UNCHANGED, SO THE SHUFFLING HAS TO HAPPEN WITHIN EACH UNIQUE LOCATION COMBINATION.

correlvar <- function(
  vars, # multivariable object to be correlated.
  Sigma # covariance matrix wanted.
) {
  
  # Method from http://www.r-bloggers.com/easily-generate-correlated-variables-from-any-distribution-without-copulas/
  require(MASS)
  mu <- rep(0,ncol(vars))
  rawvars <- as.data.frame(mvrnorm(n = nrow(vars), mu = mu, Sigma = Sigma))
  out <- as.data.frame(
    lapply(
      1:ncol(vars), 
      FUN = function(i, vars, rawvars) {
        pvars <- rank(rawvars[[i]], ties.method = "random")
        tmp <- sort(vars[[i]]) # Make sure you start with ordered data.
        tmp <- tmp[pvars] # Order based on correlated ranks
        return(tmp)
      },
      vars = vars,
      rawvars = rawvars
    )
  )
  colnames(out) <- colnames(vars)
  return(out)
}

##################### Forgets decisions so that decision indices will be recreated.

forgetDecisions <- function() {
	for(i in ls(envir = openv)) {
		if("dec_check" %in% names(openv[[i]])) openv[[i]]$dec_check <- FALSE
	}
	return(cat("Decisions were forgotten.\n"))
}

################## Sähkön hinta tunneittain

#price <- opbase.data(ident="op_en7353")
#temperature <- opbase.data("op_en6315.2014_5_2015")
#temperature$Date <- substr(temperature$Date, 0, 11)
#price$Date <- substr(price$Date, 0, 11)
#mon <- c("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")
#for (i in mon) {
# price$Date <- gsub(i, which(mon == i), as.character(price$Date))
#}
#for (i in mon) {
# temperature$Date <- gsub(i, which(mon == i), as.character(temperature$Date))
#}
#price$Hours <- substr(price$Hours, 0, 2)
#price$Hours <- paste(price$Hours, ":00:00", sep="")
#temperature$Time <- paste(temperature$Time, ":00", sep="")
#as.character(temperature$Result)
#as.numeric(temperature$Result)
#cut(temperature$Result, breaks = c(-21, -18, -15, -12, -9, -6, -3, 0, 3, 6, 9, 12, 15, 18, 21, 24, 27, 30), #include.lowest=TRUE)
#DateTime <- as.POSIXct(paste(temperature$Date, temperature$Time), format="%Y-%m-%d %H:%M:%S")
#DateHours <- as.POSIXct(paste(price$Date, price$Hours), format="%Y-%m-%d %H:%M:%S")

# fillna takes a data.frame and fills the cells with NA with each level in that column.
# fillna was updated in OpasnetUtils and therefore removed from here.


## collapsemarg is a placeholder for a better functionality within CollapseMarginals.
## It takes an ovariable, and summarises all indices in cols using tapply and a user-defined function.
## However, you can also use function "pick" to select locations defined in a list picks found in indices cols.
## Function "unkeep" simply drops the unkept indices without any other operation.
## The output is an ovariable with the same name as the input.
## This was first created for [[:op_fi:Radonin terveysvaikutukset]]

collapsemarg <- function(variable, cols, fun = "sum", picks = list(), ...) { # cols is a character vector, while probs is a list
	out <- dropall(variable@output)
	marginals <- colnames(out)[variable@marginal]

	if(tolower(fun) == "unkeep") { # The function must be a string, otherwise this row will fail.
		out <- out[!colnames(out) %in% cols]
	} else {
		if(tolower(fun) == "pick") {
			for(i in cols) {
				out <- out[out[[i]] %in% picks[[match(i, cols)]] , ]
			}
			cols <- "" # Those locations that were picked are still marginals.
		} else {
			margtemp <- colnames(out)[colnames(out) %in% marginals & !colnames(out) %in% cols] 
			# You must leave at least one index.

			out <- as.data.frame(as.table(tapply(result(variable), out[margtemp], fun)))
			out <- out[!is.na(out$Freq) , ]

			colnames(out)[colnames(out) == "Freq"] <- ifelse(
				length(variable@name) == 0, 
				"Result", 
				paste(variable@name, "Result", sep = "")
			)
		}
	}

	variable@output <- out
	variable@marginal <- colnames(out) %in% marginals & ! colnames(out) %in% cols
	
	return(variable)
}


# Merge all but show_bins largest bins of indices cols to 'Other'.

truncateIndex <- function( # Truncates indices to contain only the largest index bins.
		obj, # ovariable to use.
		cols, # names of the columns to truncate.
		bins = rep(10, length(cols)), # Number of bins to show, including Others. Smallest locations will be lumped to bin "Other".
		sum_others = TRUE # Should "Other" be summed to maintain marginal status
) {
	if(nrow(obj@output) == 0) stop("Ovariable ", obj@name, " not evaluated.\n")
	test <- oapply(abs(obj), INDEX = cols, sum, na.rm = TRUE)
	if(length(cols) > 1 & length(bins) == 1) bins <- rep(bins, length(cols))
	
	for(i in 1:length(cols))
	{
		test2 <- oapply(test, INDEX = cols[i], sum)
		test2@output <- test2@output[result(test2) > 0 , ]
		
		temp <- as.factor(obj@output[[cols[i]]])
		location_weight_order <- order(result(test2), decreasing = TRUE)
		keeps <- test2@output[[cols[i]]][location_weight_order[0:min(bins[i] - 1, nrow(test2@output))]]
		levels(temp)[!levels(temp) %in% keeps] <- "Other"
		temp <- factor(temp, levels = c(levels(temp)[levels(temp) != "Other"], "Other"))
		obj@output[[cols[i]]] <- temp
	}
	# After changing some locations to "Other", sum along indices to avoid problems
	if(sum_others) {
		obj <- oapply(obj, cols = "", FUN = sum, na.rm = TRUE)
	}
	return(obj)
}

findrest <- function (X, cols, total = 1)
{
	# findrest input is an ovariable that can be integrated over indices cols to result in total.
	# Often it is used with uncertain fractions. One (often the largest) fraction is 
	# omitted or NA, and it is replaced by whatever is missing from the total.
	if (nrow(X@output) == 0) 
		X <- EvalOutput(X)
	rescol <- paste(X@name, "Result", sep = "")
	marginals <- colnames(X@output)[X@marginal]

	# temp is the amount that is still missing from the total.
	temp <- total - oapply(X, cols = cols, FUN = function(x) sum(x, na.rm = TRUE))

	# Remove old rescol because it would cause trouble later.
	if(rescol != paste(temp@name, "Result", sep = "")) temp <- unkeep(temp, cols = rescol) 
	colnames(temp@output)[colnames(temp@output) == paste(temp@name, "Result", sep = "")] <- "tempResult" 
	temp@name <- "temp" # This is to make sure that merge works.

	out <- merge(X, temp)@output

	# Replace missing values with values from temp.
	out[[rescol]] <- ifelse(is.na(out[[rescol]]), out$tempResult, out[[rescol]]) # Result comes from temp
	out$tempResult <- NULL
	X@output <- out
	X@marginal <- colnames(X@output) %in% marginals
	return(X)
}

timing <- function(dat, timecol = NA, weeks = 6, tz = "EET") 
# timing converts character or numeric inputs into times using a few default formats
{
	if(is.data.frame(dat)) # Turn dat into a data.frame in all cases.
	{
		timesall <- (dat[timecol])
	} else {
		timesall <- data.frame(Timecol = dat)
		timecol <- "Timecol"
	}

	temprow <- character() # A whole row of timesall collapsed into a string.
	
	for(i in 1:nrow(timesall))
	{
		temprow[i] <- tolower(paste(t(timesall)[ , i], collapse = ""))
	}

	weekdays <- data.frame(
		Name = c("su", "ma", "ti", "ke", "to", "pe", "la", 
		"sun", "mon", "tue", "wed", "thu", "fri", "sat",
		"sön", "mon", "tis", "ons", "tor", "fre", "lör"),
		Number = rep(0:6, times = 3)
	)
	
	# day is the number of the weekday in case of repeating events.

	day <- rep(NA, nrow(timesall))
	for(i in 1:nrow(weekdays)) 
	{
		temp1 <- grepl(weekdays$Name[i], temprow) # Is any weekday mentioned in temprow?
		day <- ifelse(is.na(day) & temp1, weekdays$Number[i], day) # Find the weekday number.
	}

	# repall are repeating events, x are non-repeating events.
	
	repall <- timesall[!is.na(day) , ]
	xall <- timesall[is.na(day) , ]
	starting <- NA
	
	########### First, change non-repeating timedates.
	# Change timedate into POSIXct assuming formats 15.3.2013 or 2013-03-15 and 15:24 or 15.24.
	# Note! 13 and 2013 mean 1.1.2013 and 3.2013 means 1.3.2013
		
	if(nrow(xall) > 0)
	{
		xout <- data.frame(Datrow = (1:nrow(timesall))[is.na(day)]) # Row numbers from dat.
			
		for(j in colnames(xall))
		{
			x <- xall[[j]]
			if(is.factor(x)) x <- levels(x)[x]
			
			x <- ifelse(grepl("^[0-9][0-9]$", x), paste("20", x, sep = ""), x)
			x <- ifelse(grepl("^[0-9][0-9][0-9][0-9]$", x), paste("1.1.", x, sep = ""), x)
			x <- ifelse(grepl("^[0-9].[0-9][0-9][0-9][0-9]$", x), paste("1.", x, sep = ""), x)
			x <- ifelse(grepl("^[0-9][0-9].[0-9][0-9][0-9][0-9]$", x), paste("1.", x, sep = ""), x)

			temp <- x
			x <- as.POSIXct(temp, format = "%d.%m.%Y %H:%M", tz = tz)
			x <- ifelse(!is.na(x), x, as.POSIXct(temp, format = "%d.%m.%Y %H.%M", tz = tz))
			x <- ifelse(!is.na(x), x, as.POSIXct(temp, format = "%Y-%m-%d %H:%M", tz = tz))
			x <- ifelse(!is.na(x), x, as.POSIXct(temp, format = "%Y-%m-%d %H.%M", tz = tz))
			x <- ifelse(!is.na(x), x, as.POSIXct(temp, format = "%d.%m.%Y", tz = tz))
			x <- ifelse(!is.na(x), x, as.POSIXct(temp, format = "%d.%m.%y", tz = tz))

			xout <- cbind(xout, X = as.POSIXct(x, origin = "1970-01-01", tz = tz)) 
			# Intermediate values turn into numeric, therefore turned back to POSIX.

			starting <- min(c(starting, xout$X), na.rm = TRUE)
			
			colnames(xout)[colnames(xout) == "X"] <- j
		}
	}
	
	############## Then, change weekly repeating timedates.
	
	# repall must have format ma 9:00 or TUESDAY 8.24. Weekday is case-insensitive and can be abbrevieated.
	# The start and end times are assumed to be on the same day. The name of day can be on either column.

	if(nrow(repall) > 0)
	{
		
		for(j in colnames(repall))
		{
			
			reptime <- gsub("[[:alpha:] ]", "", repall[[j]]) # Remove alphabets and spaces.
			reptime <- gsub("\\.", ":", reptime)
			reptime <- strsplit(reptime, split = ":")

			temp2 <- numeric()
			for(i in 1:length(reptime)) 
			{
				temp2[i] <- as.numeric(reptime[[i]][1]) * 3600 + as.numeric(reptime[[i]][2]) * 60
			}

			reptime <- temp2

			if(is.na(starting)) starting <- paste(format(Sys.Date(), format = "%Y"), "-01-01", sep = "")
			starting <-  as.POSIXlt(starting, origin = "1970-01-01") # First day of year.
			starting$mday <- starting$mday - as.numeric(format(starting, format = "%w")) + day[!is.na(day)] # Previous Sunday plus weekdaynumber.

			reps <- data.frame()
			temp3 <- starting 

			for(i in 1:weeks)
			{
				reps <- rbind(reps, data.frame(
					Datrow = (1:nrow(timesall))[!is.na(day)],
					X = as.POSIXct(temp3, origin = "1970-01-01", tz = tz) + reptime
				))
				temp3$mday <- temp3$mday + 7 # Make a weekly event.
			}
			colnames(reps)[colnames(reps) == "X"] <- j

			if(j == colnames(repall)[1]) repout <- reps else repout <- cbind(repout, reps[j])
		}
	}
	out <- rbind(xout, repout)
	
	if(is.data.frame(dat))
	{
		dat$Datrow <- 1:nrow(dat)
		dat <- dat[!colnames(dat) %in% timecol]
		out <- merge(dat, out)
		out <- out[colnames(out) != "Datrow"]
	} else {
		out <- out[[timecol]]
	}
	
	return(out)
}


# Funktio makeTimeline ottaa tapahtumalistauksen ja rakentaa siitä aikajanan. Parametrit:
# event  =  data.framena tapahtumalistaus, joka sisältää ainakin alku- ja loppuajan (Alku, Loppu) ja
# tapahtumatiedon sekä mahdollisesti toiston ja keston (Toisto = aikaväli päivinä, 
# Kesto = viimeinen tapahtuma-aika).
# timeformat = jos TRUE, oletetaan POSIX-muotoiseksi ja muutetaan operointia varten sekunneiksi.
# Jos FALSE, oletetaan reaaliluvuksi ja operoidaan suoraan luvuilla.

makeTimeline <- function(event, timeformat = TRUE) { 
	
	# eventiin luodaan Toisto ja Kesto, jos jompikumpi puuttuu.
	if(!all(c("Toisto", "Kesto") %in% colnames(event)))
	{
		event$Toisto <- NA
		event$Kesto <- NA
	}
	
	if(timeformat)
	{
		for(m in c("Alku", "Loppu", "Kesto"))
		{
			event[[m]] <- as.double(as.POSIXct(event[[m]])) # Muutetaan aika sekunneiksi.
		}
		event$Toisto <- event$Toisto * 3600 * 24 # Muutetaan Toisto päivistä sekunneiksi.
	}
	
	# Jos on puuttuvaa tietoa kestosta tai toistosta, korvataan inertillä datalla.
	test <- event$Toisto == 0 | event$Kesto == 0 | is.na(event$Toisto) | is.na(event$Kesto)
	event$Toisto <- ifelse(test, 1, event$Toisto)
	event$Kesto <- ifelse(test, event$Alku, event$Kesto)

	# Luodaan aikajanan alkupiste. Eventrow otetaan aikanaan tyhjältä riviltä.

	timeline <- data.frame(Time = min(event$Alku), Eventrow = nrow(event)+1) 
	
	for(i in 1:nrow(event)) { # Toista jokaiselle havaintoriville
		times <- 0:floor((event$Kesto[i] - event$Alku[i]) / event$Toisto[i]) # Toistojen määrä

		# Toista tapahtumaa Kestoon asti.
		temp <- data.frame(
			Time = event$Alku[i] + times * event$Toisto[i], 
			End = event$Loppu[i] + times * event$Toisto[i], 
			EventrowStart = i
		)
		
		timeline <- merge(timeline, temp[ , c("Time", "EventrowStart")], all = TRUE) # Lisätään tapahtumat aikajanaan.
		colnames(temp) <- c("Remove", "Time", "EventrowEnd") # Muutetaan otsikot, koska nyt halutaan mergata loppuhetket aikajanaan.
		timeline <- merge(timeline, temp[, c("Time", "EventrowEnd")], all = TRUE)
		for(j in 2:nrow(timeline)) {
		# Tämä luuppi käy aikajanan läpi ja täydentää tapahtumat.
		
			# Ensin kaikkiin uusiin aikapisteisiin jatketaan sitä aiempaa toimintaa, joka oli menossa edellisessä pisteessä.
			if(is.na(timeline$Eventrow[j])) 
			{
				timeline$Eventrow[j] <- timeline$Eventrow[j-1]
			}

			# Sitten jatketaan uutta toimintaa niihin uusiin pisteisiin, jotka eivät ole loppupisteitä.
			if(is.na(timeline$EventrowStart[j]) & is.na(timeline$EventrowEnd[j])) 
			{
				timeline$EventrowStart[j] <- timeline$EventrowStart[j-1]
			}
		}
			
		# Jos uutta toimintaa on olemassa, statuksena käytetään sitä, muutoin statusta eli aiempaa toimintaa.
		timeline$Eventrow <- ifelse(!is.na(timeline$EventrowStart), timeline$EventrowStart, timeline$Eventrow)
		
		# Leikataan turhat sarakkeet pois ja siirrytään seuraavalle event-riville
		timeline <- timeline[ , c("Time", "Eventrow")] 
	}

	event <- rbind(event, rep(NA, ncol(event))) # Lisää rivi loppuhetkeä varten
	event$Alku[nrow(event)] <- max(timeline$Time)
	event$Eventrow <- row(event)[ , 1]

	timeline <- merge(timeline, event) # Yhdistetään Statuksen eli eventin rivinumeron avulla.
	timeline <- timeline[!colnames(timeline) %in% c("Eventrow", "Alku", "Loppu", "Toisto", "Kesto")]
	timeline <- timeline[order(timeline$Time) , ]

	if(timeformat) timeline$Time <- as.POSIXct(timeline$Time, origin = "1970-01-01")
	
	return(timeline)
}

# Calculate the cumulative impact of the events on building stock to given years

timepoints <- function(
	# Function timepoints takes an event list and turns that into existing crosscutting situations at 
	# timepoints defined by years. The output will have index Time.
	# In other words, this will integrate over obstime at specified timepoints.
	X, # X must be an ovariable with a column of the same name as obstime.
	obstime, # obstime must be a single-column data.frame of observation times. 
	sumtimecol = TRUE # Should the timecol be summed up?
	# obstime and timecol may be numeric (by coercion) or POSIXt.
) {
	timecol <- colnames(obstime)
	marginals <- colnames(X@output)[X@marginal]

	# tapply (and therefore possibly oapply) changes continuous indices to factors! Must change back by hand.
	if("factor" %in% c(class(obstime[[timecol]]), class(X@output[[timecol]]))) {
		X@output[[timecol]] <- as.numeric(as.character(X@output[[timecol]]))
		obstime[[timecol]] <- as.numeric(as.character(obstime[[timecol]]))
	}

	out <- data.frame()
	if(sumtimecol) by <- setdiff(marginals, timecol) else by <- marginals

	for(i in obstime[[timecol]]) {
		temp <- X@output[X@output[[timecol]] <= i , ]
		if(nrow(temp) > 0) {
			temp <- aggregate(
				temp[paste(X@name, "Result", sep = "")],
				by = temp[colnames(temp) %in% by],
				FUN = sum
			)
		}
		if(nrow(temp) > 0) out <- rbind(out, data.frame(Time = i, temp))
	}
	X@output <- out
	X@marginal <- colnames(out) %in% c("Time", marginals) # Add Time to marginal

	return(X)
}

# ana2ova takes in variable tables from Analytica (produced with Copy Table). The output is a data.frame in long format.
ana2ova <- function(dat) 
{
	i <- 1 # Number of indices
	cols <- character() # Names of indices
	locs <- list() # Vectors of locations for each index.
	out <- data.frame() # output
	repeat{ # Find out cols and locs from dat.
		temp <- strsplit(dat[i + 1], split = "\t")[[1]]
		cols[i] <- temp[1]
		if(length(temp) == 1) break
		locs[[i]] <- temp[2:length(temp)]
		i <- i + 1
	}
	locs[[i]] <- NA # The innermost index should have its place in locs even if the locations are not yet known.

	inner <- (1:length(dat))[dat %in% cols[i]] # Places where the innermost index is mentioned (the data starts from the next row).
	if(!all(dat[inner - 1] == dat[i]) & i > 1) stop("Structure incorrect\n")
	if(length(inner) == 1) # len is the length of the innermost index.
	{
		len <- length(dat) - i - 1
	} else {
		len <- inner[2] - inner[1] - i
	}

	for(k in inner) # Go through each table with the inner and second most inner indices.
	{
		ins <- strsplit(dat[(k + 1):(k + len)], split = "\t") # Make a data.frame.
		ins <- data.frame(matrix(unlist(ins), nrow = len, byrow = TRUE))

		if(i == 1) cn <- "value" else cn <- locs[[i - 1]]
		colnames(ins) <- c(cols[i], cn) # Give location names to columns
		if(i > 1) ins <- melt(ins, id.vars = cols[i], variable.name = cols[i - 1]) # If several columns.
		if(i > 2) # If there are several tables.
		{
			uplocs <- data.frame(Removethis = 0)
			for(l in 1:(i - 2))
			{
				uplocs[[cols[l]]] <- strsplit(dat[k - i + l], split = "\t")[[1]][2]
			}
			out <- rbind(out, cbind(uplocs, ins)) # Collects all tables into a data.frame.
		} else {
			out <- ins
		}
	}
	out$Removethis <- NULL
	colnames(out)[colnames(out) == "value"] <- "Result"
	return(out)
}

rm(wiki_username)
objects.store(list = ls())
cat("All objects in the global namespace were stored:", ls(), "\n")

Functions for GIS data

+ Show code - Hide code

#This is code Op_en6007/gis on page [OpasnetUtils/Drafts]]

library(OpasnetUtils)

MyPointKML <- function( # The function creates a KML fille from a SpatialPointsDataFrame object.
	obj = NULL, # Spatial object with the data. A SpatialPointsDataFrame.
	kmlname = "", # Name of the KML fille (does this show on the map?)
	kmldescription = "", # Description of the KML fille (does this show on the map?)
	name = NULL, # Name for each datapoint (vector with the same length as data in obj).
	description = "", # Descrtion of each datapoint (vector with the same length as data in obj).
	icon = "http://maps.google.com/mapfiles/kml/pal4/icon24.png", # Icon shown on pin (?)
	col=NULL # I don't know what this does.
) {

cat("This function MyPointKML is depreciated. Use google.point_kml in OpasnetUtilsExt instead.\n")

    if (is.null(obj)) 
        return(list(header = c("<?xml version=\"1.0\" encoding=\"UTF-8\"?>", 
            "<kml xmlns=\"http://earth.google.com/kml/2.2\">", 
            "<Document>", paste("<name>", kmlname, "</name>", 
                sep = ""), paste("<description><![CDATA[", kmldescription, 
                "]]></description>", sep = "")), footer = c("</Document>", 
            "</kml>")))
    if (class(obj) != "SpatialPointsDataFrame") 
        stop("obj must be of class 'SpatialPointsDataFrame' [package 'sp']")
    if (is.null(name)) {
        name = c()
        for (i in 1:nrow(obj)) name <- append(name, paste("site", 
            i))
    }
    if (length(name) < nrow(obj)) {
        if (length(name) > 1) 
            warning("kmlPoints: length(name) does not match nrow(obj). The first name will be replicated.")
        name <- rep(name, nrow(obj))
    }
    if (length(description) < nrow(obj)) {
        if (length(description) > 1) 
            warning("kmlPoints: length(description) does not match nrow(obj). The first description will be replicated.")
        description <- rep(description, nrow(obj))
    }
    if (length(icon) < nrow(obj)) {
        if (length(icon) > 1) 
            warning("kmlPoints: length(icon) does not match nrow(obj). Only the first one will be used.")
        icon <- icon[1]
    }

	# This is some kind of a colour definition
	
	col2kmlcolor <- function(col) 
		paste(rev(sapply(
			col2rgb(col, TRUE), 
			function(x) sprintf("%02x", x))
		), collapse = "")
	
    kml <- kmlStyle <- ""
    
	# Create the KML fille.
	
	kmlHeader <- c("<?xml version=\"1.0\" encoding=\"UTF-8\"?>","<kml xmlns=\"http://earth.google.com/kml/2.2\">", "<Document>")
    kmlFooter <- c("</Document>", "</kml>")
    
	# Create rows to the KML fille from data in obj.
	
    for (i in 1:nrow(obj)) {
        point <- obj[i, ]
        pt_style <- paste("#style", ifelse(length(icon) == 1, 1, i), sep = "")
        kml <- append(kml, "<Placemark>")
        kml <- append(kml, paste(
		"  <description><![CDATA[",
		name[i],
		": ",
		description[i], 
		"]]></description>", 
		sep = ""
	))
	#kml <- append(kml, "<Style><IconStyle>")
	#kml <- append(kml, paste("<color>", col2kmlcolor(col[i]), "</color>", sep =""))
	#kml <- append(kml, paste("  <Icon><href>", icon, "</href></Icon>", sep = ""))
	#kml <- append(kml, "<scale>0.300000</scale>")
	#kml <- append(kml, "</IconStyle></Style>")
        kml <- append(kml, "  <Point>")
        kml <- append(kml, "    <coordinates>")
        kml <- append(kml, paste(point@coords[1], point@coords[2], sep = ","))
        kml <- append(kml, "    </coordinates>")
        kml <- append(kml, "  </Point>")
        kml <- append(kml, "</Placemark>")
    }
    
    return(paste(paste(c(kmlHeader, kmlStyle, kml, kmlFooter), sep = "", collapse = "\n"), collapse="\n", sep = ""))
}

ova2spat <- function( # This function converts an ovariable or a data.frame into a SpatialPointsDataFrame.
  dat, # An evaluated ovariable or data.frame that has coordinate indices.
  coords = c("LO", "LA"), # The names of the coordinate indices as a character vector, first x then y.
  proj4string = NULL # Projection identifier or specification as character string. See http://spatialreference.org/
  # If proj4string is NULL, longitude-latitude system is assumed.
) {
  if(class(dat) == "ovariable") temp <- dat@output else
    if(is.data.frame(dat)) temp <- dat else
      stop("object must be either evaluated ovariable or data.frame\n")
  
  # Transform coordinates into numeric format.
  
  for(i in coords) { 
    temp[[i]] <- as.numeric(as.character(temp[[i]]))
  }
  
  # Define the coordinate points first, then add other ovariable output to it.
  
  if(is.null(proj4string)) {
    sp <- SpatialPoints(temp[coords], CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs"))
    } else {
      sp <- SpatialPoints(temp[coords], CRS(proj4string))
    }
  out <- SpatialPointsDataFrame(sp, temp[!colnames(temp) %in% coords])
  
  #Transform the projection to longitude-latitude system.
  if(!is.null(proj4string)) {
    epsg4326String <- CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs")
    out <- spTransform(out,epsg4326String)
  }
  
  return(out)
}


# MyRmap is a function for creating static Google maps as png.
# It is based on MyMap function without the "file destination" parameter
# Requires RgoogleMaps package

MyRmap <- function (
	shp, # a spatial data object
	plotvar, # Name of the column that has the values to be illustrated on the map
	pch = 19, # Shape of the point (19: circle)
	cex = 0.3, # Size of the point
	legend_title = "", # Title of the legend
	legend_position = "topleft",
	numbins = 8, # Number of colour bins in graph
	center, # center of the map
	size = c(640, 480), # size of the map. This produces the right dimensions in Opasnet.
	MINIMUMSIZE = FALSE, 
	RETURNIMAGE = TRUE, 
	GRAYSCALE = FALSE, 
	NEWMAP = TRUE, 
	zoom, 
	verbose = 1, 
	...
) {
	plotvar	<- shp[[plotvar]]
	plotclr	<- brewer.pal(numbins, "Spectral")
	classes	<- classIntervals(plotvar, numbins, style = "quantile")
	colcode	<- findColours(classes, plotclr)
	latR	<- shp@coords[ , 2]
	lonR	<- shp@coords[ , 1]

	#get the bounding box:

	bb <- qbbox(lat = latR, lon = lonR)

	if (missing(zoom)) 
		zoom <- min(MaxZoom(latR, lonR, size))
	if (missing(center)) {
		lat.center <- mean(latR)
		lon.center <- mean(lonR)
	}
	else {
		lat.center <- center[1]
		lon.center <- center[2]
	}
	if (MINIMUMSIZE) {
		ll <- LatLon2XY(latR[1], lonR[1], zoom) # I think the latR and lonR are used here differently than how they
		ur <- LatLon2XY(latR[2], lonR[2], zoom) # are used elsewhere. Thus, if MINIMUMSIZE = TRUE, you may see problems.
		cr <- LatLon2XY(lat.center, lon.center, zoom)
		ll.Rcoords <- Tile2R(ll, cr)
		ur.Rcoords <- Tile2R(ur, cr)
		if (verbose > 1) {
			cat("ll:")
			print(ll)
			print(ll.Rcoords)
			cat("ur:")
			print(ur)
			print(ur.Rcoords)
			cat("cr:")
			print(cr)
		}
		size[1] <- 2 * max(c(ceiling(abs(ll.Rcoords$X)), ceiling(abs(ur.Rcoords$X)))) + 1
		size[2] <- 2 * max(c(ceiling(abs(ll.Rcoords$Y)), ceiling(abs(ur.Rcoords$Y)))) + 1

		if (verbose) cat("new size: ", size, "\n")
	}

	MyMap <- GetMap(
		center = c(lat.center, lon.center),
		zoom = zoom, 
		size = size, 
		RETURNIMAGE = RETURNIMAGE, 
		GRAYSCALE = GRAYSCALE, 
		verbose = verbose, 
		...
	)

	PlotOnStaticMap(MyMap) # Plot an empty map.

	PlotOnStaticMap( # Plot the data points on the map.
		MyMap,
		lat = latR,
		lon = lonR,
		pch = pch,
		cex = cex,
		col = colcode,
		add = T
	)

	legend( # Plot the legend on the map.
		legend_position, 
		legend	= names(attr(colcode, "table")),
		title	= legend_title, 
		fill	= attr(colcode, "palette"), 
		cex	= 1.0, 
		bty	= "y",
		bg	= "white"
	)
}

MyPlotKML <- function(
	shp, # a SpatialPointDataFrame object.
	result = "Result", # The name of  result column in shp.
	rasterization = TRUE, # Whether to rasterize the data or not.
	ncols = 32, # Number or columns in the raster.
	nrows = 32, # Number of rows in the raster.
	fun = mean # function to aggregate data points to the raster.
) {	

cat("Consider merging this function MyPolotKML with google.show_raster_on_maps in OpasnetUtilsExt.\n")

	if(rasterization) {
		#Create blank raster
		rast		<- raster()

		#Set raster extent to that of point data
		extent(rast)	<-extent(shp)

		#Choose number of columns and rows
		ncol(rast)	<- ncols
		nrow(rast)	<- nrows

		#Rasterize point data
		rast2	<- rasterize(shp, rast, shp[[result]], fun = fun)

	}

	start <- 0 # min(shp[[result]])
	end <- max(shp[[result]])
	steps <- approx(c(start,end),n=6)$y
	colors <- rev(rainbow(length(steps), start=0, end=0.50))

	# Create the colorstrip below the map.

	par(mfrow=c(6,1), mar=c(3,1,0,1), cex = 1.5)

	colorstrip <- function(colors, labels)
	{
		count <- length(colors)
		image(
			matrix(1:count, count, 1), 
			col = colors, 
			ylab = "", 
			axes = FALSE
		)
		axis(1,approx(c(0, 1), n=length(labels))$y, labels)
	}

	colorstrip(colors, steps)

	#Plot data

	google.show_raster_on_maps(rast2, col = colors, style = "height:500px;")
}

objects.store(MyPointKML, ova2spat, MyRmap, MyPlotKML)
cat("Functions MyPointKML, ova2spat, MyRmap, MyPlotKML stored.\n")

References

Related files

Difference between revisions of "OpasnetUtils/Drafts"

Revision as of 08:14, 6 June 2017

Contents

Question

Answer

Rationale

Calculations

See also

References

Related files

Navigation menu

Personal tools

Namespaces

Variants

Views

More

Search

Navigation

Page Tools

Tools

In other websites