Difference between revisions of "OpasnetUtils/Tidy"

From Testiwiki
Jump to: navigation, search
m (Some thoughts added, implementation to follow)
(Code: Discussed changes implemented. Conversion to proper class for resulting data.frame constituents added.)
Line 21: Line 21:
 
# TIDY ########### tidy: a function that cleans the tables from Opasnet Base
 
# TIDY ########### tidy: a function that cleans the tables from Opasnet Base
 
# data is a table from op_baseGetData function
 
# data is a table from op_baseGetData function
tidy <- function (data, idvar = "obs", direction = "wide") {
+
tidy <- function (data, objname = "", idvar = "obs", direction = "wide") {
 
+
data$Result <- ifelse(data$Result.Text == "", data$Result, as.character(data$Result.Text))
data$Result <- ifelse(!is.na(data$Result.Text), as.character(data$Result.Text), data$Result)
+
#data <- data[
if("Observation" %in% colnames(data)){test <- data$Observation != "Description"} else {test <- TRUE}
+
# ifelse("Observation" %in% colnames(data),
data <- data[test, !colnames(data) %in% c("id", "Result.Text")]
+
# data$Observation != "Description",
if("obs.1" %in% colnames(data)) {data[, "obs"] <- data[, "obs.1"]} # this line is temporarily needed until the obs.1 bug is fixed.
+
# TRUE
data <- data[colnames(data) != "obs.1"]
+
# ),
 +
# !colnames(data) %in% c("id", "Result.Text")
 +
#]
 +
data <- data[, !colnames(data) %in% c("id", "Result.Text")]
 +
if("obs.1" %in% colnames(data)) { # this line is temporarily needed until the obs.1 bug is fixed.
 +
data[, "obs"] <- data[, "obs.1"]
 +
data <- data[, colnames(data) != "obs.1"]
 +
}
 
if("Row" %in% colnames(data)) { # If user has given Row, it is used instead of automatic obs.
 
if("Row" %in% colnames(data)) { # If user has given Row, it is used instead of automatic obs.
 
data <- data[, colnames(data) != "obs"]
 
data <- data[, colnames(data) != "obs"]
 
colnames(data)[colnames(data) == "Row"] <- "obs"
 
colnames(data)[colnames(data) == "Row"] <- "obs"
 
}
 
}
if(direction == "wide" & "Observation" %in% colnames(data))  
+
if (objname != "") objname <- paste(objname, ":", sep = "")
{
+
if (direction == "wide") {
data <- reshape(data, idvar = idvar, timevar = "Observation", v.names = "Result", direction = "wide")
+
if("Observation" %in% colnames(data)) {
data <- data[colnames(data) != "obs"]
+
cols <- levels(data$Observation)
colnames(data) <- gsub("^Result.", "", colnames(data))
+
data <- reshape(data, idvar = idvar, timevar = "Observation", v.names = "Result", direction = "wide")
colnames(data)[colnames(data) == "result"] <- "Result"
+
data <- data[colnames(data) != "obs"]
colnames(data)[colnames(data) == "Amount"] <- "Result"
+
colnames(data) <- gsub("^Result.", objname, colnames(data))
}
+
for (i in paste(objname, cols, sep = "")) {
else
+
a <- as.numeric(data[, i])
{
+
if (sum(is.na(a)) == 0) data[, i] <- a else data[, i] <- factor(data[, i])
data <- data[colnames(data) != "obs"]
+
}
 +
colnames(data)[grepl(paste("^", objname, "result", sep = ""), colnames(data))] <- paste(objname, "Result", sep = "")
 +
colnames(data)[grepl(paste("^", objname, "Amount", sep = ""), colnames(data))] <- paste(objname, "Result", sep = "")
 +
return(data)
 +
}
 +
if("Parameter" %in% colnames(data)) {
 +
cols <- levels(data$Parameter)
 +
data <- reshape(data, idvar = idvar, timevar = "Parameter", v.names = "Result", direction = "wide")
 +
data <- data[colnames(data) != "obs"]
 +
colnames(data) <- gsub("^Result.", objname, colnames(data))
 +
for (i in paste(objname, cols, sep = "")) {
 +
a <- as.numeric(data[, i])
 +
if (sum(is.na(a)) == 0) data[, i] <- a else data[, i] <- factor(data[, i])
 +
}
 +
colnames(data)[grepl(paste("^", objname, "result", sep = ""), colnames(data))] <- paste(objname, "Result", sep = "")
 +
colnames(data)[grepl(paste("^", objname, "Amount", sep = ""), colnames(data))] <- paste(objname, "Result", sep = "")
 +
return(data)
 +
}
 
}
 
}
 +
data <- data[,colnames(data) != "obs"]
 +
colnames(data)[colnames(data)=="Result"] <- paste(objname, "Result", sep = "")
 
return(data)
 
return(data)
 
}
 
}

Revision as of 07:06, 19 June 2012



Description

a function that cleans the tables from Opasnet Base

Parameters

  • data is a table from op_baseGetData function

Code

--# : Tidy should widen the "Parameter" index as well as "Observation". Also it should enable marginal recognition of the widened variables. --Teemu R 15:57, 18 June 2012 (EEST)

--# : Recognition could be achieved by adding the variable name as a prefix to every location under the involved indices. --Teemu R 15:57, 18 June 2012 (EEST)

- Hide code

# TIDY ########### tidy: a function that cleans the tables from Opasnet Base
#
# Arguments:
#   data      a table from op_baseGetData function. The code uses the column
#             Result and, when present, Result.Text, id, obs, obs.1, Row,
#             Observation and Parameter.
#   objname   optional object name. When non-empty, "<objname>:" is prefixed
#             to the Result column and to every widened column so that the
#             origin of the columns can be recognised.
#   idvar     column(s) identifying one row of the wide table; passed on to
#             reshape().
#   direction "wide" spreads Observation (or, when there is no Observation
#             column, Parameter) into one column per value. Any other value
#             leaves the table in long format.
#
# Value: a data.frame without the id, Result.Text and obs columns.
tidy <- function (data, objname = "", idvar = "obs", direction = "wide") {
	# Text results override numeric ones. A missing (NA) or empty Result.Text
	# means "no text", so the value already in Result is kept.
	if ("Result.Text" %in% colnames(data)) {
		txt <- as.character(data$Result.Text)
		res <- data$Result
		# ifelse() strips attributes, so a factor would leak its integer codes.
		if (is.factor(res)) res <- as.character(res)
		data$Result <- ifelse(is.na(txt) | txt == "", res, txt)
	}
	# drop = FALSE keeps a data.frame even when a single column remains.
	data <- data[, !colnames(data) %in% c("id", "Result.Text"), drop = FALSE]
	if ("obs.1" %in% colnames(data)) { # this is temporarily needed until the obs.1 bug is fixed.
		data[, "obs"] <- data[, "obs.1"]
		data <- data[, colnames(data) != "obs.1", drop = FALSE]
	}
	if ("Row" %in% colnames(data)) { # If user has given Row, it is used instead of automatic obs.
		data <- data[, colnames(data) != "obs", drop = FALSE]
		colnames(data)[colnames(data) == "Row"] <- "obs"
	}
	if (objname != "") objname <- paste0(objname, ":")

	# Spreads the values of column `key` into one column each, named
	# "<objname><value>", and gives every new column a proper class.
	widen <- function (data, key) {
		# Use the values actually present: levels() is NULL for character
		# columns and may list unused levels that reshape() never creates.
		cols <- unique(as.character(data[[key]]))
		cols <- cols[!is.na(cols)]
		data <- reshape(data, idvar = idvar, timevar = key, v.names = "Result", direction = "wide")
		data <- data[colnames(data) != "obs"]
		# Replace the literal "Result." prefix made by reshape(); plain string
		# matching avoids touching columns such as "Results".
		nms <- colnames(data)
		widened <- startsWith(nms, "Result.")
		nms[widened] <- paste0(objname, substring(nms[widened], nchar("Result.") + 1L))
		colnames(data) <- nms
		for (i in intersect(paste0(objname, cols), colnames(data))) {
			values <- data[[i]]
			if (is.factor(values)) values <- as.character(values)
			# Coercion warnings are expected for text columns; the NA check
			# below is what decides the class.
			a <- suppressWarnings(as.numeric(values))
			# Numeric only when every value converts; any NA (including an
			# originally missing cell) makes the column a factor.
			data[[i]] <- if (anyNA(a)) factor(values) else a
		}
		# Columns whose name starts with "<objname>result" or "<objname>Amount"
		# are treated as the result column.
		for (old in c("result", "Amount")) {
			hit <- startsWith(colnames(data), paste0(objname, old))
			colnames(data)[hit] <- paste0(objname, "Result")
		}
		data
	}

	if (direction == "wide") {
		# Observation takes precedence; Parameter is widened only without it.
		for (key in c("Observation", "Parameter")) {
			if (key %in% colnames(data)) return(widen(data, key))
		}
	}
	data <- data[, colnames(data) != "obs", drop = FALSE]
	colnames(data)[colnames(data) == "Result"] <- paste0(objname, "Result")
	return(data)
}

See also