+ Show code- Hide code
# This is code Op_en7749/bayes on page [[Goherr: Fish consumption study]]
library(OpasnetUtils)
library(ggplot2)
library(reshape2)
library(rjags)
library(car)
library(vegan)
#library(gridExtra) # Error: package ‘gridExtra’ was built before R 3.0.0: please re-install it
# Fish intake in humans
# Data from data.frame survey from page [[Goherr: Fish consumption study]]
# Start with salmon questions 46:49 (amounts eaten)
# Some preprocessing should be moved away from this code.
# Model assumes that questionnaire data follows binomial distribution with parameter p and max value levels.
# p depends on country (4 groups), sex (2), age (2) that are known individually.
# We should test whether sex makes a difference or whether it can be omitted.
# Predicted p for each question, determined by the group, are produced from the bayes model.
# p is used to sample answers, which are then combined with estimates of amounts related to each answer.
# Total amount eaten of each fish per modelled individual is finally calculated.
objects.latest("Op_en7749", "preprocess2") # [[Goherr: Fish consumption study]]: survey, surcol
agel <- as.character(unique(survey$Ages))
countryl <- sort(as.character(unique(survey$Country)))
genderl <- sort(as.character(unique(survey$Gender)))
fisl <- c("Salmon", "Herring")
# Interesting fish eating questions
surv <- survey[c(1,3,158,16,29,30,31,46:49,86,95:98)]
colnames(surv)
#[1] "Country" "Gender"
#[3] "Ages" "Fish eating"
#[5] "How often eat fish" "Salmon eating"
#[7] "Baltic salmon" "How often Baltic salmon"
#[9] "How much Baltic salmon" "How often side Baltic salmon"
#[11] "How much side Baltic salmon" "Eat Baltic herring"
#[13] "How often Baltic herring" "How much Baltic herring"
#[15] "How often side Baltic herring" "How much side Baltic herring"
test <- sapply(unique(surv[c(4,7,12)]), unique)#function(x) sum(is.na(x)))
oprint(table(surv[c(12,7,4)], useNA = "ifany"))
oprint(table(surv[c(13,12)], useNA = "ifany"))
# For estimating distributions, we should
#1 remove people with Fish eating = No (142)
#2 merge Eat Baltic herring = I don't know with No (How often BH = NA always)
#3 merge Baltic salmon = NA with No (because they usually have answered BH questions)
oprint(table(is.na(rowSums(sapply(surv[4:16], as.numeric)))))
# BUT: there are so many missing values, that we just model BH and BS separately now.
#surv$Ages <- match(surv$Ages, agel) # Not a factor, coerce to integer
surv <- as.data.frame(lapply(surv, FUN = function(x) as.integer(x))) # Coerce to integers
surv[is.na(surv[[12]]) | surv[[12]] == 3 , 12] <- 1 # Eat Baltic herring: I don't know --> No
surv[4:16] <- lapply(surv[4:16], FUN = function(x) x-1) # Scale to start from zero
# Row numbers for respondents that have eaten fish, Baltic salmon, and Baltic herring
eatfish <- surv[[4]] %in% 1
eatsalm <- !(surv[[7]] %in% 0 | is.na(rowSums(surv[7:11])))
eatherr <- surv[[12]] %in% 1 & !is.na(rowSums(surv[13:16]))
# Oletetaan, että covarianssimatriisi on vakio kaikille maille ja sukupuolille yms
# mutta keskiarvo on spesifi näille ja kysymykselle.
#qlen <- c(4,2,2,2,6,2,2,7,7,7,5,2,7,7,7,5) # Number of options in each question
# qlen not needed when dbinom is not used.
agel
countryl
genderl
fisl
####################### Descriptive statistics
oprint(cor(surv, use = "pairwise.complete.obs"))
# --> Baltic salmon and herring eating are correlated, so they should be estimated together
############################# Plot original data
## Eating frequencies of fish and Baltic salmon and herring with random noise, all
pl <- surv[eatfish,c(5,8,10,13,15)]
scatterplotMatrix(
data.matrix(pl) + runif(nrow(pl)*ncol(pl), -0.5, 0.5)
)
ggplot(data.frame(
X = rep(1,5), Y = 5:1,
legend = c("All", "Finland", "Sweden", "Denmark", "Estonia")
), aes(x=X, y=Y, label=legend))+
geom_text()+
labs(title="Fish eating questions with random noise")
## Fish eating questions with some random noise, all
pl <- surv[eatfish,c(5,6,7,12)]
scatterplotMatrix(
data.matrix(pl) + runif(nrow(pl)*ncol(pl), -0.5, 0.5)
)
## Fish eating questions with some random noise, FI
pl <- surv[surv$Country == 1 & eatfish,c(5,6,7,12)]
scatterplotMatrix(
data.matrix(pl) + runif(nrow(pl)*ncol(pl), -0.5, 0.5)
)
## Fish eating questions with some random noise, SWE
pl <- surv[surv$Country == 2 & eatfish,c(5,6,7,12)]
scatterplotMatrix(
data.matrix(pl) + runif(nrow(pl)*ncol(pl), -0.5, 0.5)
)
## Fish eating questions with some random noise, Dk
pl <- surv[surv$Country == 3 & eatfish,c(5,6,7,12)]
scatterplotMatrix(
data.matrix(pl) + runif(nrow(pl)*ncol(pl), -0.5, 0.5)
)
## Fish eating questions with some random noise, EST
pl <- surv[surv$Country == 4 & eatfish,c(5,6,7,12)]
scatterplotMatrix(
data.matrix(pl) + runif(nrow(pl)*ncol(pl), -0.5, 0.5)
)
## Baltic herring questions with some random noise
pl <- surv[eatherr,13:16]
scatterplotMatrix(
data.matrix(pl) + runif(nrow(pl)*ncol(pl), -0.5, 0.5)
)
## Baltic salmon questions with some random noise
pl <- surv[eatsalm,8:11]
scatterplotMatrix(
data.matrix(pl) + runif(nrow(pl)*ncol(pl), -0.5, 0.5)
)
############################################################
cat("Version with multivariate normal.\n")
ggplot(data.frame(A=1:2, B=1:2), aes(A, y=B))+geom_point()+
labs(title="Version with multivariate normal.")
mod <- textConnection("
model{
for(i in 1:S) {
survs[i,1:4] ~ dmnorm(mus[], Omegas[,])
}
for(j in 1:H) {
survh[j,1:4] ~ dmnorm(muh[], Omegah[,])
}
mus[1:4] ~ dmnorm(mu0[1:4], S2[1:4,1:4])
Omegas[1:4,1:4] ~ dwish(S3[1:4,1:4],S)
anss.pred ~ dmnorm(mus[], Omegas[,])
muh[1:4] ~ dmnorm(mu0[1:4], S2[1:4,1:4])
Omegah[1:4,1:4] ~ dwish(S3[1:4,1:4],H)
ansh.pred ~ dmnorm(muh[], Omegah[,])
}
")
jags <- jags.model(
mod,
data = list(
survs = surv[eatsalm,c(8:11)],
S = sum(eatsalm),
survh = surv[eatherr,c(13:16)],
H = sum(eatherr),
mu0 = rep(2,4),
S2 = diag(4)/100000,
S3 = diag(4)/10000
),
n.chains = 4,
n.adapt = 100
)
update(jags, 100)
samps.j <- jags.samples(
jags,
c('muh','Omegah', 'ansh.pred', 'mus', 'Omegas', 'anss.pred'),
1000
)
jh <- array(
samps.j$ansh.pred,
dim = c(4,1000,4),
dimnames = list(Question = 1:4, Iter = 1:1000, Seed = 1:4)
)
scatterplotMatrix(t(jh[,,1]))
js <- array(
samps.j$anss.pred,
dim = c(4,1000,4),
dimnames = list(Question = 1:4, Iter = 1:1000, Seed = 1:4)
)
scatterplotMatrix(t(js[,,1]))
samps.c <- coda.samples(
jags,
c('muh','Omegah', 'ansh.pred', 'mus', 'Omegas', 'anss.pred'),
1000
)
plot(samps.c)
jhm <- melt(jh[,,1], value.name = "Result")
jsm <- melt(js[,,1], value.name = "Result")
oftenh <- Ovariable("oftenh", data = jsm[jsm$Question == 1, 2:3])
muchh <- Ovariable("muchh" , data = jsm[jsm$Question == 2, 2:3])
ofsideh <- Ovariable("ofsideh", data = jsm[jsm$Question == 3, 2:3])
musideh <- Ovariable("musideh", data = jsm[jsm$Question == 4, 2:3])
oftens <- Ovariable("oftens", data = jhm[jsm$Question == 1, 2:3])
muchs <- Ovariable("muchs" , data = jhm[jsm$Question == 2, 2:3])
ofsides <- Ovariable("ofsides", data = jhm[jsm$Question == 3, 2:3])
musides <- Ovariable("musides", data = jhm[jsm$Question == 4, 2:3])
#objects.store(oftenh, muchh, ofsideh, musideh, oftens, muchs, ofsides, musides)
#cat("Ovariables oftenh, muchh, ofsideh, musideh, oftens, muchs, ofsides, musides stored.\n")
##################### CORRELATION MATRIX
temp <- sapply(survey, as.numeric) # Can be done for surv to get a smaller matrix
survey_correlations <- (cor(temp, method="spearman", use="pairwise.complete.obs"))
temp <- colnames(survey_correlations)
melted_correlations <- melt(survey_correlations)
melted_correlations$Var1 <- factor(melted_correlations$Var1, levels=temp)
melted_correlations$Var2 <- factor(melted_correlations$Var2, levels=temp)
melted_correlations$value <- ifelse(melted_correlations$value >= 0.99,NA,melted_correlations$value)
ggplot(melted_correlations, aes(x = Var1, y = Var2, fill = value, label= round(value, 2)))+
geom_raster()+
theme(axis.text.x=element_text(angle=90,hjust=1,vjust=0.4))+
scale_fill_gradient2(low = "#480610", mid = "#FFFFFF", high = "#06480F", midpoint = 0, space = "Lab", guide = "colourbar")
############################### PRINCIPAL COORDINATE ANALYSIS (PCoA)
#tämä osa valmistaa sen datan.
hypocols1 <- c(46:49,95:98)
answ <- sapply(survey[hypocols1], FUN=as.numeric)
answ <- as.matrix(answ[!is.na(rowSums(answ)),])
pcoa_caps <- capscale(t(answ) ~ 1, distance="euclidean") ##PCoA done
## Kuva koko hypoteeseista
traits <- as.factor(c(rep("bipedalism", 10), rep("brain", 10), rep("hairlessness", 8),
rep("fat", 4), rep("larynx", 3), rep("speech", 7), rep("other", 9)))
colstr <- c("palevioletred1","royalblue1","seagreen1","violet","khaki2","skyblue", "orange")
trait.cols <- colstr[traits]
hypo_sizes <- (5 - colMeans(answ))
leg_sizes <- c(4, 3, 2, 1, 0.01)
#pdf(file="pcoa_plot.pdf", height=6, width=7.5)
plot(pcoa_caps, display = c("sp", "wa"), type="n")#, xlim=c(-6,4.5)) ## PCoA biplot, full scale
points(pcoa_caps, display= c("sp"), col="gray40") # adding the people points
points(pcoa_caps, display= c("wa"), pch=19)#, cex=hypo_sizes, col=trait.cols)
text(pcoa_caps, display=c("wa"), srt=25, cex=0.5)
#legend(x=-6, y=3.8, levels(traits), fill=colstr, bty="n", cex=1)
#legend(-6, -2, legend=c("Very likely", "Moderately likely",
# "No opinion", "Moderately unlikely", "Very unlikely"),
# pch=21, pt.cex = leg_sizes, bty="n", cex=1)
#dev.off()
| |