Difference between revisions of "Goherr: Fish consumption study"

From Testiwiki
Jump to: navigation, search
(Bayes model)
(Bayes model)
Line 224: Line 224:
 
* Model run 3.3.2017. p has more dimensions. [http://en.opasnet.org/en-opwiki/index.php?title=Special:RTools&id=ZmbNUuZeb7UOf8NP]
 
* Model run 3.3.2017. p has more dimensions. [http://en.opasnet.org/en-opwiki/index.php?title=Special:RTools&id=ZmbNUuZeb7UOf8NP]
 
* Model run 25.3.2017. Several model versions: strange binomial+multivarnormal, binomial, fractalised multivarnormal [http://en.opasnet.org/en-opwiki/index.php?title=Special:RTools&id=pKe0s2Ldm1mbIVuO]
 
* Model run 25.3.2017. Several model versions: strange binomial+multivarnormal, binomial, fractalised multivarnormal [http://en.opasnet.org/en-opwiki/index.php?title=Special:RTools&id=pKe0s2Ldm1mbIVuO]
* Moddel run 27.3.2017 [http://en.opasnet.org/en-opwiki/index.php?title=Special:RTools&id=2hY9p2r8CTJi3Qwq]
+
* Model run 27.3.2017 [http://en.opasnet.org/en-opwiki/index.php?title=Special:RTools&id=2hY9p2r8CTJi3Qwq]
 +
* Other models except multivariate normal were [http://en.opasnet.org/en-opwiki/index.php?title=Goherr:_Fish_consumption_study&oldid=40185 archived] and removed from active code 29.3.2017.
 +
* Model run 29.3.2017 with raw data graphs [http://en.opasnet.org/en-opwiki/index.php?title=Special:RTools&id=BB8nePJb7hzSw6Ha]
  
 
<rcode name="bayes" label="Initiate Bayes model (for developers only)" graphics=1>
 
<rcode name="bayes" label="Initiate Bayes model (for developers only)" graphics=1>
Line 271: Line 273:
  
 
# Row numbers for respondents that have eaten fish, Baltic salmon, and Baltic herring
 
# Row numbers for respondents that have eaten fish, Baltic salmon, and Baltic herring
eatfish <- (1:nrow(surv))[surv[[4]] %in% 1]
+
eatfish <- surv[[4]] %in% 1
 
eatsalm <- (1:nrow(surv))[!(surv[[7]] %in% 0 | is.na(surv[[7]]))]
 
eatsalm <- (1:nrow(surv))[!(surv[[7]] %in% 0 | is.na(surv[[7]]))]
 
eatherr <- (1:nrow(surv))[surv[[12]] %in% 1 & !is.na(rowSums(surv[13:16]))]
 
eatherr <- (1:nrow(surv))[surv[[12]] %in% 1 & !is.na(rowSums(surv[13:16]))]
Line 286: Line 288:
 
fisl
 
fisl
  
#################################################
+
############################# Plot original data
cat("Version with both binomial and multivariate normal.\n")
+
 
ggplot(data.frame(A=1:2, B=1:2), aes(A, y=B))+geom_point()+
+
levels(survey$Country)
   labs(title="Version with both binomial and multivariate normal.")
+
 
 +
ggplot(data.frame(
 +
  X = rep(1,5), Y = 5:1,
 +
  legend = c("All", "Finland", "Sweden", "Denmark", "Estonia")
 +
), aes(x=X, y=Y, label=legend))+
 +
geom_text()+
 +
   labs(title="Fish eating questions with random noise")
 +
 
 +
 
 +
## Fish eating questions with some random noise, all
 +
pl <- surv[eatfish,c(5,6,7,12)]
 +
scatterplotMatrix(
 +
  data.matrix(pl) + runif(nrow(pl)*ncol(pl), -0.5, 0.5)
 +
)
  
mod <- textConnection("
+
## Fish eating questions with some random noise, FI
  model{
+
pl <- surv[surv$Country == 1 & eatfish,c(5,6,7,12)]
    for(i in 1:S) {
+
scatterplotMatrix(
      for(j in 1:4) {
+
  data.matrix(pl) + runif(nrow(pl)*ncol(pl), -0.5, 0.5)
        surv[i,j] ~ dbin(p[j], qlen[j])
+
)
      }
 
    surv.scaled[i,1:4] ~ dmnorm(mu0[], Omega[,])
 
    }
 
    for(j in 1:4) {
 
      p[j] ~ dbeta(1,1)
 
    }
 
#    mu[1:4] ~ dmnorm(mu0[1:4], S2[1:4,1:4])
 
    Omega[1:4,1:4] ~ dwish(S3[1:4,1:4],4)
 
  }
 
")
 
  
jags <- jags.model(
+
## Fish eating questions with some random noise, SWE
  mod,
+
pl <- surv[surv$Country == 2 & eatfish,c(5,6,7,12)]
  data = list(
+
scatterplotMatrix(
    surv = surv[eatherr,13:16],
+
  data.matrix(pl) + runif(nrow(pl)*ncol(pl), -0.5, 0.5)
    surv.scaled = scale(surv[eatherr,13:16]),
 
    qlen = qlen[13:16],
 
    S = length(eatherr),
 
    mu0 = rep(0,4),
 
#    S2 = diag(4)/100000,
 
    S3 = diag(4)/10000
 
  ),
 
  n.chains = 4,
 
  n.adapt = 100
 
 
)
 
)
  
update(jags, 100)
+
## Fish eating questions with some random noise, Dk
samps.j <- jags.samples(jags, c('Omega', 'p'), 1000)
+
pl <- surv[surv$Country == 3 & eatfish,c(5,6,7,12)]
j <- array(
+
scatterplotMatrix(
   samps.j$p,
+
   data.matrix(pl) + runif(nrow(pl)*ncol(pl), -0.5, 0.5)
  dim = c(4,1000,4),
 
  dimnames = list(Question = 1:4, Iter = 1:1000, Seed = 1:4)
 
 
)
 
)
scatterplotMatrix(t(j[,,1]))
 
j.melt <- melt(j)
 
ggplot(j.melt, aes(x = Iter, y = value, colour = as.character(Seed)))+
 
  geom_line()+facet_wrap(~ Question)
 
  
j.Omega <- array(
+
## Fish eating questions with some random noise, EST
  samps.j$Omega,
+
pl <- surv[surv$Country == 4 & eatfish,c(5,6,7,12)]
  dim = c(4,4,1000,4),
+
scatterplotMatrix(
   dimnames = list(Q1 = 1:4, Q2 = 1:4, Iter = 1:1000, Seed = 1:4)
+
   data.matrix(pl) + runif(nrow(pl)*ncol(pl), -0.5, 0.5)
 
)
 
)
scatterplotMatrix(t(j.Omega[1,,,1]))
 
  
j.Omega.melt <- melt(j.Omega)
+
## Baltic herring questions with some random noise
ggplot(j.Omega.melt, aes(x = Iter, y = value, colour = as.character(Seed)))+
+
pl <- surv[eatherr,13:16]
  geom_line()+facet_grid(Q1 ~ Q2, scales = "free_y")
+
scatterplotMatrix(
 +
  data.matrix(pl) + runif(nrow(pl)*ncol(pl), -0.5, 0.5)
 +
)
  
samps.c <- coda.samples(jags, c('Omega','p'), 1000)
+
## Baltic salmon questions with some random noise
plot(samps.c)
+
pl <- surv[eatsalm,7:10]
 +
scatterplotMatrix(
 +
  data.matrix(pl) + runif(nrow(pl)*ncol(pl), -0.5, 0.5)
 +
)
  
 
############################################################
 
############################################################
Line 355: Line 349:
 
mod <- textConnection("
 
mod <- textConnection("
 
   model{
 
   model{
  for(i in 1:S) {
+
    for(i in 1:S) {
    surv[i,1:4] ~ dmnorm(mu[], Omega[,])
+
      surv[i,1:4] ~ dmnorm(mu[], Omega[,])
  }
+
    }
  mu[1:4] ~ dmnorm(mu0[1:4], S2[1:4,1:4])
+
    mu[1:4] ~ dmnorm(mu0[1:4], S2[1:4,1:4])
  Omega[1:4,1:4] ~ dwish(S3[1:4,1:4],4)
+
    Omega[1:4,1:4] ~ dwish(S3[1:4,1:4],4)
 +
    ans.pred ~ dmnorm(mu[], Omega[,])
 
   }
 
   }
 
")
 
")
Line 377: Line 372:
  
 
update(jags, 100)
 
update(jags, 100)
samps.j <- jags.samples(jags, c('mu','Omega', 'z'), 1000)
+
samps.j <- jags.samples(jags, c('mu','Omega', 'ans.pred'), 1000)
 
j <- array(
 
j <- array(
   samps.j$mu,
+
   samps.j$ans.pred,
 
   dim = c(4,1000,4),
 
   dim = c(4,1000,4),
 
   dimnames = list(Question = 1:4, Iter = 1:1000, Seed = 1:4)
 
   dimnames = list(Question = 1:4, Iter = 1:1000, Seed = 1:4)
Line 387: Line 382:
 
plot(samps.c)
 
plot(samps.c)
  
###########################################################
 
cat("Version with multivariate normal and fractile conversion.\n")
 
ggplot(data.frame(A=1:2, B=1:2), aes(A, y=B))+geom_point()+
 
  labs(title="Version with multivariate normal and fractile conversion.")
 
 
 
# Assume linearity in the responses, but it is artificially binned into survey answers.
 
# Assume that answers are uncorrelated with a subgroup of respondents that have
 
# answered in the same way to two questions. So, for each answer column d, you can
 
#1 shuffle the vector d (keeping in mind the original order)
 
#2 sort it based on the answer
 
#3 renumber it by (1:length(d) - 0.5)/length(d) to get a fractile from 0..1.
 
#4 transform it to normal distribution by qnorm(d)
 
#5 reorder d to the original order
 
# Then, use multivariate normal distribution to estimate parameters.
 
 
 
fractalise <- function(d) {
 
  f <- sample(seq_along(d)) # Create a random order for data
 
  g <- order(d[f]) # First randomorder and then put in order by size
 
  h <- qnorm((seq_along(d)-0.5)/length(d)) # 'z scores' of data points
 
  return(h[order(g)][order(f)]) # Reorder fractiles to original order
 
}
 
 
surv.f <- surv[1:nrow(surv) %in% eatherr & !is.na(rowSums(surv[13:16])) , ]
 
surv.f[13:16] <- lapply(surv.f[13:16], fractalise)
 
 
 
mod <- textConnection("
 
  model{
 
    for(i in 1:S) {
 
      surv.f[i,1:4] ~ dmnorm(mu[], Omega[,])
 
    }
 
    for(j in 1:4) {
 
      mu[j] ~ dnorm(0, 3)
 
    }
 
    Omega[1:4,1:4] ~ dwish(S3[1:4,1:4],S)
 
    surv.pred[1:4] ~ dmnorm(mu[1:4], Omega[1:4,1:4])
 
    Opri[1:4,1:4] ~ dwish(S3[1:4,1:4],S)
 
  }
 
")
 
 
jags <- jags.model(
 
  mod,
 
  data = list(
 
    surv.f = surv.f,
 
    S = nrow(surv.f),
 
#    mu0 = rep(0.5,4), # hyperprior for mean
 
#    S2 = diag(4)/100000, # hyperprior for mean
 
    S3 = diag(4)*5#/10000 # hyperprior for covariance table
 
  ),
 
  n.chains = 4,
 
  n.adapt = 100
 
)
 
 
update(jags, 100)
 
samps.j <- jags.samples(jags, c('mu','Omega', 'surv.pred', 'Opri'), 1000)
 
j <- array(
 
  samps.j$surv.pred,
 
  dim = c(4,1000,4),
 
  dimnames = list(Question = 1:4, Iter = 1:1000, Seed = 1:4)
 
)
 
 
oprint(cor(t(samps.j$surv.pred[,,1])))
 
oprint(cor(surv.f[13:16]))
 
scatterplotMatrix(t(j[,,1]))
 
scatterplotMatrix(surv.f[13:16])
 
 
j2 <- array(
 
  samps.j$Opri,
 
  dim = c(4,4,1000,4),
 
  dimnames = list(Q1 = 1:4, Q2 = 1:4, Iter = 1:1000, Seed = 1:4)
 
)
 
j2.cov <- apply(j2[,,,1], MARGIN = 1:2, FUN = mean)
 
oprint(j2.cov)
 
scatterplotMatrix(t(j2[1,,,1]))
 
 
samps.c <- coda.samples(jags, c('mu','Omega', 'surv.pred'), 1000)
 
plot(samps.c)
 
 
################################################################3
 
cat("MODEL WITH BINOMIAL DISTRIBUTION ASSUMPTION.\n")
 
ggplot(data.frame(A=1:2, B=1:2), aes(A, y=B))+geom_point()+
 
  labs(title="Version with binomial distribution assumption.")
 
 
 
# Interesting fish eating questions
 
surv <- survey[c(1,3,4,16,29,30,31,46:49,86,95:98,158)]
 
colnames(surv)
 
 
consume <- data.matrix(survey[c(
 
  "Baltic salmon",
 
  "Eat Baltic herring"
 
)])
 
consume <- 1 - (consume!=2 | is.na(consume)) # Yes=1, other=0
 
 
agel <- as.character(unique(survey$Ages))
 
countryl <- sort(as.character(unique(survey$Country)))
 
genderl <- sort(as.character(unique(survey$Gender)))
 
fisl <- c("Salmon", "Herring")
 
 
ans.sal <- survey[
 
    grep("Yes", survey[["Baltic salmon"]]) ,
 
    c(
 
      "How often Baltic salmon",
 
      "How much Baltic salmon",
 
      "How often side Baltic salmon",
 
      "How much side Baltic salmon",
 
      "Ages",
 
      "Country",
 
      "Gender"
 
    )]
 
 
ans.her <- survey[
 
    grep("Yes", survey[["Eat Baltic herring"]]) ,
 
    c(
 
      "How often Baltic herring",
 
      "How much Baltic herring",
 
      "How often side Baltic herring",
 
      "How much side Baltic herring",
 
      "Ages",
 
      "Country",
 
      "Gender"
 
    )]
 
quesl <-    c(
 
  "How often",
 
  "How much",
 
  "How often side",
 
  "How much side",
 
  "Ages",
 
  "Country",
 
  "Gender"
 
)
 
colnames(ans.sal) <- quesl
 
colnames(ans.her) <- quesl
 
ans <- rbind(ans.sal, ans.her)
 
 
head(ans)
 
qlen <-  c(7,7,7,5) # Number of options in each question
 
 
questionl <- colnames(ans.sal)
 
questionl.her <- colnames(ans.her)
 
 
agel
 
countryl
 
genderl
 
fisl
 
 
mod <- textConnection("
 
  model{
 
# Data1: Full questionnaire
 
    for(f in 1:2) { # Salmon or herring
 
      for(i in 1:U) { # Full questionnaire
 
        consume[i,f] ~ dbern(p[f,ageU[i],countryU[i],genderU[i]])
 
      }
 
    }
 
# Data2: Those responses that reported either Baltic herring or salmon
 
    for(q in 1:Q) { # Q = # of fish-specific questions
 
      for(i in 1:S) { # S = # respondents who eat Baltic salmon or herring
 
        ans[i,q] ~ dbin(p2[fis[i],age[i],country[i],gender[i],q], qlen[q])
 
      }
 
    }
 
# Priors (multidimensional)
 
    for(i in 1:A) { # Age groups
 
      for(j in 1:C) { # Countries
 
        for(k in 1:G) { #Genders
 
          for(f in 1:2) { # Fish species
 
            p[f,i,j,k] ~ dbeta(1,1)
 
            for(q in 1:Q) { # Q = # of fish-specific questions
 
              p2[f,i,j,k,q] ~ dbeta(1,1)
 
              ans.pred[f,i,j,k,q] ~ dbin(p2[f,i,j,k,q], qlen[q])
 
            }
 
          }
 
        }
 
      }
 
    }
 
  }
 
")
 
 
jags <- jags.model(
 
  mod,
 
  data = list(
 
    U = nrow(surv),
 
    Q = 4,
 
    S = nrow(ans),
 
    consume = consume,
 
    ans = data.matrix(ans[1:4]) - 1, # Zero is the first option
 
    qlen = qlen-1, # Subtract 1 because starts from 0.
 
    age = match(ans$Ages, agel),
 
    country = match(ans$Country, countryl),
 
    gender = match(ans$Gender, genderl),
 
    ageU = match(survey$Ages, agel),
 
    countryU = match(survey$Country, countryl),
 
    genderU = match(survey$Gender, genderl),
 
    fis = c(rep(1, nrow(ans.sal)), rep(2, nrow(ans.her))),
 
    A = length(agel),
 
    C = length(countryl),
 
    G = length(genderl)
 
  ),
 
  n.chains = 4,
 
  n.adapt = 100
 
)
 
 
update(jags, 100)
 
samps <- jags.samples(jags, c('ans.pred','p','p2'), 1000)
 
#samps.coda <- coda.samples(jags, c('mu', 'pcd.pred', 'ans.pred'), 1000)
 
 
ans.pred <- array(
 
  samps$ans.pred,
 
  dim = c(length(fisl), length(agel), length(countryl), length(genderl), length(quesl), 1000, 4),
 
  dimnames = list(
 
    Fish = fisl,
 
    Age = agel,
 
    Country = countryl,
 
    Gender = genderl,
 
    Question = quesl,
 
    Iter = 1:1000,
 
    Seed = c("S1","S2","S3","S4")
 
  )
 
)
 
 
p.pred <- array(
 
  samps$p,
 
  dim = c(length(fisl), length(agel), length(countryl), length(genderl), 1000, 4),
 
  dimnames = list(
 
    Fish = fisl,
 
    Age = agel,
 
    Country = countryl,
 
    Gender = genderl,
 
    Iter = 1:1000,
 
    Seed = c("S1","S2","S3","S4")
 
  )
 
)
 
 
 
p2.pred <- array(
 
  samps$p2,
 
  dim = c(length(fisl), length(agel), length(countryl), length(genderl), length(quesl), 1000, 4),
 
  dimnames = list(
 
    Fish = fisl,
 
    Age = agel,
 
    Country = countryl,
 
    Gender = genderl,
 
    Question = quesl,
 
    Iter = 1:1000,
 
    Seed = c("S1","S2","S3","S4")
 
  )
 
)
 
 
ansm <- melt(ans.pred)
 
pm <- melt(p.pred)
 
p2m <- melt(p2.pred)
 
 
scatterplotMatrix(t(ans.pred[1,1,,1,1,,1]))
 
 
scatterplotMatrix(t(p2.pred[1,1,,1,1,,1]))
 
scatterplotMatrix(t(p2.pred[,1,3,1,1,,1]))
 
scatterplotMatrix(t(p2.pred[1,,3,1,1,,1]))
 
scatterplotMatrix(t(p2.pred[1,1,3,,1,,1]))
 
scatterplotMatrix(t(p2.pred[1,1,3,1,,,1]))
 
 
scatterplotMatrix(t(p.pred[1,1,,1,,1]))
 
scatterplotMatrix(t(p.pred[,1,3,1,,1]))
 
scatterplotMatrix(t(p.pred[1,,3,1,,1]))
 
scatterplotMatrix(t(p.pred[1,1,3,,,1]))
 
  
 
#objects.store(ans.pred, p.pred, p2.pred)
 
#objects.store(ans.pred, p.pred, p2.pred)

Revision as of 08:51, 29 March 2017


Question

How Baltic herring and salmon are used as human food in Baltic sea countries? Which determinants affect on people’s eating habits of these fish species?

Answer

Survey data will be analysed during winter 2016-2017 and results will be updated here.

+ Show code

Rationale

Survey of eating habits of Baltic herring and salmon in Denmark, Estonia, Finland and Sweden has been done in September 2016 by Taloustutkimus oy. Content of the questionnaire can be accessed in Google drive. The actual data will be uploaded to Opasnet base on Octobere 2016.

The R-code to analyse the survey data will be provided on this page later on.

Data

Original datafile File:Goherr fish consumption.csv

Preprocessing

This code is used to preprocess the original questionnaire data from the above .csv file and to store the data as a usable variable to Opasnet base.

+ Show code

Bayes model

  • Model run 3.3.2017. All variables assumed independent. [1]
  • Model run 3.3.2017. p has more dimensions. [2]
  • Model run 25.3.2017. Several model versions: strange binomial+multivarnormal, binomial, fractalised multivarnormal [3]
  • Model run 27.3.2017 [4]
  • Other models except multivariate normal were archived and removed from active code 29.3.2017.
  • Model run 29.3.2017 with raw data graphs [5]

+ Show code

Calculations

This code calculates how much (g/day) Baltic herring and salmon are eaten based on an Bayesian model build up based on the questionnaire data.

+ Show code

Assumptions

The following assumptions are used:

Assumptions for calculations(-)
ObsVariablevalueExplanationResult
1freq6times per year260 - 364
2freq5times per year104 - 208
3freq4times per year52
4freq3times per year12 - 36
5freq2times per year2 - 5
6freq1times per year0.5 - 0.9
7freq0times per year0
8amdish0grams / serving20 - 50
9amdish1grams / serving70 - 100
10amdish2grams / serving120 - 150
11amdish3grams / serving170 - 200
12amdish4grams / serving220 - 250
13amdish5grams / serving270 - 300
14amdish6grams / serving450 - 500
15ingridientfraction0.1 - 0.3
16amside0grams / serving20 - 50
17amside1grams / serving70 - 100
18amside2grams / serving120 - 150
19amside3grams / serving170 - 200
20amside4grams / serving220 - 250

Questionnaire


Dependencies

The survey data will be used as input in the benefit-risk assessment of Baltic herring and salmon intake, which is part of the WP5 work in Goherr-project.

Formula

See also

Keywords

References


Related files

<mfanonymousfilelist></mfanonymousfilelist>

Goherr: Fish consumption study. Opasnet . [6]. Accessed 17 May 2024.