Difference between revisions of "Impact Calculation Tool for R"

From Testiwiki
Jump to: navigation, search
(R calculations: a new bug created)
m
 
(6 intermediate revisions by 2 users not shown)
Line 1: Line 1:
 
{{method|moderator=Virpi Kollanus}}
 
{{method|moderator=Virpi Kollanus}}
 +
[[Category:Contains R code]]
  
 
=Life table calculations=
 
=Life table calculations=
Line 5: Line 6:
 
==R calculations==
 
==R calculations==
  
{{defend|# |Lifetable function works with data.frames!|--[[User:Jouni|Jouni]] 23:38, 9 July 2013 (EEST)}} {{defend|# |And with ovariables!|--[[User:Jouni|Jouni]] 00:49, 10 July 2013 (EEST)}}
+
{{defend|# |Lifetable function works with data.frames!|--[[User:Jouni|Jouni]] 23:38, 9 July 2013 (EEST)}} {{defend|# |And with ovariables!|--[[User:Jouni|Jouni]] 00:49, 10 July 2013 (EEST)}}
 +
 
 +
{{comment|# |Changed to a more versatile ovariable implementation, fixed loop so that no unwanted years show, also more efficient loop (rbind vs merge and melt at end), "pop" renamed "Population" due to some unknown bug, see comparison for details.|--[[User:Teemu R|Teemu R]] 22:35, 22 July 2013 (EEST)}}
  
 
<rcode graphics="1">
 
<rcode graphics="1">
 
library(OpasnetUtils)
 
library(OpasnetUtils)
library(reshape2)
 
 
library(ggplot2)
 
library(ggplot2)
  
# lifetable is a life table function.
+
#########################
 +
# Lifetable function #
 +
#########################
  
 
lifetable <- function(
 
lifetable <- function(
pop = data.frame(Birth = 2000, Result = 1000), # population by birth year.
+
pop = data.frame(Birth = 2000, Result = 1000), # population by birth year.
mort = data.frame(Age = 0:100, Result = 0.02), # mortality by age.
+
mort_frac = data.frame(Age = 0:100, Result = 0.02), # mortality by age.
followup = 100, # follow-up time.
+
followup = 100, # follow-up time.
keep = (0:20) * 5, # Which years to save?  
+
keep = (0:20) * 5, # Which years to save?  
starttime = 2000 # What year is the starting time or reference?
+
starttime = 2000, # What year is the starting time or reference? Ignored if present in data.
 +
... # Ovariable evaluation extra parameters
 
) {
 
) {
 
+
# Check for ovariable input; Instead of having 4 different methods for all combinations we can check
 +
# individually in one function.
 +
 +
if(class(mort_frac)=="ovariable"){
 +
if(nrow(mort_frac@output) == 0) mort_frac <- EvalOutput(mort_frac, ...)
 +
# Proceed to remove non-marginal columns
 +
temp <- result(mort_frac)
 +
mort_frac <- mort_frac@output[mort_frac@marginal]
 +
mort_frac$Time <- NULL
 +
mort_frac$Mort <- temp
 +
}
 +
 +
if(class(pop)=="ovariable"){
 +
if(nrow(pop@output) == 0) pop <- EvalOutput(pop, ...)
 +
# Proceed to remove non-marginal columns
 +
temp <- result(pop)
 +
pop <- pop@output[pop@marginal]
 +
pop$Result <- temp
 +
}
 +
 
# Prepare input data for yearly loops of follow-up.
 
# Prepare input data for yearly loops of follow-up.
 
+
poptemp <- pop
+
if(!"Time" %in% colnames(pop)) {
if(!"Time" %in% colnames(poptemp)) {poptemp$Time <- starttime} # create Time index
+
pop$Time <- starttime # create Time index if it does not exist
 
+
} else {
colnames(mort)[colnames(mort) == "Result"] <- "Mort"
+
starttime <- min(pop$Time) # else take lowest value as starting point
 
+
}
out <- data.frame("V1" = poptemp$Result) # initiate the output data.frame.
+
 
+
temp_pop <- data.frame() # init
 +
 +
out <- pop
 +
 
# Follow the population year by year.
 
# Follow the population year by year.
 
+
 
for(i in 0:followup) {
 
for(i in 0:followup) {
 
+
poptemp$Age <- poptemp$Time - poptemp$Birth + i # update the age
+
# Take any newborn population.
 
+
temp_pop_add <- pop[pop$Time == starttime + i,]
poptemp <- merge(poptemp, mort, all.x = TRUE) # combine population and age-specific mortality
+
poptemp$Mort[poptemp$Time < poptemp$Birth] <- 0 # No mortality before birth
+
# Rbind newborn with leftover population from last iteration
poptemp$Mort[is.na(poptemp$Mort)] <- 1 # All die beyond the mortality table
+
temp_pop <- rbind(temp_pop_add, temp_pop)
 
+
# save the result if it matches with the years to be kept
+
temp_pop$Age <- temp_pop$Time - temp_pop$Birth # update age
if(i %in% keep) {out[[match(i, keep)]] <- poptemp$Result}
+
 
+
temp_pop <- merge(temp_pop, mort_frac, all.x = TRUE) # combine population and age-specific mortality
poptemp$Result <- poptemp$Result * (1 - poptemp$Mort) # update the population size
+
temp_pop$Mort[is.na(temp_pop$Mort)] <- 1 # All die beyond the mortality table
poptemp <- poptemp[colnames(poptemp) != "Mort"]
+
 +
temp_pop$Result <- temp_pop$Result * (1 - temp_pop$Mort) # update the population size
 +
temp_pop <- temp_pop[colnames(pop)]
 +
temp_pop$Time <- temp_pop$Time + 1 # update time
 +
 +
# Rbind wanted years with output
 +
if(i %in% keep) {
 +
out <- rbind(out, temp_pop)
 +
}
 
}
 
}
 
indices <- colnames(pop)[colnames(pop) != "Result"]
 
out <- cbind(poptemp[indices], out) # add index columns to the output
 
out <- melt(out, id.vars = indices, variable.name = "Timetemp", value.name = "Result") # turn into long format
 
 
# Update the Time index.
 
 
out$Time <- out$Time + keep[as.numeric(substr(out$Timetemp, 2, 100))]
 
 
out <- out[colnames(out) != "Timetemp"]
 
out <- out[out$Time >= out$Birth , ] # remove unborn
 
 
return(out)
 
return(out)
 
}
 
}
  
setGeneric("lifetable")
+
#################################
 +
# Ovariable Definitions #
 +
#################################
  
setMethod(
+
Population <- Ovariable(name = "Population", ddata = "Op_en5994.population")
f = "lifetable",
+
Population@data$Age <- as.integer(levels(Population@data$Age)[Population@data$Age])
signature = signature(pop = "ovariable", mort = "ovariable"),
+
Population@data$Time <- as.integer(levels(Population@data$Time)[Population@data$Time])
definition = function(
+
Population@data$Birth <- Population@data$Time - Population@data$Age
pop,
 
mort,
 
followup = 100,
 
keep = (0:20) * 5,
 
starttime = 2000
 
) {
 
v = FALSE
 
if (ncol(pop@output) == 0) pop <- EvalOutput(pop, verbose = v)
 
rescolpop <- paste(pop@name, "Result", sep = "")
 
colnames(pop@output)[colnames(pop@output) == rescolpop] <- "Result"
 
# Here we should remove all non-index columns. But so far the user has to do it by hand.
 
  
if (ncol(mort@output) == 0) mort <- EvalOutput(mort, verbose = v)
+
mort <- Ovariable(name = "mort", ddata = "Op_en5994.mortality")
rescolmort <- paste(mort@name, "Result", sep = "")
+
mort@data$Age <- as.integer(levels(mort@data$Age)[mort@data$Age])
colnames(mort@output)[colnames(mort@output) == rescolmort] <- "Result"
+
 
+
mort_frac <- Ovariable("mort_frac", dependencies = data.frame(Name = c("mort", "Population")),
callGeneric(
+
formula = function(...){
pop = pop@output,
+
temp <- mort / Population
mort = mort@output,
+
temp <- temp@output[temp@output$Time == "2011" , !colnames(temp@output) %in% c("Birth")]
followup = followup,
+
return(temp)
keep = keep,
+
}
starttime = starttime
 
)
 
}
 
 
)
 
)
  
pop <- opbase.data("Op_en5994", subset = "population")
+
#################################
 
+
# Evaluation and presentation #
mort <- opbase.data("Op_en5994", subset = "mortality")
+
#################################
 
 
pop <- EvalOutput(Ovariable(name = "pop", ddata = "Op_en5994.population"), N = 10)
 
pop@output$Age <- as.integer(levels(pop@output$Age)[pop@output$Age])
 
pop@output$Time <- as.integer(levels(pop@output$Time)[pop@output$Time])
 
 
 
mort <- EvalOutput(Ovariable(name = "mort", ddata = "Op_en5994.mortality"), N = 10)
 
mort@output$Age <- as.integer(levels(mort@output$Age)[mort@output$Age])
 
 
 
mort <- mort / pop
 
mort@output <- mort@output[colnames(mort@output) %in% c("Time", "Age", "Result", "Iter")]
 
  
cat("Mortality rate by age.\n")
+
out <- lifetable(pop = Population, mort_frac = mort_frac, N = 100)
oprint(mort@output[mort@output$Iter == 1 , ])
 
  
pop@output$Birth <- pop@output$Time - pop@output$Age
+
cat("Mortality, Population and mortality rate by age in 2011.\n")
pop@output <- pop@output[colnames(pop@output) %in% c("Time", "Birth", "popResult", "Iter")]
+
oprint(mort_frac)
  
cat("Population data for the start years of subcohorts.\n")
+
#cat("Population data for the start years of subcohorts.\n")
oprint(pop@output[pop@output$Iter == 1 , ])
+
#oprint(Population)
  
out <- lifetable(pop = pop, mort = mort, followup = 50, keep = c(0, 5, 10, 20, 30, 40, 50))
+
cat("Lifetable\n")
oprint(out[out$Iter == 1 , ])
+
oprint(out[1:100,])
 +
#oprint(out)
  
 
ggplot(out, aes(x = Time, y = Result, group = Birth)) + geom_line(aes(colour = Birth)) + theme_grey(base_size = 24)
 
ggplot(out, aes(x = Time, y = Result, group = Birth)) + geom_line(aes(colour = Birth)) + theme_grey(base_size = 24)
 
lifetable()
 
 
lifetable(
 
pop = data.frame(Birth = 2000, Result = 1000), # population by birth year.
 
mort = data.frame(Age = 0:100, Result = 0.02), # mortality by age.
 
followup = 100, # follow-up time.
 
keep = (0:20) * 5, # Which years to save?
 
starttime = 2000 # What year is the starting time or reference?
 
)
 
 
 
</rcode>
 
</rcode>
 
{{attack|# |There is a small mistake that will cause two problems if the follow-up starts before the cohort is born:
 
* If there is no mortality given for unborn, they will all die.
 
* There will be one year additional mortality before year 0, unlike if the follow-up starts only at the year of birth.
 
|--[[User:Jouni|Jouni]] 06:56, 10 July 2013 (EEST)}}
 
  
 
==Input data==
 
==Input data==
Line 147: Line 133:
 
2011|2|56683|
 
2011|2|56683|
 
2011|3|56683|
 
2011|3|56683|
2011|4|56683-66683|
+
2011|4|56683|
 
2011|5|60615|
 
2011|5|60615|
 
2011|6|60615|
 
2011|6|60615|
Line 302: Line 288:
 
7|10.2|
 
7|10.2|
 
8|10.2|
 
8|10.2|
9|10.2-102|
+
9|10.2|
 
10|10.6|
 
10|10.6|
 
11|10.6|
 
11|10.6|

Latest revision as of 10:09, 27 August 2013

Life table calculations

R calculations

# : Lifetable function works with data.frames! --Jouni 23:38, 9 July 2013 (EEST) # : And with ovariables! --Jouni 00:49, 10 July 2013 (EEST)

--# : Changed to a more versatile ovariable implementation, fixed loop so that no unwanted years show, also more efficient loop (rbind vs merge and melt at end), "pop" renamed "Population" due to some unknown bug, see comparison for details. --Teemu R 22:35, 22 July 2013 (EEST)

+ Show code

Input data

Population structure in the beginning of the assessment follow-up period (pop_data)

population(-)
ObsTimeAgeResultDescription
12011056683
22011156683
32011256683
42011356683
52011456683
62011560615
72011660615
82011760615
92011860615
102011960615
1120111066167
1220111166167
1320111266167
1420111366167
1520111466167
1620111563786
1720111663786
1820111763786
1920111863786
2020111963786
2120112066423
2220112166423
2320112266423
2420112366424
2520112466423
2620112565882
2720112665882
2820112765882
2920112865882
3020112965882
3120113061495
3220113161495
3320113261495
3420113361495
3520113461495
3620113572474
3720113672474
3820113772474
3920113872474
4020113972474
4120114075917
4220114175917
4320114275917
4420114375917
4520114475917
4620114576977
4720114676977
4820114776977
4920114876977
5020114976977
5120115080206
5220115180206
5320115280206
5420115380206
5520115480206
5620115580291
5720115680291
5820115780291
5920115880291
6020115980291
6120116054300
6220116154300
6320116254300
6420116354300
6520116454300
6620116548077
6720116648077
6820116748077
6920116848077
7020116948077
7120117041475
7220117141475
7320117241475
7420117341475
7520117441475
7620117534987
7720117634987
7820117734987
7920117834987
8020117934987
8120118023300
8220118123300
8320118223300
8420118323300
8520118423300
8620118511292
8720118611292
8820118711292
8920118811292
9020118911292
912011904394
922011914394
932011924394
942011934394
952011944394
96201195886
97201196886
98201197886
99201198886
100201199886
1012012057000
1022013057000
1032014057000
1042015057000
1052016057000
1062017057000
1072018057000
1082019057000
1092020057000
1102021057000
1112022057000
1122023057000
1132024057000
1142025057000
1152026057000
1162027057000
1172028057000
1182029057000

Annual birth rate (birth_rate)

# : Not needed because it can be given as a part of the population. --Jouni 06:46, 10 July 2013 (EEST)

birth.rate(-)
ObsFollow-up periodResultKuvaus
1201057000
2201157000
3201257000
4201357000
5201457000
6201557000
7201657000
8201757000
9201857000
10201957000
11202057000
12202157000
13202257000
14202357000
15202457000
16202557000
17202657000
18202757000
19202857000
20202957000

Annual mortality rate (mort_rate)

mortality(-)
ObsAgeResultKuvaus
1049.8
2149.8
3249.8
4349.8
5449.8
6510.2
7610.2
8710.2
9810.2
10910.2
111010.6
121110.6
131210.6
141310.6
151410.6
161530.6
171630.6
181730.6
191830.6
201930.6
212050
222150
232250
242350
252450
262547
272647
282747
292847
302947
313048.6
323148.6
333248.6
343348.6
353448.6
363599.4
373699.4
383799.4
393899.4
403999.4
4140156.6
4241156.6
4342156.6
4443156.6
4544156.6
4645247.6
4746247.6
4847247.6
4948247.6
5049247.6
5150395.8
5251395.8
5352395.8
5453395.8
5554395.8
5655555
5756555
5857555
5958555
6059555
6160534
6261534
6362534
6463534
6564534
6665702.4
6766702.4
6867702.4
6968702.4
7069702.4
7170975
7271975
7372975
7473975
7574975
76751372.4
77761372.4
78771372.4
79781372.4
80791372.4
81801619.6
82811619.6
83821619.6
84831619.6
85841619.6
86851399.8
87861399.8
88871399.8
89881399.8
90891399.8
9190934
9291934
9392934
9493934
9594934
9695313
9796313
9897313
9998313
10099313

Mortality risk (mort_risk)

mort_rate / pop_data

Start year (start-year)

2010

Follow-up time in years (followup_time)

20


Analytica codes

Variables, which need to be translated into ovariables:

Population in time, child (pop_in_time_child)

var k: Birth_rate[Fu_year=Year_lt];

k:= if k = null then 0 else k;

var a:= if @Year_lt = 1 then Pop_data else (if @Age=1 then k else 0);

a:= a[Age=age_child];

var j:= Mort_risk[Age=age_child];

j:=j[Fu_period=Period_lt];

j:= Si_pi(j, 5, Period_lt, Year_lt, Year_help)*5;

j:= if j = null then j[Period_lt=max(Fu_period)] else j;

j:= if j < 0 then 0 else j;

j:= if j > 1 then 1 else j;

j:= 1-j;

var x:= 1;

while x<= min([size(age_child),size(Year_lt)]) do (

var b:= a*j; b:= b[@age_child=@age_child-1, @Year_lt=@Year_lt-1];

a:= if b=null then a else b;

x:= x+1);

sum(if Year_lt = period_vs_year then a else 0,Year_lt)



Population in time, beginning of time step (pop_in_time_beg)

var a:= sum(if floor(Age/5)+1 = @Age_cat then Pop_data else 0 , Age);

a:= if @Age_cat=1 then sum(Pop_in_time_child, Age_child) else (if @period_lt = 1 then a else 0);

var j:= Mort_risk;

j:=j[Fu_period=Period_lt];

j:= if j = null then j[Period_lt=max(Fu_period)] else j;

j:= if j < 0 then 0 else j;

j:= if j > 1 then 1 else j;

j:= 1-j;

j:= sum(if floor(Age/5)+1 = @Age_cat then j else 0 , Age)/5;

var m:=j[@Age_cat=@Age_cat+1];

m:= if m=null then 0 else m;

var n:=((j^5)+(j^4*m)+(j^3*m^2)+(j^2*m^3)+(j*m^4))/5;

var x:= 1;

while x<= min([size(Age_cat),size(Period_lt)]) do ( var b:= a*n;

b:= b[@Age_cat=@Age_cat-1, @Period_lt=@Period_lt-1];

a:= if b=null then a else b;

x:= x+1);

a


Indices and function used in the code above:

Follow-up year (fu_year)

sequence(Start_year,Start_year+(Followup_time-1),1)


Year in life table (year_lt)

sequence(Start_year,Start_year+Followup_time+99,1)


Follow-up period in 5-year time steps (fu_period)

sequence(Start_year,Start_year+(Followup_time-1),5)


5-year period in life table (period_lt)

sequence(Start_year,Start_year+Followup_time+99,5)


Age of child (age_child)

sequence(0,4,1)


Si_pi function (si_pi)

Parameters: (data, kerroin;karkea,tarkka:indextype;indtieto)

Description

  • Data = data to be divided into more detailed parts
  • Kerroin = relative weight inside a cluster
  • Karkea = index for the clustered data
  • Tarkka = index for the detailed data
  • Indtieto = Data about which detailed item belongs to which cluster

Analytica code:

var a:=sum((if indtieto=karkea then kerroin else 0), tarkka);

a:= sum((if indtieto=karkea then a else 0), karkea);

a:= kerroin/a;

sum((if indtieto=karkea then data*a else 0), karkea)