Many national household surveys use rotating panel designs, where a sample of respondents is interviewed in an initial wave (implantation) and then followed up over successive periods. Uruguay’s ECH, for example, interviews each household once and then conducts monthly follow-ups for the rest of the year.
metasurvey provides two classes for this type of design:
- RotativePanelSurvey – a panel with an implantation survey and a list of follow-up surveys
- PoolSurvey – a collection of surveys grouped together for combined estimation across periods

A RotativePanelSurvey requires an implantation Survey and one or more follow-up Survey objects.
library(metasurvey)
library(data.table)
# NOTE(review): presumably makes steps work on a copy of the survey rather
# than modifying it in place — confirm against the metasurvey documentation.
set_use_copy(TRUE)
# Fix the RNG seed so the simulated data below are reproducible.
set.seed(42)
# Number of simulated respondents per survey; read by make_survey().
n <- 100
#' Build a simulated survey for the examples in this article
#'
#' Draws `n` synthetic respondents (`n` is read from the enclosing
#' environment) with random age, income, employment status and weight, and
#' wraps them in a `Survey` object whose annual weight is the column `w`.
#'
#' @param edition Edition label passed through to `Survey$new()`.
#' @return The object returned by `Survey$new()`.
make_survey <- function(edition) {
  # seq_len() yields an empty id vector when n is 0, whereas 1:n would
  # yield c(1, 0). The column order is kept so the random draws happen in
  # the same sequence as before.
  dt <- data.table(
    id = seq_len(n),
    age = sample(18:80, n, replace = TRUE),
    income = round(runif(n, 5000, 80000)),
    employed = sample(0:1, n, replace = TRUE),
    w = round(runif(n, 0.5, 3.0), 4)
  )
  Survey$new(
    data = dt, edition = edition, type = "ech",
    psu = NULL, engine = "data.table",
    weight = add_weight(annual = "w")
  )
}
# Implantation: 2023 wave 1
# Every survey below carries the same edition label ("2023"); each
# make_survey() call draws a fresh random sample, so the data differ by wave.
impl <- make_survey("2023")
# Follow-ups: waves 2 through 4
fu_2 <- make_survey("2023")
fu_3 <- make_survey("2023")
fu_4 <- make_survey("2023")
panel <- RotativePanelSurvey$new(
implantation = impl,
follow_up = list(fu_2, fu_3, fu_4),
type = "ech",
default_engine = "data.table",
steps = list(),
recipes = list(),
workflows = list(),
design = NULL
)

Use get_implantation() and get_follow_up()
to retrieve the individual surveys, for example
get_implantation(panel) and get_follow_up(panel).
Apply transformations to individual panel components. The same step functions work on both the implantation and follow-up surveys:
# Transform the implantation survey
panel$implantation <- step_compute(panel$implantation,
income_k = income / 1000,
comment = "Income in thousands"
)
# Apply the same step to each follow-up
panel$follow_up <- lapply(panel$follow_up, function(svy) {
step_compute(svy, income_k = income / 1000, comment = "Income in thousands")
})

Use workflow() on individual panel components to perform
cross-sectional or time-series analysis.
# Estimate mean income on the implantation survey alone; the single survey
# is passed wrapped in a list.
# NOTE(review): estimation_type = "annual" presumably selects the weight
# registered as add_weight(annual = "w") in make_survey() — confirm.
result_impl <- workflow(
list(panel$implantation),
survey::svymean(~income, na.rm = TRUE),
estimation_type = "annual"
)
# Print the estimate table.
result_impl
#> stat variable value se cv confint_lower
#> <char> <char> <num> <num> <num> <num>
#> 1: survey::svymean: income income 43742.45 2420.706 0.05533997 38997.96
#> confint_upper evaluate
#> <num> <char>
#> 1: 48486.95 Very good

results <- rbindlist(lapply(seq_along(panel$follow_up), function(i) {
r <- workflow(
list(panel$follow_up[[i]]),
survey::svymean(~income, na.rm = TRUE),
estimation_type = "annual"
)
r$period <- panel$follow_up[[i]]$edition
r
}))
results[, .(period, stat, value, se, cv)]
#> period stat value se cv
#> <num> <char> <num> <num> <num>
#> 1: 2023 survey::svymean: income 41537.99 2328.715 0.05606230
#> 2: 2023 survey::svymean: income 42809.39 2383.860 0.05568546
#> 3: 2023 survey::svymean: income 41314.46 2232.803 0.05404409

A PoolSurvey groups multiple surveys for combined
estimation. This is useful when you want to aggregate monthly data into
quarterly or annual estimates, or when combining surveys reduces
sampling variability.
The constructor takes a nested list:
list(estimation_type = list(group = list(surveys))).
# Three separately simulated surveys to pool.
s1 <- make_survey("2023")
s2 <- make_survey("2023")
s3 <- make_survey("2023")
# Nesting follows list(estimation_type = list(group = list(surveys))):
# estimation type "annual", one group named "q1" holding s1, s2 and s3.
pool <- PoolSurvey$new(
list(annual = list("q1" = list(s1, s2, s3)))
)
# Inspect the class of the pooled object.
class(pool)
#> [1] "PoolSurvey" "R6"

pool_result <- workflow(
pool,
survey::svymean(~income, na.rm = TRUE),
estimation_type = "annual"
)
pool_result
#> stat variable value se cv confint_lower
#> <char> <char> <num> <num> <num> <num>
#> 1: survey::svymean: income income 44574.23 2226.844 0.04995811 40209.69
#> 2: survey::svymean: income income 44341.53 2407.912 0.05430377 39622.10
#> 3: survey::svymean: income income 41293.92 2239.968 0.05424451 36903.66
#> confint_upper evaluate period type variance
#> <num> <char> <num> <char> <num>
#> 1: 48938.76 Excellent 2023 q1 4958836
#> 2: 49060.95 Very good 2023 q1 5798041
#> 3: 45684.18 Very good 2023 q1 5017458

Surveys can be organized into multiple groups:
# Three more simulated surveys for a second group.
s4 <- make_survey("2023")
s5 <- make_survey("2023")
s6 <- make_survey("2023")
# Same "annual" estimation type, now with two named groups: "q1" reuses
# s1-s3 and "q2" holds the new s4-s6.
pool_semester <- PoolSurvey$new(
list(annual = list(
"q1" = list(s1, s2, s3),
"q2" = list(s4, s5, s6)
))
)
# One workflow() call covers both groups; the printed result has one row
# per survey, with the group name in the `type` column.
result_semester <- workflow(
pool_semester,
survey::svymean(~income, na.rm = TRUE),
estimation_type = "annual"
)
result_semester
#> stat variable value se cv confint_lower
#> <char> <char> <num> <num> <num> <num>
#> 1: survey::svymean: income income 44574.23 2226.844 0.04995811 40209.69
#> 2: survey::svymean: income income 44341.53 2407.912 0.05430377 39622.10
#> 3: survey::svymean: income income 41293.92 2239.968 0.05424451 36903.66
#> 4: survey::svymean: income income 41760.06 2296.170 0.05498485 37259.64
#> 5: survey::svymean: income income 48051.67 2374.145 0.04940816 43398.43
#> 6: survey::svymean: income income 42721.21 2276.767 0.05329359 38258.83
#> confint_upper evaluate period type variance
#> <num> <char> <num> <char> <num>
#> 1: 48938.76 Excellent 2023 q1 4958836
#> 2: 49060.95 Very good 2023 q1 5798041
#> 3: 45684.18 Very good 2023 q1 5017458
#> 4: 46260.47 Very good 2023 q2 5272399
#> 5: 52704.90 Excellent 2023 q2 5636562
#> 6: 47183.59 Very good 2023 q2 5183666

Use extract_surveys() to select specific periods from a
RotativePanelSurvey:
metasurvey provides utilities for working with survey edition dates:
# Extract periodicity from edition strings
extract_time_pattern("2023")
#> $year
#> [1] 2023
#>
#> $periodicity
#> [1] "Annual"
extract_time_pattern("2023-06")
#> $year
#> [1] 2023
#>
#> $month
#> [1] 6
#>
#> $periodicity
#> [1] "Monthly"

# Validate edition format
validate_time_pattern(svy_type = "ech", svy_edition = "2023")
#> $svy_type
#> [1] "ech"
#>
#> $svy_edition
#> [1] 2023
#>
#> $svy_periodicity
#> [1] "Annual"

# Group dates by period
dates <- as.Date(c(
"2023-01-15", "2023-03-20", "2023-06-10",
"2023-09-05", "2023-11-30"
))
group_dates(dates, type = "quarterly")
#> 2023-01-15 2023-03-20 2023-06-10 2023-09-05 2023-11-30
#> 1 1 2 3 4
group_dates(dates, type = "biannual")
#> 2023-01-15 2023-03-20 2023-06-10 2023-09-05 2023-11-30
#> 1 1 1 2 2

In practice, panel data is loaded from files using
load_panel_survey():
# Load a panel from disk; the paths here are example values.
# NOTE(review): path_follow_up looks like a directory of follow-up files —
# confirm against load_panel_survey()'s documentation.
panel <- load_panel_survey(
path_implantation = "data/ECH_implantacion_2023.csv",
path_follow_up = "data/seguimiento/",
svy_type = "ech",
# The implantation is given an annual weight column and the follow-ups a
# monthly one.
svy_weight_implantation = add_weight(annual = "pesoano"),
svy_weight_follow_up = add_weight(monthly = "pesomes")
)
# Access components
imp <- get_implantation(panel)
fups <- get_follow_up(panel)

For surveys that provide bootstrap replicate weights (such as the
ECH), use add_replicate() inside add_weight()
to configure robust variance estimation:
panel <- load_panel_survey(
path_implantation = "data/ECH_implantacion_2023.csv",
path_follow_up = "data/seguimiento/",
svy_type = "ech",
svy_weight_implantation = add_weight(
annual = add_replicate(
weight = "pesoano",
replicate_pattern = "wr\\d+",
replicate_path = "data/pesos_replicados_anual.csv",
replicate_id = c("numero" = "numero"),
replicate_type = "bootstrap"
)
),
svy_weight_follow_up = add_weight(monthly = "pesomes")
)

When replicate weights are configured, workflow()
automatically uses survey::svrepdesign() for variance
estimation instead of the standard Taylor linearization approach.
workflow() and
RecipeWorkflow