下面针对修改后的问题给出了点图和堆积条形图。关于可视化的一点建议:您是否已经知道数据中的“故事”?如果还不知道,那么您可能需要先尝试多种可视化来发现这个故事,然后再构建最能展示这个故事的可视化。
# Example data: one row per recorded choice, with the subject's name and the
# agegroup, sex and ses values recorded for that subject. The first line of
# the text supplies the column names.
df <- read.table(
  header = TRUE,
  text = "subject choice agegroup sex ses
John square 2 Female A
John triangle 2 Female A
John triangle 2 Female A
Mary circle 2 Female C
Mary square 2 Female C
Mary rectangle 2 Female C
Mary square 2 Female C
Hodor hodor 5 Male D
Hodor hodor 5 Male D
Hodor hodor 5 Male D
Hodor hodor 5 Male D
Jill square 3 Female B
Jill circle 3 Female B
Jill square 3 Female B
Jill hodor 3 Female B
Jill triangle 3 Female B
Jill rectangle 3 Female B"
)
library(tidyverse)
#> ── Attaching packages ──────────────────────────────────────────────────────── tidyverse 1.2.1 ──
#> ✔ ggplot2 2.2.1 ✔ purrr 0.2.4
#> ✔ tibble 1.4.2 ✔ dplyr 0.7.4
#> ✔ tidyr 0.8.0 ✔ stringr 1.3.0
#> ✔ readr 1.1.1 ✔ forcats 0.3.0
#> ── Conflicts ─────────────────────────────────────────────────────────── tidyverse_conflicts() ──
#> ✖ dplyr::filter() masks stats::filter()
#> ✖ dplyr::lag() masks stats::lag()
# read.table() parses agegroup as a number; it is a category label, so store
# it as a factor before grouping and plotting.
df[["agegroup"]] <- factor(df[["agegroup"]])
# Build a one-row-per-subject lookup of the demographic attributes.
# Grouping by every attribute and calling summarize() with no summary
# expressions collapses df to its distinct
# (subject, agegroup, ses, sex) combinations.
df_subject <- df %>%
  group_by(subject, agegroup, ses, sex) %>%
  summarize()
# Data check: a subject recorded with inconsistent attributes would appear on
# more than one row here and would duplicate rows when this table is joined
# by subject, so stop early instead of producing inflated results.
stopifnot(anyDuplicated(df_subject$subject) == 0L)
df_subject
#> # A tibble: 4 x 4
#> # Groups: subject, agegroup, ses [?]
#> subject agegroup ses sex
#> <fct> <fct> <fct> <fct>
#> 1 Hodor 5 D Male
#> 2 Jill 3 B Female
#> 3 John 2 A Female
#> 4 Mary 2 C Female
# Share of each choice within every subject.
# Rows are counted per subject/choice pair; the summarized result stays
# grouped by subject (see the printed "Groups" line), so the division below
# uses each subject's own total.
df_subject_choice <- mutate(
  summarize(group_by(df, subject, choice), n = n()),
  p = n / sum(n)
)
df_subject_choice
#> # A tibble: 11 x 4
#> # Groups: subject [4]
#> subject choice n p
#> <fct> <fct> <int> <dbl>
#> 1 Hodor hodor 4 1.00
#> 2 Jill circle 1 0.167
#> 3 Jill hodor 1 0.167
#> 4 Jill rectangle 1 0.167
#> 5 Jill square 2 0.333
#> 6 Jill triangle 1 0.167
#> 7 John square 1 0.333
#> 8 John triangle 2 0.667
#> 9 Mary circle 1 0.250
#> 10 Mary rectangle 1 0.250
#> 11 Mary square 2 0.500
# Attach each subject's attributes to the per-subject choice proportions,
# then keep only the columns used downstream, in display order (the count
# column n is dropped).
df_joined <- left_join(df_subject_choice, df_subject, by = "subject")
df_joined <- select(df_joined, subject, ses, sex, agegroup, choice, p)
df_joined
#> # A tibble: 11 x 6
#> # Groups: subject [4]
#> subject ses sex agegroup choice p
#> <fct> <fct> <fct> <fct> <fct> <dbl>
#> 1 Hodor D Male 5 hodor 1.00
#> 2 Jill B Female 3 circle 0.167
#> 3 Jill B Female 3 hodor 0.167
#> 4 Jill B Female 3 rectangle 0.167
#> 5 Jill B Female 3 square 0.333
#> 6 Jill B Female 3 triangle 0.167
#> 7 John A Female 2 square 0.333
#> 8 John A Female 2 triangle 0.667
#> 9 Mary C Female 2 circle 0.250
#> 10 Mary C Female 2 rectangle 0.250
#> 11 Mary C Female 2 square 0.500
# Aggregate to the level that will be plotted: the mean proportion of each
# choice within every agegroup/ses/sex combination.
# (ggplot may be able to do this aggregation itself.)
df_summary <- group_by(df_joined, agegroup, ses, sex, choice)
df_summary <- summarize(df_summary, p_mean = mean(p))
df_summary
#> # A tibble: 11 x 5
#> # Groups: agegroup, ses, sex [?]
#> agegroup ses sex choice p_mean
#> <fct> <fct> <fct> <fct> <dbl>
#> 1 2 A Female square 0.333
#> 2 2 A Female triangle 0.667
#> 3 2 C Female circle 0.250
#> 4 2 C Female rectangle 0.250
#> 5 2 C Female square 0.500
#> 6 3 B Female circle 0.167
#> 7 3 B Female hodor 0.167
#> 8 3 B Female rectangle 0.167
#> 9 3 B Female square 0.333
#> 10 3 B Female triangle 0.167
#> 11 5 D Male hodor 1.00
# Dot plot: ses on the x axis, choice on the y axis, point colour by age
# group, point size by mean proportion, one panel per sex.
ggplot(df_summary) +
  geom_point(
    aes(x = ses, y = choice, color = agegroup, size = p_mean)
  ) +
  facet_wrap(~sex)
# Plot faceted 100% stacked bar: one bar per age group, segmented by choice,
# with facet rows for sex and facet columns for ses.
# position = "fill" rescales every bar to a total of 1. Default stacking only
# reaches 100% when the p_mean values in a bar happen to sum to 1, and p_mean
# is a mean over the rows in each agegroup/ses/sex/choice group, so that sum
# is not guaranteed in general.
ggplot(
  df_summary,
  aes(x = agegroup, y = p_mean, color = choice, fill = choice)
) +
  geom_col(position = "fill") +
  facet_grid(sex ~ ses)