r-stats - SKILL.md Agent Skill

name: r-stats description: Statistical analysis for psychology research using R. Use when conducting power analysis, data cleaning, hypothesis testing (t-test, ANOVA, regression, mixed models), effect size calculation, data visualization with ggplot2, or generating APA-formatted results tables. Also use for meta-analysis (metafor) and structural equation modeling (lavaan). metadata: {"openclaw":{"requires":{"bins":["Rscript"]}}}

R Statistical Analysis for Psychology Research

This skill provides R code templates and guidance for psychology data analysis.

Environment Setup

# Required packages — install if not present
required_packages <- c(
  "tidyverse",    # data wrangling + ggplot2
  "pwr",          # power analysis
  "effectsize",   # Cohen's d, eta-squared, etc.
  "lme4",         # mixed effects models
  "lmerTest",     # p-values for mixed models
  "lavaan",       # SEM
  "metafor",      # meta-analysis
  "psych",        # psychometric functions (alpha, EFA)
  "apaTables",    # APA-formatted tables
  "report",       # automatic APA reporting
  "here",         # reproducible file paths
  "janitor"       # data cleaning utilities
)

install.packages(setdiff(required_packages, rownames(installed.packages())))
lapply(required_packages, library, character.only = TRUE)

# Always set seed for reproducibility
set.seed(42)

Power Analysis Templates

library(pwr)

# Independent samples t-test
pwr.t.test(d = 0.5, sig.level = .05, power = .80, type = "two.sample")

# Paired t-test
pwr.t.test(d = 0.5, sig.level = .05, power = .80, type = "paired")

# One-way ANOVA (k groups)
pwr.anova.test(k = 3, f = 0.25, sig.level = .05, power = .80)

# Correlation
pwr.r.test(r = 0.3, sig.level = .05, power = .80)

# Multiple regression (u predictors)
pwr.f2.test(u = 3, f2 = 0.15, sig.level = .05, power = .80)

# Chi-square
pwr.chisq.test(w = 0.3, df = 2, sig.level = .05, power = .80)

Data Cleaning Template

library(tidyverse)
library(here)

# Load data
raw_data <- read_csv(here("data", "raw_data.csv"))

# ── Exclusion criteria ────────────────────────────────────────────────────────
# 1. Completion time (< 50% of median = speeder)
median_time <- median(raw_data$duration_s, na.rm = TRUE)
excluded_speeders <- raw_data %>% 
  filter(duration_s < 0.5 * median_time)

# 2. Attention checks
excluded_attn <- raw_data %>%
  filter(attn_check_1 != 4 | attn_check_2 != 2)  # expected responses

# 3. Apply exclusions
data_clean <- raw_data %>%
  filter(
    duration_s >= 0.5 * median_time,
    attn_check_1 == 4,
    attn_check_2 == 2
  )

# Report exclusions
cat("Original N:", nrow(raw_data), "\n")
cat("Excluded (speeders):", nrow(excluded_speeders), "\n")
cat("Excluded (attention):", nrow(excluded_attn) - sum(excluded_attn$id %in% excluded_speeders$id), "\n")
cat("Final N:", nrow(data_clean), "\n")

# ── Scale scoring ─────────────────────────────────────────────────────────────
# Reverse score items (replace 4 with relevant max)
data_clean <- data_clean %>%
  mutate(
    item_3r = 8 - item_3,  # 7-point scale
    item_7r = 8 - item_7
  )

# Compute scale means
data_clean <- data_clean %>%
  mutate(
    scale_dv = rowMeans(select(., item_1, item_2, item_3r), na.rm = TRUE),
    scale_cv = rowMeans(select(., item_4, item_5), na.rm = TRUE)
  )

# Internal reliability
library(psych)
alpha_dv <- alpha(data_clean %>% select(item_1, item_2, item_3r))
cat("DV reliability: α =", round(alpha_dv$total$raw_alpha, 3), "\n")

Hypothesis Testing Templates

# ── Independent samples t-test ────────────────────────────────────────────────
t_result <- t.test(dv ~ condition, data = data_clean, var.equal = FALSE)
d_result <- cohens_d(dv ~ condition, data = data_clean)

# APA report
cat(sprintf(
  "t(%s) = %.2f, p = %s, d = %.2f, 95%% CI [%.2f, %.2f]\n",
  round(t_result$parameter, 1),
  t_result$statistic,
  ifelse(t_result$p.value < .001, "< .001", sprintf("= .%03d", round(t_result$p.value * 1000))),
  d_result$Cohens_d,
  d_result$CI_low,
  d_result$CI_high
))

# ── One-way ANOVA ─────────────────────────────────────────────────────────────
library(effectsize)
aov_result <- aov(dv ~ condition, data = data_clean)
eta_sq <- eta_squared(aov_result, partial = TRUE, ci = 0.90)
summary(aov_result)

# Post-hoc (Tukey)
TukeyHSD(aov_result)

# ── Multiple regression ───────────────────────────────────────────────────────
model <- lm(dv ~ pred1 + pred2 + pred3, data = data_clean)
summary(model)
# Standardized betas
library(effectsize)
standardize_parameters(model)

# ── Mixed effects model ───────────────────────────────────────────────────────
library(lme4); library(lmerTest)
mixed_model <- lmer(dv ~ time * condition + (1 + time | subject_id), data = data_clean)
summary(mixed_model)
# Effect sizes for mixed models
library(effectsize)
eta_squared(mixed_model)

APA Results Formatting Helper

# Function to format p values APA style
apa_p <- function(p) {
  if (p < .001) "< .001"
  else sprintf("= .%03d", round(p * 1000))
}

# Function to generate APA t-test string
apa_ttest <- function(t_result, d_result) {
  sprintf("t(%s) = %.2f, p %s, d = %.2f, 95%% CI [%.2f, %.2f]",
    round(t_result$parameter, 1),
    t_result$statistic,
    apa_p(t_result$p.value),
    d_result$Cohens_d,
    d_result$CI_low, d_result$CI_high
  )
}

# Usage example
result_string <- apa_ttest(t_result, d_result)
cat("H1:", result_string)

Visualization Templates (ggplot2)

library(tidyverse)

# ── Bar plot with error bars (means ± 95% CI) ─────────────────────────────────
data_summary <- data_clean %>%
  group_by(condition) %>%
  summarize(
    M = mean(dv),
    SE = sd(dv) / sqrt(n()),
    CI_lower = M - 1.96 * SE,
    CI_upper = M + 1.96 * SE,
    .groups = "drop"
  )

ggplot(data_summary, aes(x = condition, y = M, fill = condition)) +
  geom_col(width = 0.6, alpha = 0.85) +
  geom_errorbar(aes(ymin = CI_lower, ymax = CI_upper), width = 0.2) +
  labs(x = "Condition", y = "Mean DV Score", title = "Figure 1") +
  theme_classic() +
  theme(legend.position = "none") +
  scale_fill_grey()  # APA-friendly greyscale

# Save for manuscript
ggsave("figures/figure1.pdf", width = 6, height = 4)

# ── Violin plot ───────────────────────────────────────────────────────────────
ggplot(data_clean, aes(x = condition, y = dv, fill = condition)) +
  geom_violin(alpha = 0.5, trim = FALSE) +
  geom_boxplot(width = 0.1, outlier.shape = NA) +
  stat_summary(fun = mean, geom = "point", size = 3, shape = 18) +
  theme_classic() +
  scale_fill_grey()

Meta-Analysis Template (metafor)

library(metafor)

# Input: effect sizes from primary studies
meta_data <- data.frame(
  study = c("Smith2020", "Jones2021", "Lee2022"),
  yi    = c(0.45, 0.32, 0.58),   # Cohen's d
  vi    = c(0.04, 0.05, 0.06)    # variance
)

# Random-effects meta-analysis
res <- rma(yi, vi, data = meta_data, method = "REML")
summary(res)

# Forest plot
forest(res, slab = meta_data$study)

# Funnel plot (publication bias)
funnel(res)
regtest(res)  # Egger's test

Output for 礼部

Always provide results in this format for 礼部 to embed in the manuscript:

# Analysis Results Summary (APA Format)

## Descriptive Statistics
M = [x.xx], SD = [x.xx], N = [N], α = [.xx]

## H1: [Hypothesis statement]
[APA result string]
Interpretation: [one sentence: supported / not supported, why]

## H2: ...

Save results to memory/analyses/[study-name]/results.md.