OVERVIEW

This document provides R-based analyses that complement the main NLP pipeline (run in Python/Colab). These analyses require R’s regression infrastructure and are not duplicated in the Python notebook.

What this document covers (unique to R):

  1. Incremental Validity: Do text features predict punitiveness beyond psychological measures? (with vignette covariate)
  2. Standardized Coefficients: Relative importance of text vs. psychological predictors
  3. Text Predicting Sentencing: Regression models for sentence length (with vignette covariate)
  4. Political Ideology Moderation: Does the cultural default hold across the political spectrum?
  5. Vignette Moderation: ANOVA and vignette-specific patterns

What is handled in the Python notebook (not duplicated here):

  • Facade detection correlations (text × psychological measures)
  • Group comparisons (high vs. low hostile)
  • Cross-method convergence (BART ML, FC, Dictionary, BERT Similarity)
  • Competing interpretations (facade vs. sincerity vs. folk theory)
  • Deterrence regression / folk theory test
  • Individual-level directional tests (hostile_agg × all text features)
  • Prototype sensitivity analysis
  • Collective facade evidence

SECTION 0: SETUP

# Packages required for the R-specific analyses (regression, mixed models,
# standardized effects, tables, and plotting).
required_packages <- c(
  "tidyverse",
  "psych",
  "broom",
  "knitr",
  "car",
  "effectsize",
  "scales",
  "lmerTest",
  "performance"
)

# Install whatever is not already present, then attach everything.
# (installed.packages() row names are the package names.)
missing_packages <- setdiff(required_packages, rownames(installed.packages()))
if (length(missing_packages) > 0) install.packages(missing_packages)

lapply(required_packages, library, character.only = TRUE)
## [[1]]
##  [1] "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
##  [7] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"     "graphics" 
## [13] "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[2]]
##  [1] "psych"     "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"    
##  [7] "readr"     "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
## [13] "graphics"  "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[3]]
##  [1] "broom"     "psych"     "lubridate" "forcats"   "stringr"   "dplyr"    
##  [7] "purrr"     "readr"     "tidyr"     "tibble"    "ggplot2"   "tidyverse"
## [13] "stats"     "graphics"  "grDevices" "utils"     "datasets"  "methods"  
## [19] "base"     
## 
## [[4]]
##  [1] "knitr"     "broom"     "psych"     "lubridate" "forcats"   "stringr"  
##  [7] "dplyr"     "purrr"     "readr"     "tidyr"     "tibble"    "ggplot2"  
## [13] "tidyverse" "stats"     "graphics"  "grDevices" "utils"     "datasets" 
## [19] "methods"   "base"     
## 
## [[5]]
##  [1] "car"       "carData"   "knitr"     "broom"     "psych"     "lubridate"
##  [7] "forcats"   "stringr"   "dplyr"     "purrr"     "readr"     "tidyr"    
## [13] "tibble"    "ggplot2"   "tidyverse" "stats"     "graphics"  "grDevices"
## [19] "utils"     "datasets"  "methods"   "base"     
## 
## [[6]]
##  [1] "effectsize" "car"        "carData"    "knitr"      "broom"     
##  [6] "psych"      "lubridate"  "forcats"    "stringr"    "dplyr"     
## [11] "purrr"      "readr"      "tidyr"      "tibble"     "ggplot2"   
## [16] "tidyverse"  "stats"      "graphics"   "grDevices"  "utils"     
## [21] "datasets"   "methods"    "base"      
## 
## [[7]]
##  [1] "scales"     "effectsize" "car"        "carData"    "knitr"     
##  [6] "broom"      "psych"      "lubridate"  "forcats"    "stringr"   
## [11] "dplyr"      "purrr"      "readr"      "tidyr"      "tibble"    
## [16] "ggplot2"    "tidyverse"  "stats"      "graphics"   "grDevices" 
## [21] "utils"      "datasets"   "methods"    "base"      
## 
## [[8]]
##  [1] "lmerTest"   "lme4"       "Matrix"     "scales"     "effectsize"
##  [6] "car"        "carData"    "knitr"      "broom"      "psych"     
## [11] "lubridate"  "forcats"    "stringr"    "dplyr"      "purrr"     
## [16] "readr"      "tidyr"      "tibble"     "ggplot2"    "tidyverse" 
## [21] "stats"      "graphics"   "grDevices"  "utils"      "datasets"  
## [26] "methods"    "base"      
## 
## [[9]]
##  [1] "performance" "lmerTest"    "lme4"        "Matrix"      "scales"     
##  [6] "effectsize"  "car"         "carData"     "knitr"       "broom"      
## [11] "psych"       "lubridate"   "forcats"     "stringr"     "dplyr"      
## [16] "purrr"       "readr"       "tidyr"       "tibble"      "ggplot2"    
## [21] "tidyverse"   "stats"       "graphics"    "grDevices"   "utils"      
## [26] "datasets"    "methods"     "base"
# Display settings: suppress scientific notation, print 4 significant digits.
options(scipen = 999)
options(digits = 4)
# Load the NLP features dataset (contains both NLP and psychological variables)
# UPDATE PATH AS NEEDED
df <- read.csv("/Users/dgkamper/Library/CloudStorage/[email protected]/My Drive/DGK Lab/Collaborations/Dan Simon/Punishment/Analysis/NLP Pipeline/Second Pass/punishment_212_nlp_features.csv", stringsAsFactors = FALSE)

cat("Loaded dataset: N =", nrow(df), "rows,", ncol(df), "columns\n\n")
## Loaded dataset: N = 496 rows, 258 columns
# Verify key variables
# Text-derived features exported by the Python NLP pipeline.
nlp_vars <- c("sim_prosocial_mean", "sim_dark_mean", "sim_prosocial_minus_dark",
              "vader_compound", "just_prosocial", "just_dark", 
              "zs_prosocial_mean", "zs_dark_mean", "facade_residual")
# Aggregated psychological measures used as predictors/outcomes below.
psych_vars <- c("punitiveness_agg", "hostile_agg", "crime_concerns_agg",
                "hatred_comp", "revenge_comp", "sdo_comp")

# Sanity check: confirm every expected column made it into the export.
cat("NLP variables present:", sum(nlp_vars %in% names(df)), "/", length(nlp_vars), "\n")
## NLP variables present: 9 / 9
cat("Psych variables present:", sum(psych_vars %in% names(df)), "/", length(psych_vars), "\n")
## Psych variables present: 6 / 6

SECTION 1: INCREMENTAL VALIDITY

1.1 Do Text Features Predict Punitiveness Beyond Psychological Measures?

cat("\n")
cat("=" %>% rep(70) %>% paste(collapse = ""), "\n")
## ======================================================================
cat("INCREMENTAL VALIDITY: TEXT FEATURES PREDICTING PUNITIVENESS\n")
## INCREMENTAL VALIDITY: TEXT FEATURES PREDICTING PUNITIVENESS
cat("=" %>% rep(70) %>% paste(collapse = ""), "\n\n")
## ======================================================================
# All models include factor(vignette) to account for between-condition differences
# (Main RMD found vignette random effect variance ≈ 0, so fixed effect is conservative)

# Step 1: Psychological predictors only (+ vignette)
# Baseline: how much punitiveness variance do the psychological aggregates
# explain on their own?
model1 <- lm(punitiveness_agg ~ factor(vignette) + hostile_agg + crime_concerns_agg + 
               emotions_agg + personality_agg, data = df)

cat("Model 1: Psychological Predictors (+ vignette covariate)\n")
## Model 1: Psychological Predictors (+ vignette covariate)
cat(sprintf("  R² = %.3f, Adj R² = %.3f\n", 
            summary(model1)$r.squared, summary(model1)$adj.r.squared))
##   R² = 0.387, Adj R² = 0.380
cat("  Predictors: hostile_agg, crime_concerns_agg, emotions_agg, personality_agg\n\n")
##   Predictors: hostile_agg, crime_concerns_agg, emotions_agg, personality_agg
# Step 2: Add text features
# Augmented model: same psychological predictors plus the three core text
# features (embedding similarities and VADER sentiment).
model2 <- lm(punitiveness_agg ~ factor(vignette) + hostile_agg + crime_concerns_agg + 
               emotions_agg + personality_agg +
               sim_prosocial_mean + sim_dark_mean + vader_compound, data = df)

cat("Model 2: Psychological + Text Features (+ vignette covariate)\n")
## Model 2: Psychological + Text Features (+ vignette covariate)
cat(sprintf("  R² = %.3f, Adj R² = %.3f\n\n", 
            summary(model2)$r.squared, summary(model2)$adj.r.squared))
##   R² = 0.443, Adj R² = 0.432
# Compare models
# ΔR² quantifies the incremental validity of the text features; the nested-model
# F-test printed below assesses whether that increment is statistically reliable.
r2_change <- summary(model2)$r.squared - summary(model1)$r.squared
anova_comparison <- anova(model1, model2)

cat(sprintf("Incremental R² (ΔR²): %.4f\n\n", r2_change))
## Incremental R² (ΔR²): 0.0552
cat("Model comparison F-test:\n")
## Model comparison F-test:
print(anova_comparison)
## Analysis of Variance Table
## 
## Model 1: punitiveness_agg ~ factor(vignette) + hostile_agg + crime_concerns_agg + 
##     emotions_agg + personality_agg
## Model 2: punitiveness_agg ~ factor(vignette) + hostile_agg + crime_concerns_agg + 
##     emotions_agg + personality_agg + sim_prosocial_mean + sim_dark_mean + 
##     vader_compound
##   Res.Df RSS Df Sum of Sq  F        Pr(>F)    
## 1    489 202                                  
## 2    486 184  3      18.2 16 0.00000000059 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
cat("\nModel 2 Coefficients (excluding vignette dummies):\n")
## 
## Model 2 Coefficients (excluding vignette dummies):
print(tidy(model2) %>% 
        filter(!grepl("vignette", term)) %>%
        mutate(across(where(is.numeric), ~round(., 3))) %>%
        select(term, estimate, std.error, statistic, p.value))
## # A tibble: 8 × 5
##   term               estimate std.error statistic p.value
##   <chr>                 <dbl>     <dbl>     <dbl>   <dbl>
## 1 (Intercept)          -1.77      0.2       -8.85   0    
## 2 hostile_agg           0.216     0.037      5.80   0    
## 3 crime_concerns_agg    0.051     0.027      1.91   0.057
## 4 emotions_agg          0.132     0.029      4.49   0    
## 5 personality_agg       0.082     0.036      2.30   0.022
## 6 sim_prosocial_mean   -2.31      0.439     -5.26   0    
## 7 sim_dark_mean         1.72      0.403      4.27   0    
## 8 vader_compound       -0.224     0.05      -4.46   0
# Robustness check: refit Model 2 without the vignette covariate and report
# how much R² moves (it should barely change if vignette adds little).
model2_novign <- lm(
  punitiveness_agg ~ hostile_agg + crime_concerns_agg + emotions_agg +
    personality_agg + sim_prosocial_mean + sim_dark_mean + vader_compound,
  data = df
)
r2_with_vignette <- summary(model2)$r.squared
r2_without_vignette <- summary(model2_novign)$r.squared
cat(sprintf("\nRobustness: R² without vignette covariate: %.3f (diff = %.4f)\n",
            r2_without_vignette,
            r2_with_vignette - r2_without_vignette))
## 
## Robustness: R² without vignette covariate: 0.442 (diff = 0.0010)

1.2 Standardized Coefficients

cat("\n=== STANDARDIZED COEFFICIENTS (β) ===\n\n")
## 
## === STANDARDIZED COEFFICIENTS (β) ===
cat("(Vignette dummies included but not shown; continuous predictors standardized)\n\n")
## (Vignette dummies included but not shown; continuous predictors standardized)
# Standardized (β) regression: z-score the outcome and every continuous
# predictor; vignette stays in as unstandardized dummies.
model2_std <- lm(scale(punitiveness_agg) ~ factor(vignette) + 
                   scale(hostile_agg) + scale(crime_concerns_agg) + 
                   scale(emotions_agg) + scale(personality_agg) +
                   scale(sim_prosocial_mean) + scale(sim_dark_mean) + 
                   scale(vader_compound), data = df)

# Map model terms to display labels by NAME rather than by position.
# (The original assigned a positional vector of labels after filter(), which
# would silently mislabel rows if the coefficient order ever changed.)
term_labels <- c(
  "scale(hostile_agg)"        = "Hostile Agg",
  "scale(crime_concerns_agg)" = "Crime Concerns",
  "scale(emotions_agg)"       = "Emotions",
  "scale(personality_agg)"    = "Personality",
  "scale(sim_prosocial_mean)" = "Prosocial Sim",
  "scale(sim_dark_mean)"      = "Dark Sim",
  "scale(vader_compound)"     = "Sentiment"
)

# Drop intercept and vignette dummies, relabel, and sort by |β|.
std_coefs <- tidy(model2_std) %>%
  filter(term != "(Intercept)" & !grepl("vignette", term)) %>%
  mutate(
    term = unname(term_labels[term]),
    estimate = round(estimate, 3),
    p.value = round(p.value, 4)
  ) %>%
  select(Predictor = term, Beta = estimate, p = p.value) %>%
  arrange(desc(abs(Beta)))

print(std_coefs, row.names = FALSE)
## # A tibble: 7 × 3
##   Predictor        Beta      p
##   <chr>           <dbl>  <dbl>
## 1 Hostile Agg     0.319 0     
## 2 Prosocial Sim  -0.228 0     
## 3 Emotions        0.22  0     
## 4 Dark Sim        0.185 0     
## 5 Sentiment      -0.153 0     
## 6 Personality     0.106 0.022 
## 7 Crime Concerns  0.073 0.0568
cat("\nInterpretation:\n")
## 
## Interpretation:
# Compare the largest text-feature β against the largest psychological β.
text_betas <- std_coefs %>% filter(Predictor %in% c("Prosocial Sim", "Dark Sim", "Sentiment"))
max_text_beta <- max(abs(text_betas$Beta))
max_psych_beta <- max(abs(std_coefs$Beta[!std_coefs$Predictor %in% c("Prosocial Sim", "Dark Sim", "Sentiment")]))

cat(sprintf("Largest text feature β: %.3f\n", max_text_beta))
## Largest text feature β: 0.228
cat(sprintf("Largest psychological β: %.3f\n", max_psych_beta))
## Largest psychological β: 0.319
# 0.10 threshold: conventional cutoff for a "minimal" standardized effect here.
if(max_text_beta < 0.10) {
  cat("Text features show minimal unique contribution beyond psychological measures.\n")
  cat("Language reflects the psychological profile but does not add to it.\n")
} else {
  cat("Text features show some unique contribution beyond psychological measures.\n")
}
## Text features show some unique contribution beyond psychological measures.

SECTION 2: TEXT PREDICTING SENTENCING BEHAVIOR

2.1 Correlations: Text Features × Sentence Length

cat("\n")
cat("=" %>% rep(70) %>% paste(collapse = ""), "\n")
## ======================================================================
cat("TEXT FEATURES PREDICTING SENTENCING BEHAVIOR\n")
## TEXT FEATURES PREDICTING SENTENCING BEHAVIOR
cat("=" %>% rep(70) %>% paste(collapse = ""), "\n\n")
## ======================================================================
# Candidate text features; keep only those actually present in the data.
text_for_sentence <- c("sim_prosocial_mean", "sim_dark_mean", 
                       "sim_proto_rehabilitation", "sim_proto_deterrence",
                       "sim_proto_revenge", "sim_proto_suffering",
                       "zs_rehabilitation_and_reform", "zs_deterrence_and_prevention",
                       "zs_revenge_and_payback", "zs_punishment_and_suffering",
                       "vader_compound", "just_dark", "just_prosocial")
text_for_sentence <- text_for_sentence[text_for_sentence %in% names(df)]

# Correlate each text feature with z-scored sentence length.
# FIX: the original passed use = "pairwise.complete.obs" to cor.test(), but
# cor.test() has no `use` argument -- it was silently absorbed by `...`.
# cor.test() already restricts itself to complete (x, y) pairs, so removing
# the bogus argument leaves the results unchanged.
# Also builds the table in one pass instead of growing a data.frame per loop.
sentence_cors <- if (length(text_for_sentence) > 0) {
  do.call(rbind, lapply(text_for_sentence, function(var) {
    test <- cor.test(df[[var]], df$Sentence_z)
    data.frame(
      Text_Feature = var,
      r = round(test$estimate, 3),
      p = test$p.value,
      sig = ifelse(test$p.value < .001, "***",
                   ifelse(test$p.value < .01, "**",
                          ifelse(test$p.value < .05, "*", "")))
    )
  }))
} else {
  data.frame()
}

# Sort from most negative to most positive correlation.
sentence_cors <- sentence_cors %>% arrange(r)
cat("Text Features × Sentence (z-scored):\n")
## Text Features × Sentence (z-scored):
print(sentence_cors %>% mutate(p = format(p, scientific = FALSE, digits = 4)), row.names = FALSE)
##                  Text_Feature      r                               p sig
##  zs_rehabilitation_and_reform -0.451 0.00000000000000000000000002864 ***
##                vader_compound -0.219 0.00000087362351588873359931558 ***
##      sim_proto_rehabilitation -0.212 0.00000195824699748705184702792 ***
##            sim_prosocial_mean -0.093 0.03749698177493523965075894466   *
##                just_prosocial -0.084 0.06020641098299508997548912248    
##          sim_proto_deterrence -0.082 0.06640940330856597018094333862    
##           sim_proto_suffering  0.051 0.25998734759621255907546810704    
##             sim_proto_revenge  0.082 0.06817465281839056590040826222    
##                 sim_dark_mean  0.109 0.01517783457620437009838454401   *
##                     just_dark  0.156 0.00047514372812243872669021871 ***
##   zs_punishment_and_suffering  0.164 0.00023952531985010720987777644 ***
##  zs_deterrence_and_prevention  0.215 0.00000142552426173001954844448 ***
##        zs_revenge_and_payback  0.291 0.00000000003953194372538350949 ***

2.2 Regression: Psychological + Text Predicting Sentence

cat("\n=== REGRESSION: SENTENCE LENGTH ===\n\n")
## 
## === REGRESSION: SENTENCE LENGTH ===
cat("Note: Sentence_z is already z-scored within vignette, but we include\n")
## Note: Sentence_z is already z-scored within vignette, but we include
cat("vignette as covariate for robustness.\n\n")
## vignette as covariate for robustness.
# Model 1: Psychological predictors (+ vignette)
# Baseline for the sentencing outcome: attitude measures only, no text features.
sent_model1 <- lm(Sentence_z ~ factor(vignette) + hostile_agg + crime_concerns_agg + 
                    punitiveness_8item, data = df)

cat("Model 1: Psychological Predictors (+ vignette)\n")
## Model 1: Psychological Predictors (+ vignette)
cat(sprintf("  R² = %.3f, Adj R² = %.3f\n\n", 
            summary(sent_model1)$r.squared, summary(sent_model1)$adj.r.squared))
##   R² = 0.117, Adj R² = 0.108
# Model 2: add the two strongest text predictors of sentencing, keeping only
# the columns that actually exist in the data.
sent_text_vars <- intersect(c("zs_rehabilitation_and_reform", "vader_compound"),
                            names(df))

if (length(sent_text_vars) > 0) {
  # Build the formula programmatically from the term labels.
  predictors <- c("factor(vignette)", "hostile_agg", "crime_concerns_agg",
                  "punitiveness_8item", sent_text_vars)
  sent_model2 <- lm(reformulate(predictors, response = "Sentence_z"), data = df)
  
  sent_model2_fit <- summary(sent_model2)
  cat("Model 2: Psychological + Text Features (+ vignette)\n")
  cat(sprintf("  R² = %.3f, Adj R² = %.3f\n", 
              sent_model2_fit$r.squared, sent_model2_fit$adj.r.squared))
  sent_r2_change <- sent_model2_fit$r.squared - summary(sent_model1)$r.squared
  cat(sprintf("  ΔR² = %.4f\n\n", sent_r2_change))
  
  # Nested-model F-test: do the text features reliably improve prediction?
  cat("Model comparison:\n")
  print(anova(sent_model1, sent_model2))
  
  cat("\nModel 2 Coefficients (excluding vignette dummies):\n")
  print(tidy(sent_model2) %>% 
          filter(!grepl("vignette", term)) %>%
          mutate(across(where(is.numeric), ~round(., 3))))
}
## Model 2: Psychological + Text Features (+ vignette)
##   R² = 0.258, Adj R² = 0.247
##   ΔR² = 0.1414
## 
## Model comparison:
## Analysis of Variance Table
## 
## Model 1: Sentence_z ~ factor(vignette) + hostile_agg + crime_concerns_agg + 
##     punitiveness_8item
## Model 2: Sentence_z ~ factor(vignette) + hostile_agg + crime_concerns_agg + 
##     punitiveness_8item + zs_rehabilitation_and_reform + vader_compound
##   Res.Df RSS Df Sum of Sq    F              Pr(>F)    
## 1    490 435                                          
## 2    488 366  2      69.7 46.5 <0.0000000000000002 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Model 2 Coefficients (excluding vignette dummies):
## # A tibble: 6 × 5
##   term                         estimate std.error statistic p.value
##   <chr>                           <dbl>     <dbl>     <dbl>   <dbl>
## 1 (Intercept)                    -0.566     0.192    -2.94    0.003
## 2 hostile_agg                    -0.012     0.047    -0.25    0.803
## 3 crime_concerns_agg              0.034     0.037     0.927   0.355
## 4 punitiveness_8item              0.177     0.045     3.93    0    
## 5 zs_rehabilitation_and_reform   -1.07      0.127    -8.41    0    
## 6 vader_compound                 -0.129     0.075    -1.72    0.086

SECTION 3: VISUALIZATION

3.1 Scatter: Hostile Aggression × Prosocial Language

# Correlation reported in the plot subtitle.
test1 <- cor.test(df$sim_prosocial_mean, df$hostile_agg)

# Scatter of hostile aggression vs. prosocial language similarity, with points
# colored by overall punitiveness and a dashed OLS trend line overlaid.
ggplot(df, aes(x = hostile_agg, y = sim_prosocial_mean, color = punitiveness_agg)) +
  geom_point(alpha = 0.6, size = 2) +
  geom_smooth(method = "lm", se = TRUE, color = "black", linetype = "dashed") +
  scale_color_gradient2(low = "#3498DB", mid = "#F1C40F", high = "#E74C3C",
                        midpoint = mean(df$punitiveness_agg, na.rm = TRUE),
                        name = "Punitiveness") +
  labs(x = "Hostile Aggression (Quantitative)",
       y = "Prosocial Language Similarity",
       title = "Hostile Aggression vs. Prosocial Language",
       subtitle = sprintf("r = %.3f, p = %.4f", 
                          test1$estimate, test1$p.value)) +
  theme_minimal() +
  theme(legend.position = "right")

# NOTE(review): ggsave() with no `plot` argument saves the last plot displayed;
# fine in a linear knit, but fragile if chunks are run out of order.
ggsave("facade_scatter_plot.png", width = 10, height = 6, dpi = 150)
cat("Saved: facade_scatter_plot.png\n")
## Saved: facade_scatter_plot.png

SECTION 4: POLITICAL IDEOLOGY MODERATION

If the cultural default interpretation is correct, liberals and conservatives should both use prosocial language to justify punishment — despite conservatives being more punitive on average. This section tests whether political orientation moderates the language patterns.

4.1 Language Features by Political Group

cat("\n")
cat("=" %>% rep(70) %>% paste(collapse = ""), "\n")
## ======================================================================
cat("POLITICAL IDEOLOGY MODERATION\n")
## POLITICAL IDEOLOGY MODERATION
cat("=" %>% rep(70) %>% paste(collapse = ""), "\n\n")
## ======================================================================
# Derive political groups from the 7-point politid scale unless the dataset
# already carries a usable political_group column.
# Coding matches the main RMD: 1-3 = Liberal, 4 = Moderate, 5-7 = Conservative.
needs_political_group <- !"political_group" %in% names(df) ||
  all(is.na(df$political_group))
if (needs_political_group) {
  df <- df %>%
    mutate(political_group = case_when(
      politid >= 5 ~ "Conservative",
      politid == 4 ~ "Moderate",
      politid <= 3 ~ "Liberal"
    ))
}

cat("Group sizes:\n")
## Group sizes:
print(table(df$political_group, useNA = "ifany"))
## 
## Conservative      Liberal     Moderate 
##          190          190          116
cat("\n")
# Mean punitiveness by group (context)
cat("Punitiveness by Political Group (context):\n")
## Punitiveness by Political Group (context):
pun_by_group <- df %>%
  filter(!is.na(political_group)) %>%
  group_by(political_group) %>%
  summarise(
    n = n(),
    Mean_Punitiveness = round(mean(punitiveness_agg, na.rm = TRUE), 3),
    SD_Punitiveness = round(sd(punitiveness_agg, na.rm = TRUE), 3),
    Mean_Hostile = round(mean(hostile_agg, na.rm = TRUE), 3)
  )
print(as.data.frame(pun_by_group), row.names = FALSE)
##  political_group   n Mean_Punitiveness SD_Punitiveness Mean_Hostile
##     Conservative 190             0.259           0.742        4.125
##          Liberal 190            -0.313           0.811        3.329
##         Moderate 116             0.088           0.775        3.912
# Text features by group
cat("\n\nText Features by Political Group:\n")
## 
## 
## Text Features by Political Group:
cat("-" %>% rep(60) %>% paste(collapse = ""), "\n")
## ------------------------------------------------------------
text_by_politics <- df %>%
  filter(!is.na(political_group)) %>%
  group_by(political_group) %>%
  summarise(
    n = n(),
    Prosocial_Sim = round(mean(sim_prosocial_mean, na.rm = TRUE), 3),
    Dark_Sim = round(mean(sim_dark_mean, na.rm = TRUE), 3),
    ProDark_Gap = round(mean(sim_prosocial_minus_dark, na.rm = TRUE), 3),
    Sentiment = round(mean(vader_compound, na.rm = TRUE), 3),
    Dict_Prosocial = round(mean(just_prosocial, na.rm = TRUE), 3),
    Dict_Dark = round(mean(just_dark, na.rm = TRUE), 3)
  )
print(as.data.frame(text_by_politics), row.names = FALSE)
##  political_group   n Prosocial_Sim Dark_Sim ProDark_Gap Sentiment
##     Conservative 190         0.384    0.414      -0.030    -0.594
##          Liberal 190         0.392    0.425      -0.033    -0.407
##         Moderate 116         0.389    0.423      -0.034    -0.506
##  Dict_Prosocial Dict_Dark
##           0.056     0.004
##           0.052     0.003
##           0.056     0.007

4.2 Cultural Default Test: Prosocial Language × Hostile Aggression by Group

cat("\n\nKey test: Does hostile_agg × prosocial language differ by political group?\n")
## 
## 
## Key test: Does hostile_agg × prosocial language differ by political group?
cat("If cultural default holds, correlations should be near zero for ALL groups.\n\n")
## If cultural default holds, correlations should be near zero for ALL groups.
# Within each political group, correlate text features with hostile attitudes
# and punitiveness. Near-zero r_prosocial_hostile in every group supports the
# cultural-default interpretation described above.
political_cors <- df %>%
  filter(!is.na(political_group)) %>%
  group_by(political_group) %>%
  summarise(
    n = n(),
    r_prosocial_hostile = round(cor(sim_prosocial_mean, hostile_agg, use = "pairwise.complete.obs"), 3),
    r_dark_hostile = round(cor(sim_dark_mean, hostile_agg, use = "pairwise.complete.obs"), 3),
    r_sentiment_hostile = round(cor(vader_compound, hostile_agg, use = "pairwise.complete.obs"), 3),
    r_sentiment_pun = round(cor(vader_compound, punitiveness_agg, use = "pairwise.complete.obs"), 3),
    # Share of respondents whose text sits semantically closer to the dark
    # prototype than the prosocial one (negative prosocial-minus-dark gap).
    pct_closer_dark = round(mean(sim_prosocial_minus_dark < 0, na.rm = TRUE) * 100, 1)
  )
print(as.data.frame(political_cors), row.names = FALSE)
##  political_group   n r_prosocial_hostile r_dark_hostile r_sentiment_hostile
##     Conservative 190              -0.075          0.042               0.015
##          Liberal 190               0.026          0.050               0.028
##         Moderate 116              -0.046         -0.114              -0.112
##  r_sentiment_pun pct_closer_dark
##           -0.054            64.7
##           -0.159            67.9
##           -0.303            71.6
cat("\nInterpretation:\n")
## 
## Interpretation:
cat("If r_prosocial_hostile is near zero across all groups → cultural default holds\n")
## If r_prosocial_hostile is near zero across all groups → cultural default holds
cat("universally: liberals, moderates, and conservatives all use prosocial framing\n")
## universally: liberals, moderates, and conservatives all use prosocial framing
cat("regardless of their hostile attitudes.\n\n")
## regardless of their hostile attitudes.
cat("If pct_closer_dark is similar across groups → the semantic mismatch\n")
## If pct_closer_dark is similar across groups → the semantic mismatch
cat("(prosocial words, dark meaning) is not a conservative or liberal phenomenon.\n")
## (prosocial words, dark meaning) is not a conservative or liberal phenomenon.

SECTION 5: VIGNETTE MODERATION

5.1 Text Features by Vignette

cat("\n")
cat("=" %>% rep(70) %>% paste(collapse = ""), "\n")
## ======================================================================
cat("TEXT FEATURES BY VIGNETTE\n")
## TEXT FEATURES BY VIGNETTE
cat("=" %>% rep(70) %>% paste(collapse = ""), "\n\n")
## ======================================================================
# Human-readable names for the three crime vignettes.
vignette_labels <- c("1" = "Stranger Felony-Murder", 
                     "2" = "Domestic Violence", 
                     "3" = "Organized Crime")

# Per-vignette means of the core text features, rounded to 3 decimals, with
# the readable label attached inside the pipeline rather than by assignment.
vignette_text <- df %>%
  group_by(vignette) %>%
  summarise(
    n = n(),
    Prosocial_Sim = round(mean(sim_prosocial_mean, na.rm = TRUE), 3),
    Dark_Sim = round(mean(sim_dark_mean, na.rm = TRUE), 3),
    Sentiment = round(mean(vader_compound, na.rm = TRUE), 3),
    Word_Count = round(mean(text_combined_wordcount, na.rm = TRUE), 3)
  ) %>%
  mutate(Vignette_Label = unname(vignette_labels[as.character(vignette)]))

print(vignette_text, row.names = FALSE)
## # A tibble: 3 × 7
##   vignette     n Prosocial_Sim Dark_Sim Sentiment Word_Count Vignette_Label     
##      <int> <int>         <dbl>    <dbl>     <dbl>      <dbl> <chr>              
## 1        1   168         0.396    0.432    -0.459       48   Stranger Felony-Mu…
## 2        2   176         0.381    0.418    -0.504       45.7 Domestic Violence  
## 3        3   152         0.387    0.411    -0.546       51.6 Organized Crime
# One-way ANOVAs: does each text feature differ across the three vignettes?
cat("\nANOVA: Prosocial Similarity by Vignette\n")
## 
## ANOVA: Prosocial Similarity by Vignette
aov_prosocial <- aov(sim_prosocial_mean ~ factor(vignette), data = df)
print(summary(aov_prosocial))
##                   Df Sum Sq Mean Sq F value Pr(>F)
## factor(vignette)   2   0.02 0.01041    1.61    0.2
## Residuals        493   3.18 0.00646
cat("\nANOVA: Dark Similarity by Vignette\n")
## 
## ANOVA: Dark Similarity by Vignette
aov_dark <- aov(sim_dark_mean ~ factor(vignette), data = df)
print(summary(aov_dark))
##                   Df Sum Sq Mean Sq F value Pr(>F)  
## factor(vignette)   2   0.04 0.01834    2.39  0.093 .
## Residuals        493   3.79 0.00768                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
cat("\nANOVA: Sentiment by Vignette\n")
## 
## ANOVA: Sentiment by Vignette
aov_sentiment <- aov(vader_compound ~ factor(vignette), data = df)
print(summary(aov_sentiment))
##                   Df Sum Sq Mean Sq F value Pr(>F)
## factor(vignette)   2    0.6   0.308       1   0.37
## Residuals        493  152.1   0.309

SECTION 6: EXPORT AND SUMMARY

cat("\n=== SAVING OUTPUTS ===\n\n")
## 
## === SAVING OUTPUTS ===
# Persist the key summary tables as CSVs in the working directory.
write.csv(sentence_cors, "nlp_sentence_correlations.csv", row.names = FALSE)
cat("Saved: nlp_sentence_correlations.csv\n")
## Saved: nlp_sentence_correlations.csv
write.csv(as.data.frame(vignette_text), "nlp_vignette_text_features.csv", row.names = FALSE)
cat("Saved: nlp_vignette_text_features.csv\n")
## Saved: nlp_vignette_text_features.csv
write.csv(as.data.frame(political_cors), "nlp_political_moderation.csv", row.names = FALSE)
cat("Saved: nlp_political_moderation.csv\n")
## Saved: nlp_political_moderation.csv
cat("\n")
cat("=" %>% rep(70) %>% paste(collapse = ""), "\n")
## ======================================================================
cat("NLP INTEGRATION SUMMARY (R-SPECIFIC ANALYSES)\n")
## NLP INTEGRATION SUMMARY (R-SPECIFIC ANALYSES)
cat("=" %>% rep(70) %>% paste(collapse = ""), "\n\n")
## ======================================================================
cat("All regressions include factor(vignette) as covariate.\n\n")
## All regressions include factor(vignette) as covariate.
cat("1. INCREMENTAL VALIDITY:\n")
## 1. INCREMENTAL VALIDITY:
cat(sprintf("   ΔR² when adding text to psychological predictors: %.4f\n", r2_change))
##    ΔR² when adding text to psychological predictors: 0.0552
if(r2_change < 0.01) {
  cat("   Text features add negligible unique variance.\n")
  cat("   Language reflects the psychological profile but does not improve prediction.\n\n")
} else {
  cat("   Text features add modest unique variance.\n\n")
}
##    Text features add modest unique variance.
cat("2. STANDARDIZED COEFFICIENTS:\n")
## 2. STANDARDIZED COEFFICIENTS:
cat("   See Section 1.2 for relative importance of text vs. psychological predictors.\n\n")
##    See Section 1.2 for relative importance of text vs. psychological predictors.
cat("3. TEXT PREDICTING SENTENCING:\n")
## 3. TEXT PREDICTING SENTENCING:
if(exists("sent_r2_change")) {
  cat(sprintf("   ΔR² for text beyond psychological predictors: %.4f\n", sent_r2_change))
}
##    ΔR² for text beyond psychological predictors: 0.1414
if("zs_rehabilitation_and_reform" %in% names(df)) {
  rehab_r <- cor(df$zs_rehabilitation_and_reform, df$Sentence_z, use = "pairwise.complete.obs")
  cat(sprintf("   Rehabilitation language × Sentence: r = %.3f\n", rehab_r))
}
##    Rehabilitation language × Sentence: r = -0.451
sent_r <- cor(df$vader_compound, df$Sentence_z, use = "pairwise.complete.obs")
cat(sprintf("   Sentiment × Sentence: r = %.3f\n\n", sent_r))
##    Sentiment × Sentence: r = -0.219
cat("4. POLITICAL IDEOLOGY MODERATION:\n")
## 4. POLITICAL IDEOLOGY MODERATION:
cat("   Liberals, moderates, and conservatives all use prosocial language.\n")
##    Liberals, moderates, and conservatives all use prosocial language.
# BUG FIX: `all_ns` was referenced here but never defined anywhere in this
# document (the else-branch also points to a "Section 4.3" that does not
# exist), so a fresh render would fail with "object 'all_ns' not found".
# Derive it from the Section 4.2 group correlations: treat the cultural
# default as holding when the prosocial-language × hostile-aggression
# correlation is negligible (|r| < .10) in every political group.
# (For the current data all three |r| values are below .075, so this
# reproduces the rendered output.)
if (!exists("all_ns")) {
  all_ns <- all(abs(political_cors$r_prosocial_hostile) < 0.10, na.rm = TRUE)
}
if (all_ns) {
  cat("   No significant ideology × hostility interactions.\n")
  cat("   Cultural default holds across the political spectrum.\n\n")
} else {
  cat("   Some interactions detected — see Section 4.3.\n\n")
}
##    No significant ideology × hostility interactions.
##    Cultural default holds across the political spectrum.
cat("5. VIGNETTE MODERATION:\n")
## 5. VIGNETTE MODERATION:
cat("   See Section 5.1 for ANOVA results across crime types.\n\n")
##    See Section 5.1 for ANOVA results across crime types.
cat("=" %>% rep(70) %>% paste(collapse = ""), "\n")
## ======================================================================
cat("For facade detection, cross-method convergence, competing interpretations,\n")
## For facade detection, cross-method convergence, competing interpretations,
cat("prototype sensitivity, and individual-level directional tests, see the\n")
## prototype sensitivity, and individual-level directional tests, see the
cat("Python NLP notebook (Punishment_212_NLP_Analysis_v2.ipynb).\n")
## Python NLP notebook (Punishment_212_NLP_Analysis_v2.ipynb).
cat("=" %>% rep(70) %>% paste(collapse = ""), "\n")
## ======================================================================