Chapter 3 Structural equation modeling

3.0.1 The classic example of a structural equation model

3.0.2 Fitting a full structural equation model

library(haven)

# Download the Stata example dataset `sem_sm2` from the Stata Press site.
sem.data <- read_dta("http://www.stata-press.com/data/r13/sem_sm2.dta")
# Coerce to a plain matrix: `sem.data` is handed to sem() as `sample.cov` below,
# so the file presumably holds summary (covariance/correlation) data rather
# than raw observations -- TODO confirm against the dataset.
sem.data <- as.matrix(sem.data)

library(lavaan)
# lavaan model syntax: `=~` defines a latent variable through its indicators
# and `~` regresses the left-hand variable on the right-hand ones.
sem.model <- '
  # latent variable measurements
  alien67 =~ anomia67 + pwless67
  alien71 =~ anomia71 + pwless71
  ses66 =~ educ66 + occstat66
  
  # structural models
  alien67 ~ ses66
  alien71 ~ alien67 + ses66
'

# Fit from summary statistics (`sample.cov` + `sample.nobs`), not raw data.
# NOTE(review): sample.nobs is 933 here but 932 in the composite-indicator
# model later in this chapter -- confirm which N is correct.
sem.fit <- sem(sem.model, sample.cov = sem.data, sample.nobs = 933)
# standardized = TRUE adds the Std.lv and Std.all columns to the printout.
summary(sem.fit, standardized = TRUE)

## lavaan 0.6-9 ended normally after 26 iterations
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                        15
##                                                       
##   Number of observations                           933
##                                                       
## Model Test User Model:
##                                                       
##   Test statistic                                71.697
##   Degrees of freedom                                 6
##   P-value (Chi-square)                           0.000
## 
## Parameter Estimates:
## 
##   Standard errors                             Standard
##   Information                                 Expected
##   Information saturated (h1) model          Structured
## 
## Latent Variables:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   alien67 =~                                                            
##     anomia67          1.000                               0.812    0.813
##     pwless67          0.999    0.047   21.436    0.000    0.811    0.812
##   alien71 =~                                                            
##     anomia71          1.000                               0.839    0.840
##     pwless71          0.951    0.045   21.265    0.000    0.798    0.798
##   ses66 =~                                                              
##     educ66            1.000                               0.832    0.833
##     occstat66         0.779    0.063   12.417    0.000    0.648    0.649
## 
## Regressions:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   alien67 ~                                                             
##     ses66            -0.553    0.051  -10.888    0.000   -0.567   -0.567
##   alien71 ~                                                             
##     alien67           0.685    0.052   13.177    0.000    0.663    0.663
##     ses66            -0.153    0.047   -3.237    0.001   -0.151   -0.151
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##    .anomia67          0.339    0.029   11.712    0.000    0.339    0.339
##    .pwless67          0.340    0.029   11.769    0.000    0.340    0.341
##    .anomia71          0.295    0.030    9.919    0.000    0.295    0.295
##    .pwless71          0.363    0.029   12.428    0.000    0.363    0.363
##    .educ66            0.306    0.052    5.906    0.000    0.306    0.307
##    .occstat66         0.579    0.040   14.319    0.000    0.579    0.579
##    .alien67           0.448    0.040   11.242    0.000    0.679    0.679
##    .alien71           0.298    0.031    9.664    0.000    0.424    0.424
##     ses66             0.693    0.067   10.410    0.000    1.000    1.000

library(semPlot)
# Path diagram of the fitted model, drawn entirely in black.
# what = "std" asks semPaths for the standardized solution; fixed and free
# parameters get the same style (black, line type 1) so they look alike.
semPaths(object = sem.fit, what = "std", edge.label.cex = 1, curvePivot = TRUE, 
         fixedStyle = c("black", 1), freeStyle = c("black", 1), 
         edge.color = "black")

# Modification indices for parameters that are not in the fitted model.
# Large values in the `mi` column (here the residual covariances among the
# anomia/pwless indicators) flag candidate parameters to free.
modificationindices(sem.fit)

##         lhs op       rhs     mi    epc sepc.lv sepc.all sepc.nox
## 19  alien67 =~  anomia71  2.474  0.415   0.337    0.337    0.337
## 20  alien67 =~  pwless71  2.474 -0.394  -0.320   -0.321   -0.321
## 21  alien67 =~    educ66  0.223 -0.114  -0.093   -0.093   -0.093
## 22  alien67 =~ occstat66  0.223  0.089   0.072    0.072    0.072
## 23  alien71 =~  anomia67  3.491  0.292   0.245    0.246    0.246
## 24  alien71 =~  pwless67  3.491 -0.292  -0.245   -0.245   -0.245
## 25  alien71 =~    educ66  0.223  0.066   0.055    0.055    0.055
## 26  alien71 =~ occstat66  0.223 -0.051  -0.043   -0.043   -0.043
## 27    ses66 =~  anomia67  3.491  0.113   0.094    0.094    0.094
## 28    ses66 =~  pwless67  3.491 -0.113  -0.094   -0.094   -0.094
## 29    ses66 =~  anomia71  2.474  0.093   0.077    0.077    0.077
## 30    ses66 =~  pwless71  2.474 -0.088  -0.073   -0.073   -0.073
## 32 anomia67 ~~  anomia71 63.854  0.160   0.160    0.507    0.507
## 33 anomia67 ~~  pwless71 49.946 -0.139  -0.139   -0.395   -0.395
## 34 anomia67 ~~    educ66  6.069  0.052   0.052    0.161    0.161
## 35 anomia67 ~~ occstat66  1.261 -0.023  -0.023   -0.052   -0.052
## 36 pwless67 ~~  anomia71 49.929 -0.142  -0.142   -0.447   -0.447
## 37 pwless67 ~~  pwless71 37.398  0.120   0.120    0.341    0.341
## 38 pwless67 ~~    educ66  7.760 -0.059  -0.059   -0.181   -0.181
## 39 pwless67 ~~ occstat66  2.325  0.031   0.031    0.070    0.070
## 41 anomia71 ~~    educ66  3.631  0.039   0.039    0.130    0.130
## 42 anomia71 ~~ occstat66  0.547 -0.015  -0.015   -0.036   -0.036
## 43 pwless71 ~~    educ66  2.696 -0.033  -0.033   -0.099   -0.099
## 44 pwless71 ~~ occstat66  0.127  0.007   0.007    0.016    0.016

3.0.3 Modifying our model

# Same measurement and structural model as the first fit, extended with `~~`
# terms that free the residual covariance between each indicator measured in
# 1967 and its 1971 counterpart (the summary lists them under "Covariances").
sem.model <- '
  # latent variable measurements
  alien67 =~ anomia67 + pwless67
  alien71 =~ anomia71 + pwless71
  ses66 =~ educ66 + occstat66
  
  # structural models
  alien67 ~ ses66
  alien71 ~ alien67 + ses66
  
  # residual correlations
  anomia67 ~~ anomia71
  pwless67 ~~ pwless71
'

# Refit from the same summary statistics; overwrites the previous `sem.fit`.
sem.fit <- sem(sem.model, sample.cov = sem.data, sample.nobs = 933)
# standardized = TRUE adds the Std.lv and Std.all columns to the printout.
summary(sem.fit, standardized = TRUE)

## lavaan 0.6-9 ended normally after 30 iterations
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                        17
##                                                       
##   Number of observations                           933
##                                                       
## Model Test User Model:
##                                                       
##   Test statistic                                 4.780
##   Degrees of freedom                                 4
##   P-value (Chi-square)                           0.311
## 
## Parameter Estimates:
## 
##   Standard errors                             Standard
##   Information                                 Expected
##   Information saturated (h1) model          Structured
## 
## Latent Variables:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   alien67 =~                                                            
##     anomia67          1.000                               0.774    0.775
##     pwless67          1.100    0.069   15.906    0.000    0.852    0.852
##   alien71 =~                                                            
##     anomia71          1.000                               0.805    0.806
##     pwless71          1.033    0.067   15.509    0.000    0.831    0.832
##   ses66 =~                                                              
##     educ66            1.000                               0.841    0.841
##     occstat66         0.763    0.062   12.373    0.000    0.641    0.642
## 
## Regressions:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   alien67 ~                                                             
##     ses66            -0.518    0.051  -10.203    0.000   -0.563   -0.563
##   alien71 ~                                                             
##     alien67           0.590    0.050   11.903    0.000    0.567    0.567
##     ses66            -0.199    0.046   -4.339    0.000   -0.208   -0.208
## 
## Covariances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##  .anomia67 ~~                                                           
##    .anomia71          0.133    0.026    5.177    0.000    0.133    0.356
##  .pwless67 ~~                                                           
##    .pwless71          0.035    0.027    1.304    0.192    0.035    0.121
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##    .anomia67          0.400    0.038   10.443    0.000    0.400    0.400
##    .pwless67          0.274    0.043    6.364    0.000    0.274    0.274
##    .anomia71          0.351    0.041    8.540    0.000    0.351    0.351
##    .pwless71          0.308    0.043    7.082    0.000    0.308    0.308
##    .educ66            0.292    0.053    5.535    0.000    0.292    0.292
##    .occstat66         0.587    0.040   14.602    0.000    0.587    0.588
##    .alien67           0.409    0.039   10.366    0.000    0.683    0.683
##    .alien71           0.326    0.032   10.110    0.000    0.503    0.503
##     ses66             0.707    0.067   10.481    0.000    1.000    1.000

3.0.4 Indirect effects

# Label the structural paths (`b1`, `a1`, `a2`) so they can be reused in `:=`
# definitions: `ind` is the product a1 * b1 (ses66 -> alien67 -> alien71) and
# `tot_eff` adds the direct path a2 to it.
sem.model <- '
  # latent variable measurements
  alien67 =~ anomia67 + pwless67
  alien71 =~ anomia71 + pwless71
  ses66 =~ educ66 + occstat66
  
  # direct effects
  alien67 ~ b1 * ses66
  alien71 ~ a1 * alien67 + a2 * ses66
  
  # indirect effects
  ind := a1 * b1
  
  # total effect
  tot_eff := a1 * b1 + a2
  
  # residual correlations
  anomia67 ~~ anomia71
  pwless67 ~~ pwless71
'

# std.lv = TRUE: in the summary the latent (residual) variances are fixed at
# 1.000 and every loading is estimated, instead of fixing the first loading
# of each latent variable to 1.
sem.fit <- sem(sem.model, sample.cov = sem.data, sample.nobs = 933, std.lv = TRUE)
# standardized = TRUE adds the Std.lv and Std.all columns, including for the
# defined parameters `ind` and `tot_eff`.
summary(sem.fit, standardized = TRUE)

## lavaan 0.6-9 ended normally after 30 iterations
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                        17
##                                                       
##   Number of observations                           933
##                                                       
## Model Test User Model:
##                                                       
##   Test statistic                                 4.780
##   Degrees of freedom                                 4
##   P-value (Chi-square)                           0.311
## 
## Parameter Estimates:
## 
##   Standard errors                             Standard
##   Information                                 Expected
##   Information saturated (h1) model          Structured
## 
## Latent Variables:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   alien67 =~                                                            
##     anomia67          0.640    0.031   20.731    0.000    0.774    0.775
##     pwless67          0.704    0.035   20.047    0.000    0.852    0.852
##   alien71 =~                                                            
##     anomia71          0.571    0.028   20.220    0.000    0.805    0.806
##     pwless71          0.589    0.029   20.216    0.000    0.831    0.832
##   ses66 =~                                                              
##     educ66            0.841    0.040   20.962    0.000    0.841    0.841
##     occstat66         0.641    0.037   17.242    0.000    0.641    0.642
## 
## Regressions:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   alien67 ~                                                             
##     ses66     (b1)   -0.681    0.061  -11.121    0.000   -0.563   -0.563
##   alien71 ~                                                             
##     alien67   (a1)    0.661    0.064   10.282    0.000    0.567    0.567
##     ses66     (a2)   -0.293    0.065   -4.512    0.000   -0.208   -0.208
## 
## Covariances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##  .anomia67 ~~                                                           
##    .anomia71          0.133    0.026    5.177    0.000    0.133    0.356
##  .pwless67 ~~                                                           
##    .pwless71          0.035    0.027    1.304    0.192    0.035    0.121
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##    .anomia67          0.400    0.038   10.443    0.000    0.400    0.400
##    .pwless67          0.274    0.043    6.364    0.000    0.274    0.274
##    .anomia71          0.351    0.041    8.540    0.000    0.351    0.351
##    .pwless71          0.308    0.043    7.082    0.000    0.308    0.308
##    .educ66            0.292    0.053    5.535    0.000    0.292    0.292
##    .occstat66         0.587    0.040   14.602    0.000    0.587    0.588
##    .alien67           1.000                               0.683    0.683
##    .alien71           1.000                               0.503    0.503
##     ses66             1.000                               1.000    1.000
## 
## Defined Parameters:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##     ind              -0.450    0.051   -8.895    0.000   -0.319   -0.319
##     tot_eff          -0.743    0.066  -11.285    0.000   -0.527   -0.527

3.1 Equality constraints

When the same conceptual variable appears more than once as a latent variable in a model, we should consider using equality constraints.
- Before we can say that alienation in 1967 and alienation in 1971 measure the same concept, we need to establish that alienation is measured equivalently at both time points.
With SEM we can test whether the indicators have the same factor loadings across latent variables; this property is called measurement invariance. There are four levels of invariance:
- The first level requires that the same set of indicators be relevant to the latent variable.
  - The first level allows the loadings and the error variances of the observed measures to vary from one wave to the next.
- The second level adds the requirement that some of the indicators have the same loadings and the others differ only by a fairly small amount.
- The third level adds the requirement that all loadings be invariant, but still allows the error variances to be free.
- The fourth level requires that both the loadings and the error variances be invariant across waves.

3.2 Programming constraints

we want to compare estimates from the unstandardized model

# Baseline model for the invariance comparison: loadings are free for both
# waves, with labelled structural paths and the derived `ind` / `tot_eff`.
sem.model <- '
  # latent variable measurements
  alien67 =~ anomia67 + pwless67
  alien71 =~ anomia71 + pwless71
  ses66 =~ educ66 + occstat66
  
  # direct effects
  alien67 ~ b1 * ses66
  alien71 ~ a1 * alien67 + a2 * ses66
  
  # indirect effects
  ind := a1 * b1
  
  # total effect
  tot_eff := a1 * b1 + a2
  
  # residual correlations
  anomia67 ~~ anomia71
  pwless67 ~~ pwless71
'

# meanstructure = TRUE adds an "Intercepts" section to the output (the
# parameter count rises from 17 to 23); std.lv = TRUE fixes the latent
# (residual) variances at 1 so all loadings are estimated.
sem.fit <- sem(sem.model, sample.cov = sem.data, sample.nobs = 933, 
               meanstructure = TRUE, std.lv = TRUE)
# No `standardized` argument: only the raw Estimate column is printed.
summary(sem.fit)

## lavaan 0.6-9 ended normally after 30 iterations
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                        23
##                                                       
##   Number of observations                           933
##                                                       
## Model Test User Model:
##                                                       
##   Test statistic                                 4.780
##   Degrees of freedom                                 4
##   P-value (Chi-square)                           0.311
## 
## Parameter Estimates:
## 
##   Standard errors                             Standard
##   Information                                 Expected
##   Information saturated (h1) model          Structured
## 
## Latent Variables:
##                    Estimate  Std.Err  z-value  P(>|z|)
##   alien67 =~                                          
##     anomia67          0.640    0.031   20.731    0.000
##     pwless67          0.704    0.035   20.047    0.000
##   alien71 =~                                          
##     anomia71          0.571    0.028   20.220    0.000
##     pwless71          0.589    0.029   20.216    0.000
##   ses66 =~                                            
##     educ66            0.841    0.040   20.962    0.000
##     occstat66         0.641    0.037   17.242    0.000
## 
## Regressions:
##                    Estimate  Std.Err  z-value  P(>|z|)
##   alien67 ~                                           
##     ses66     (b1)   -0.681    0.061  -11.121    0.000
##   alien71 ~                                           
##     alien67   (a1)    0.661    0.064   10.282    0.000
##     ses66     (a2)   -0.293    0.065   -4.512    0.000
## 
## Covariances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##  .anomia67 ~~                                         
##    .anomia71          0.133    0.026    5.177    0.000
##  .pwless67 ~~                                         
##    .pwless71          0.035    0.027    1.304    0.192
## 
## Intercepts:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .anomia67          0.000    0.033    0.000    1.000
##    .pwless67          0.000    0.033    0.000    1.000
##    .anomia71          0.000    0.033    0.000    1.000
##    .pwless71          0.000    0.033    0.000    1.000
##    .educ66            0.000    0.033    0.000    1.000
##    .occstat66         0.000    0.033    0.000    1.000
##    .alien67           0.000                           
##    .alien71           0.000                           
##     ses66             0.000                           
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .anomia67          0.400    0.038   10.443    0.000
##    .pwless67          0.274    0.043    6.364    0.000
##    .anomia71          0.351    0.041    8.540    0.000
##    .pwless71          0.308    0.043    7.082    0.000
##    .educ66            0.292    0.053    5.535    0.000
##    .occstat66         0.587    0.040   14.602    0.000
##    .alien67           1.000                           
##    .alien71           1.000                           
##     ses66             1.000                           
## 
## Defined Parameters:
##                    Estimate  Std.Err  z-value  P(>|z|)
##     ind              -0.450    0.051   -8.895    0.000
##     tot_eff          -0.743    0.066  -11.285    0.000

If all loadings are significant, then we meet the first level of invariance.
To test for additional levels of invariance, we need to check for equivalence of the loadings for pwless67 and pwless71.

# Give the pwless67 and pwless71 loadings the same label (`l1`): lavaan then
# estimates a single shared value, reported as one equality constraint.
sem.model <- '
  # latent variable measurements
  alien67 =~ anomia67 + l1 * pwless67
  alien71 =~ anomia71 + l1 * pwless71
  ses66 =~ educ66 + occstat66
  
  # direct effects
  alien67 ~ b1 * ses66
  alien71 ~ a1 * alien67 + a2 * ses66
  
  # indirect effects
  ind := a1 * b1
  
  # total effect
  tot_eff := a1 * b1 + a2
  
  # residual correlations
  anomia67 ~~ anomia71
  pwless67 ~~ pwless71
'

# std.lv = TRUE fixes the latent (residual) variances at 1 so both `l1`
# loadings are estimated.
# NOTE(review): unlike the unconstrained fit just before, this call omits
# meanstructure = TRUE -- confirm that is intentional.
sem.fit <- sem(sem.model, sample.cov = sem.data, sample.nobs = 933, 
               std.lv = TRUE)
# No `standardized` argument: only the raw Estimate column is printed.
summary(sem.fit)

## lavaan 0.6-9 ended normally after 28 iterations
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                        17
##   Number of equality constraints                     1
##                                                       
##   Number of observations                           933
##                                                       
## Model Test User Model:
##                                                       
##   Test statistic                                12.722
##   Degrees of freedom                                 5
##   P-value (Chi-square)                           0.026
## 
## Parameter Estimates:
## 
##   Standard errors                             Standard
##   Information                                 Expected
##   Information saturated (h1) model          Structured
## 
## Latent Variables:
##                    Estimate  Std.Err  z-value  P(>|z|)
##   alien67 =~                                          
##     anomia67          0.612    0.029   21.142    0.000
##     pwless67  (l1)    0.640    0.025   25.679    0.000
##   alien71 =~                                          
##     anomia71          0.591    0.027   21.584    0.000
##     pwless71  (l1)    0.640    0.025   25.679    0.000
##   ses66 =~                                            
##     educ66            0.827    0.039   21.389    0.000
##     occstat66         0.647    0.037   17.678    0.000
## 
## Regressions:
##                    Estimate  Std.Err  z-value  P(>|z|)
##   alien67 ~                                           
##     ses66     (b1)   -0.756    0.060  -12.522    0.000
##   alien71 ~                                           
##     alien67   (a1)    0.581    0.056   10.301    0.000
##     ses66     (a2)   -0.271    0.067   -4.028    0.000
## 
## Covariances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##  .anomia67 ~~                                         
##    .anomia71          0.135    0.025    5.478    0.000
##  .pwless67 ~~                                         
##    .pwless71          0.040    0.026    1.556    0.120
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .anomia67          0.383    0.037   10.462    0.000
##    .pwless67          0.309    0.037    8.265    0.000
##    .anomia71          0.368    0.041    8.971    0.000
##    .pwless71          0.278    0.045    6.140    0.000
##    .educ66            0.314    0.049    6.439    0.000
##    .occstat66         0.580    0.039   14.745    0.000
##    .alien67           1.000                           
##    .alien71           1.000                           
##     ses66             1.000                           
## 
## Defined Parameters:
##                    Estimate  Std.Err  z-value  P(>|z|)
##     ind              -0.439    0.052   -8.499    0.000
##     tot_eff          -0.710    0.061  -11.562    0.000

3.3 Structural model with formative indicators

With reflective indicators, the latent variable causes the responses on the observed variables.
With formative indicators, the observed variables cause the latent variable.

3.3.1 Identification and estimation of a composite latent variable

# ses66 is now a composite: `<~` makes it a weighted sum of its formative
# indicators. The `1 *` prefix fixes the educ66 weight at 1 and the summary
# reports a ses66 variance of 0.000. The alienation loadings keep the shared
# `l1` label, i.e. the equality constraint.
sem.model <- '
  # latent variable measurements
  alien67 =~ anomia67 + l1 * pwless67
  alien71 =~ anomia71 + l1 * pwless71
  ses66 <~ 1 * educ66 + occstat66 # formative indicators
  
  # direct effects
  alien67 ~ b1 * ses66
  alien71 ~ a1 * alien67 + a2 * ses66
  
  # indirect effects
  ind := a1 * b1
  
  # total effect
  tot_eff := a1 * b1 + a2
  
  # residual correlations
  anomia67 ~~ anomia71
  pwless67 ~~ pwless71
'

# No std.lv here, so the first loading of each reflective latent variable is
# fixed at 1 in the output.
# NOTE(review): sample.nobs is 932 in this call but 933 in every other fit in
# this chapter -- confirm the correct N and make the calls consistent.
sem.fit <- sem(sem.model, sample.cov = sem.data, sample.nobs = 932)
# standardized = TRUE adds the Std.lv and Std.all columns to the printout.
summary(sem.fit, standardized = TRUE)

## lavaan 0.6-9 ended normally after 28 iterations
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                        14
##   Number of equality constraints                     1
##                                                       
##   Number of observations                           932
##                                                       
## Model Test User Model:
##                                                       
##   Test statistic                                 5.772
##   Degrees of freedom                                 5
##   P-value (Chi-square)                           0.329
## 
## Parameter Estimates:
## 
##   Standard errors                             Standard
##   Information                                 Expected
##   Information saturated (h1) model          Structured
## 
## Latent Variables:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   alien67 =~                                                            
##     anomia67          1.000                               0.785    0.783
##     pwless67  (l1)    1.068    0.059   18.138    0.000    0.839    0.842
##   alien71 =~                                                            
##     anomia71          1.000                               0.792    0.795
##     pwless71  (l1)    1.068    0.059   18.138    0.000    0.846    0.843
## 
## Composites:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   ses66 <~                                                              
##     educ66            1.000                               0.802    0.801
##     occstat66         0.381    0.111    3.425    0.001    0.306    0.305
## 
## Regressions:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   alien67 ~                                                             
##     ses66     (b1)   -0.309    0.031   -9.961    0.000   -0.491   -0.491
##   alien71 ~                                                             
##     alien67   (a1)    0.611    0.041   15.055    0.000    0.606    0.606
##     ses66     (a2)   -0.102    0.023   -4.361    0.000   -0.161   -0.161
## 
## Covariances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##  .anomia67 ~~                                                           
##    .anomia71          0.134    0.026    5.197    0.000    0.134    0.355
##  .pwless67 ~~                                                           
##    .pwless71          0.034    0.027    1.256    0.209    0.034    0.117
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##    .anomia67          0.389    0.037   10.425    0.000    0.389    0.387
##    .pwless67          0.288    0.040    7.227    0.000    0.288    0.290
##    .anomia71          0.365    0.038    9.601    0.000    0.365    0.367
##    .pwless71          0.291    0.041    7.062    0.000    0.291    0.289
##    .alien67           0.468    0.040   11.844    0.000    0.759    0.759
##    .alien71           0.321    0.030   10.838    0.000    0.512    0.512
##     ses66             0.000                               0.000    0.000
## 
## Defined Parameters:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##     ind              -0.189    0.023   -8.307    0.000   -0.297   -0.297
##     tot_eff          -0.291    0.030   -9.618    0.000   -0.458   -0.458

3.3.2 Multiple indicators, multiple causes model

When we fix the error variance of ses66 to 0 and the unstandardized coefficient from educ66 to ses66 to 1, we are assuming that the latent variable is a perfect composite of its observed formative indicators.
A multiple indicators, multiple causes (MIMIC) model relaxes this assumption by allowing the latent variable to have a nonzero error variance.