One-Way ANOVA on SAS -- Motor Oil Example

The SAS code:
 
 
/****************************************************
A sample SAS program to analyze the Motor Oil data
****************************************************/

/* session-wide listing options: no date stamp on the pages, no
   overprinting, and output lines limited to 68 columns */
options linesize=68 noovp nodate;

/* first-level title; the title2 statements in the later steps add a
   second line underneath it */
title1 "Motor Oil analysis";

/****************************************************
   Virtually all SAS programs consist of a DATA step, where
   the raw data are read into a SAS dataset, and one or more
   procedure (PROC) steps, which perform various analyses
****************************************************/

 
data oil;
   /* build the SAS dataset 'oil' from the in-stream records below:
      one record per measurement, oil type first, viscosity second */

   /* list input; the trailing $ marks 'type' as a character variable,
      'visc' is read as numeric */
   input type $
         visc;

   /* descriptive labels used in place of the variable names in output */
   label type = 'oil type'
         visc = 'viscosity';

   /* in-stream data start on the next line and end at the lone ';' */
   datalines;
CNVNTNL      44
CNVNTNL      49
CNVNTNL      37
CNVNTNL      38
SYNTHET      42
SYNTHET      49
SYNTHET      52
SYNTHET      57
HYBRID       60
HYBRID       58
HYBRID       78
;

/* list the dataset exactly as it was read in, so the values can be
   checked against the source records */
proc print data=oil;
   title2 'raw data';
run;
 
/* reorder 'oil' in place by oil type; the BY statement in the
   PROC MEANS step that follows needs the observations grouped */
proc sort data=oil;
   by type;
run;
 
/* per-group summary: sample size, sample mean and standard deviation,
   printed with at most two decimal places */
proc means data=oil n mean std maxdec=2;
   by type;     /* one set of statistics per oil type ...  */
   var visc;    /* ... computed on the viscosity readings  */
   title2 'simple summary statistics';
run;

/* character plot of the raw observations:
   viscosity on the vertical axis, oil type on the horizontal axis */
proc plot data=oil;
   title2 'plot of the raw data';
   plot visc*type;
run;

proc anova      data=oil;
        /* one-way analysis of variance of viscosity by oil type.
           The groups are unbalanced (4, 4 and 3 observations); PROC ANOVA
           is intended for balanced designs, but a one-way layout is one of
           the cases it handles correctly with unequal cell sizes */

   title2 'Proc anova Analysis';
        /* matches the heading in the recorded output and parallels the
           'Proc glm Analysis' title used by the PROC GLM step */
   class type;
        /* class statement indicates that 'oil type' is a factor */
   model visc = type;
        /* assumes 'oil type' influences 'viscosity' */
   means type / tukey cldiff;
        /* multiple comparison by Tukey's method -- get actual C.I.'s
           for every pairwise difference of group means */
   means type / tukey lines;
        /* get pictorial (letter-grouping) display of the same comparisons */
run;
quit;
        /* PROC ANOVA is interactive: 'run' executes the statements above,
           'quit' ends the procedure explicitly */

/* refit the same one-way model with PROC GLM: the analysis matches
   'proc anova', with more printed output, but GLM can also save
   fitted values and residuals for diagnostic plots */
proc glm data=oil;
   title2 'Proc glm Analysis';

   class type;
   model visc = type;

   /* write the data plus fitted values (yhat) and residuals (resid)
      to the dataset 'oilfit' for use by the later steps */
   output out=oilfit predicted=yhat residual=resid;
run;
quit;

/* distribution check on the fitted residuals: 'normal' requests the
   tests for normality, 'plot' requests the stem-and-leaf display,
   box plot and normal probability (qq) plot */
proc univariate data=oilfit normal plot;
   var resid;
run;

proc plot       data=oilfit;
        /* name the input dataset explicitly instead of relying on the
           most recently created dataset, so that adding a step that
           creates another dataset cannot silently redirect these plots */
   plot resid*type;
        /* residuals against oil type: compare the spread across groups */
   plot resid*yhat;
        /* residuals against fitted values: look for spread that changes
           with the fitted level (non-constant variance) or any pattern */

run;
quit;
        /* PROC PLOT supports run-groups; 'quit' ends it explicitly */
 

 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 

The SAS Output:

                         Motor Oil analysis                        
                              raw data

                       Obs     type      visc

                         1    CNVNTNL     44 
                         2    CNVNTNL     49 
                         3    CNVNTNL     37 
                         4    CNVNTNL     38 
                         5    SYNTHET     42 
                         6    SYNTHET     49 
                         7    SYNTHET     52 
                         8    SYNTHET     57 
                         9    HYBRID      60 
                        10    HYBRID      58 
                        11    HYBRID      78
                     simple summary statistics

------------------------- oil type=CNVNTNL -------------------------

                        The MEANS Procedure

                 Analysis Variable : visc viscosity
 
                  N            Mean         Std Dev
                 ----------------------------------
                  4           42.00            5.60
                 ----------------------------------

------------------------- oil type=HYBRID --------------------------

                 Analysis Variable : visc viscosity
 
                  N            Mean         Std Dev
                 ----------------------------------
                  3           65.33           11.02
                 ----------------------------------

------------------------- oil type=SYNTHET -------------------------

                 Analysis Variable : visc viscosity
 
                  N            Mean         Std Dev
                 ----------------------------------
                  4           50.00            6.27
                 ----------------------------------






                        plot of the raw data

       Plot of visc*type.  Legend: A = 1 obs, B = 2 obs, etc.

           |
           |
        78 +                        A
        77 +
        76 +
        75 +
        74 +
        73 +
        72 +
        71 +
        70 +
        69 +
        68 +
        67 +
        66 +
        65 +
        64 +
        63 +
      v 62 +
      i 61 +
      s 60 +                        A
      c 59 +
      o 58 +                        A
      s 57 +                                              A
      i 56 +
      t 55 +
      y 54 +
        53 +
        52 +                                              A
        51 +
        50 +
        49 +  A                                           A
        48 +
        47 +
        46 +
        45 +
        44 +  A
        43 +
        42 +                                              A
        41 +
        40 +
        39 +
        38 +  A
        37 +  A
           |
           ---+---------------------+---------------------+--
           CNVNTNL               HYBRID                SYNTHET

                                oil type






                        Proc anova Analysis
                        The ANOVA Procedure

                      Class Level Information
 
          Class         Levels    Values

          type               3    CNVNTNL HYBRID SYNTHET 

                    Number of observations    11
Dependent Variable: visc   viscosity

                                     Sum of
 Source                    DF       Squares   Mean Square  F Value

 Model                      2    943.515152    471.757576     8.30

 Error                      8    454.666667     56.833333         

 Corrected Total           10   1398.181818                       

                    Source                Pr > F

                    Model                 0.0112

                    Error                       

                    Corrected Total             

         R-Square     Coeff Var      Root MSE     visc Mean

         0.674816      14.70331      7.538789      51.27273

 Source                    DF      Anova SS   Mean Square  F Value

 type                       2   943.5151515   471.7575758     8.30

                    Source                Pr > F

                    type                  0.0112
           Tukey's Studentized Range (HSD) Test for visc

   NOTE: This test controls the Type I experimentwise error rate.

            Alpha                                   0.05
            Error Degrees of Freedom                   8
            Error Mean Square                   56.83333
            Critical Value of Studentized Range  4.04101

  Comparisons significant at the 0.05 level are indicated by ***.
 
 
                          Difference
           type              Between     Simultaneous 95%
        Comparison             Means    Confidence Limits

     HYBRID  - SYNTHET        15.333      -1.119   31.786     
     HYBRID  - CNVNTNL        23.333       6.881   39.786  ***
     SYNTHET - HYBRID        -15.333     -31.786    1.119     
     SYNTHET - CNVNTNL         8.000      -7.232   23.232     
     CNVNTNL - HYBRID        -23.333     -39.786   -6.881  ***
     CNVNTNL - SYNTHET        -8.000     -23.232    7.232
           Tukey's Studentized Range (HSD) Test for visc

NOTE: This test controls the Type I experimentwise error rate, but 
      it generally has a higher Type II error rate than REGWQ.

            Alpha                                   0.05
            Error Degrees of Freedom                   8
            Error Mean Square                   56.83333
            Critical Value of Studentized Range  4.04101
            Minimum Significant Difference        16.056
            Harmonic Mean of Cell Sizes              3.6

                  NOTE: Cell sizes are not equal.

     Means with the same letter are not significantly different.
 
 
       Tukey Grouping          Mean      N    type

                    A        65.333      3    HYBRID 
                    A                                
               B    A        50.000      4    SYNTHET
               B                                     
               B             42.000      4    CNVNTNL
                         Proc glm Analysis

                         The GLM Procedure

                      Class Level Information
 
          Class         Levels    Values

          type               3    CNVNTNL HYBRID SYNTHET 

                    Number of observations    11
Dependent Variable: visc   viscosity

                                     Sum of
 Source                    DF       Squares   Mean Square  F Value

 Model                      2    943.515152    471.757576     8.30

 Error                      8    454.666667     56.833333         

 Corrected Total           10   1398.181818                       

                    Source                Pr > F

                    Model                 0.0112

                    Error                       

                    Corrected Total             

         R-Square     Coeff Var      Root MSE     visc Mean

         0.674816      14.70331      7.538789      51.27273

 Source                    DF     Type I SS   Mean Square  F Value

 type                       2   943.5151515   471.7575758     8.30

                    Source                Pr > F

                    type                  0.0112

 Source                    DF   Type III SS   Mean Square  F Value

 type                       2   943.5151515   471.7575758     8.30

                    Source                Pr > F

                    type                  0.0112
                      The UNIVARIATE Procedure
                          Variable:  resid

                              Moments

  N                          11    Sum Weights                 11
  Mean                        0    Sum Observations             0
  Std Deviation       6.7428975    Variance            45.4666667
  Skewness           0.59250805    Kurtosis            -0.6852474
  Uncorrected SS     454.666667    Corrected SS        454.666667
  Coeff Variation             .    Std Error Mean      2.03306009

                     Basic Statistical Measures
 
           Location                    Variability

       Mean      0.00000     Std Deviation            6.74290
       Median   -1.00000     Variance                45.46667
       Mode      2.00000     Range                   20.66667
                             Interquartile Range     12.33333

NOTE: The mode displayed is the smallest of 2 modes with a count of 
                                 2.

                     Tests for Location: Mu0=0
 
          Test           -Statistic-    -----p Value------

          Student's t    t         0    Pr > |t|    1.0000
          Sign           M      -0.5    Pr >= |M|   1.0000
          Signed Rank    S        -2    Pr >= |S|   0.8877

                        Tests for Normality
 
     Test                  --Statistic---    -----p Value------

     Shapiro-Wilk          W     0.927411    Pr < W      0.3853
     Kolmogorov-Smirnov    D     0.178027    Pr > D     >0.1500
     Cramer-von Mises      W-Sq  0.053291    Pr > W-Sq  >0.2500
     Anderson-Darling      A-Sq  0.344193    Pr > A-Sq  >0.2500

                      Quantiles (Definition 5)
 
                      Quantile       Estimate

                      100% Max       12.66667
                      99%            12.66667
                      95%            12.66667
                      90%             7.00000
                      75% Q3          7.00000
                      50% Median     -1.00000
                      25% Q1         -5.33333
                      10%            -7.33333

                      The UNIVARIATE Procedure
                          Variable:  resid

                      Quantiles (Definition 5)
 
                      Quantile       Estimate

                      5%             -8.00000
                      1%             -8.00000
                      0% Min         -8.00000

                        Extreme Observations
 
             ------Lowest-----        -----Highest-----
 
                Value      Obs           Value      Obs

             -8.00000        8          2.0000        1
             -7.33333        6          2.0000       10
             -5.33333        5          7.0000        2
             -5.00000        3          7.0000       11
             -4.00000        4         12.6667        7

          Stem Leaf                     #             Boxplot
             1 3                        1                |
             0 77                       2             +-----+
             0 22                       2             |  +  |
            -0 41                       2             *-----*
            -0 8755                     4             +-----+
               ----+----+----+----+
           Multiply Stem.Leaf by 10**+1




                           Normal Probability Plot
        12.5+                                        +*++++++
            |                                *+++*+++
         2.5+                         ++*++*++
            |                  ++*++*+*
        -7.5+         * +++*+++*
             +----+----+----+----+----+----+----+----+----+----+
                 -2        -1         0        +1        +2



      Plot of resid*type.  Legend: A = 1 obs, B = 2 obs, etc.

      resid |
            |
       12.5 +                        A
            |
            |
            |
            |
       10.0 +
            |
            |
            |
            |
        7.5 +
            |  A                                           A
            |
            |
            |
        5.0 +
            |
            |
            |
            |
        2.5 +
            |  A                                           A
            |
            |
            |
        0.0 +
            |
            |                                              A
            |
            |
       -2.5 +
            |
            |
            |  A
            |
       -5.0 +  A
            |                        A
            |
            |
            |
       -7.5 +                        A
            |                                              A
            |
            |
            |
      -10.0 +
            |
            ---+---------------------+---------------------+--
            CNVNTNL               HYBRID                SYNTHET

                                 oil type




      Plot of resid*yhat.  Legend: A = 1 obs, B = 2 obs, etc.

resid |
      |
 12.5 +                                                   A
      |
      |
      |
      |
 10.0 +
      |
      |
      |
      |
  7.5 +
      |    A               A
      |
      |
      |
  5.0 +
      |
      |
      |
      |
  2.5 +
      |    A               A
      |
      |
      |
  0.0 +
      |
      |                    A
      |
      |
 -2.5 +
      |
      |
      |    A
      |
 -5.0 +    A
      |                                                   A
      |
      |
      |
 -7.5 +                                                   A
      |                    A
      |
      |
      |
-10.0 +
      |
      -+---------+---------+---------+---------+---------+---------+
      40        45        50        55        60        65        70

                                   yhat