ch 5

Michael Taylor

2018/08/22

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(Hmisc)
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## The following objects are masked from 'package:base':
## 
##     format.pval, units
# Turn on knitr caching as the default option for all chunks.
knitr::opts_chunk$set(cache = TRUE)

# Load the festival hygiene data (read.delim expects a tab-separated file
# with a header row) and print the whole data frame.
dlf <- read.delim(file = "DownloadFestival(No Outlier).dat")
dlf
##     ticknumb gender day1 day2 day3
## 1       2111   Male 2.64 1.35 1.61
## 2       2229 Female 0.97 1.41 0.29
## 3       2338   Male 0.84   NA   NA
## 4       2384 Female 3.03   NA   NA
## 5       2401 Female 0.88 0.08   NA
## 6       2405   Male 0.85   NA   NA
## 7       2467 Female 1.56   NA   NA
## 8       2478 Female 3.02   NA   NA
## 9       2490   Male 2.29   NA   NA
## 10      2504 Female 1.11 0.44 0.55
## 11      2509   Male 2.17   NA   NA
## 12      2510 Female 0.82 0.20 0.47
## 13      2514   Male 1.41   NA   NA
## 14      2515 Female 1.76 1.64 1.58
## 15      2520   Male 1.38 0.02   NA
## 16      2521 Female 2.79   NA   NA
## 17      2529   Male 1.50   NA   NA
## 18      2533 Female 1.91 2.05   NA
## 19      2535 Female 2.32   NA   NA
## 20      2538   Male 2.05   NA   NA
## 21      2549   Male 2.17 0.70 0.76
## 22      2551 Female 2.05   NA   NA
## 23      2558 Female 1.61   NA   NA
## 24      2562 Female 1.66 0.85   NA
## 25      2565 Female 2.30   NA   NA
## 26      2566 Female 2.76   NA   NA
## 27      2568 Female 1.44   NA   NA
## 28      2586 Female 1.06   NA   NA
## 29      2601   Male 3.23   NA   NA
## 30      2602   Male 0.97 0.38 0.76
## 31      2604   Male 2.57 0.11 0.02
## 32      2606 Female 0.26   NA   NA
## 33      2609 Female 0.47   NA   NA
## 34      2611 Female 1.73   NA   NA
## 35      2612   Male 1.94 0.82 1.67
## 36      2616   Male 1.91   NA   NA
## 37      2624 Female 2.08 0.91 0.96
## 38      2633 Female 1.91   NA   NA
## 39      2642 Female 1.42   NA   NA
## 40      2644   Male 1.50   NA   NA
## 41      2662 Female 0.11   NA   NA
## 42      2663   Male 1.67   NA   NA
## 43      2664   Male 2.08   NA   NA
## 44      2669 Female 2.05   NA   NA
## 45      2670   Male 2.00   NA   NA
## 46      2675   Male 1.52   NA   NA
## 47      2676 Female 1.58   NA   NA
## 48      2677   Male 1.28 0.38 0.14
## 49      2681   Male 1.88   NA   NA
## 50      2685 Female 1.32   NA   NA
## 51      2686 Female 2.09   NA   NA
## 52      2687   Male 2.00   NA   NA
## 53      2688 Female 2.64   NA   NA
## 54      2689   Male 0.85 0.32 0.52
## 55      2692 Female 2.47 0.23 0.38
## 56      2698 Female 1.79   NA   NA
## 57      2709 Female 1.64   NA   NA
## 58      2710   Male 1.32   NA   NA
## 59      2712 Female 2.97   NA   NA
## 60      2726 Female 1.44 0.14   NA
## 61      2727 Female 2.02   NA   NA
## 62      2728   Male 1.79   NA   NA
## 63      2730   Male 1.34   NA   NA
## 64      2731 Female 2.29 1.90   NA
## 65      2732 Female 1.66   NA   NA
## 66      2734 Female 0.60   NA   NA
## 67      2735 Female 1.76 0.76 0.29
## 68      2736 Female 1.50   NA   NA
## 69      2737 Female 2.08 0.70   NA
## 70      2748 Female 1.00 0.55   NA
## 71      2752   Male 1.73   NA   NA
## 72      2756 Female 1.05 0.38   NA
## 73      2758   Male 2.81   NA   NA
## 74      2759 Female 1.52   NA   NA
## 75      2768   Male 1.47   NA   NA
## 76      2769 Female 2.64   NA   NA
## 77      2770 Female 2.20 1.18   NA
## 78      2771   Male 0.55 0.79   NA
## 79      2772   Male 2.29   NA   NA
## 80      2773 Female 2.00   NA   NA
## 81      2775   Male 2.23   NA   NA
## 82      2779 Female 2.45   NA   NA
## 83      2780   Male 1.20   NA   NA
## 84      2783   Male 2.91 2.08 2.11
## 85      2784 Female 1.14 1.00   NA
## 86      2788   Male 1.88   NA   NA
## 87      2789   Male 0.94   NA   NA
## 88      2791 Female 1.85   NA   NA
## 89      2794 Female 2.58   NA   NA
## 90      2796 Female 0.61   NA   NA
## 91      2799   Male 0.70   NA   NA
## 92      2806   Male 1.38   NA   NA
## 93      2807 Female 1.94   NA   NA
## 94      2811 Female 2.29   NA   NA
## 95      2812   Male 1.59   NA   NA
## 96      2813 Female 2.46   NA   NA
## 97      2814 Female 1.67 0.14   NA
## 98      2825 Female 2.02 0.58 0.50
## 99      2827   Male 1.50   NA   NA
## 100     2828 Female 2.70 1.70 1.91
## 101     2829 Female 1.61   NA   NA
## 102     2830   Male 2.29   NA   NA
## 103     2831 Female 0.97 1.06 0.76
## 104     2839   Male 1.85   NA   NA
## 105     2842 Female 2.76   NA   NA
## 106     2857   Male 1.64   NA   NA
## 107     2858   Male 1.17   NA   NA
## 108     2859   Male 1.57   NA   NA
## 109     2862 Female 2.23   NA   NA
## 110     2863 Female 2.05 1.58 2.15
## 111     2875 Female 2.05   NA   NA
## 112     2876 Female 2.94   NA   NA
## 113     2883 Female 2.39   NA   NA
## 114     2890   Male 1.94   NA   NA
## 115     2894 Female 2.12   NA   NA
## 116     2895 Female 1.11   NA   NA
## 117     2897 Female 0.97   NA   NA
## 118     2898 Female 1.35   NA   NA
## 119     2899 Female 2.81 2.08   NA
## 120     2900 Female 2.50   NA   NA
## 121     2901   Male 1.87   NA   NA
## 122     2920   Male 1.33   NA   NA
## 123     2921 Female 1.26   NA   NA
## 124     2922 Female 1.44   NA   NA
## 125     2923   Male 0.55   NA   NA
## 126     2924 Female 1.75   NA   NA
## 127     2925 Female 2.08   NA   NA
## 128     2932   Male 0.85   NA   NA
## 129     2933 Female 2.52   NA   NA
## 130     2935 Female 3.00   NA   NA
## 131     2936 Female 1.41   NA   NA
## 132     2937 Female 1.08   NA   NA
## 133     2938   Male 1.20 1.38 1.50
## 134     2940   Male 1.94 1.44   NA
## 135     2941 Female 2.26 1.73 1.73
## 136     2942   Male 1.41   NA   NA
## 137     2948 Female 2.50   NA   NA
## 138     2952   Male 2.17   NA   NA
## 139     2953   Male 1.82 1.11   NA
## 140     2954 Female 1.44 1.14   NA
## 141     2956 Female 1.66   NA   NA
## 142     2957   Male 1.82   NA   NA
## 143     2958   Male 1.26   NA   NA
## 144     2959 Female 2.67   NA   NA
## 145     2961 Female 1.47   NA   NA
## 146     2962 Female 1.84   NA   NA
## 147     2964 Female 2.58   NA   NA
## 148     2966   Male 1.73   NA   NA
## 149     2967   Male 1.23   NA   NA
## 150     2968   Male 2.32   NA   NA
## 151     2972 Female 2.67   NA   NA
## 152     2974 Female 1.02   NA   NA
## 153     2975 Female 1.66 2.12 2.70
## 154     2976 Female 1.88   NA   NA
## 155     2977 Female 1.91   NA   NA
## 156     2978 Female 1.64   NA   NA
## 157     2979   Male 1.34   NA   NA
## 158     2982   Male 1.85   NA   NA
## 159     2983   Male 2.08   NA   NA
## 160     2984   Male 1.02   NA   NA
## 161     2985 Female 1.79   NA   NA
## 162     2988 Female 1.94   NA   NA
## 163     2989 Female 3.26 1.97 1.67
## 164     2990   Male 1.14 0.58 0.11
## 165     3008 Female 1.50 0.70 0.38
## 166     3009 Female 2.03   NA   NA
## 167     3010 Female 2.24   NA   NA
## 168     3013 Female 1.11   NA   NA
## 169     3014 Female 2.21   NA   NA
## 170     3016   Male 1.94   NA   NA
## 171     3017   Male 2.41   NA   NA
## 172     3018   Male 0.88   NA   NA
## 173     3019 Female 1.17 1.35   NA
## 174     3027   Male 2.23   NA   NA
## 175     3028 Female 1.64   NA   NA
## 176     3029 Female 2.14   NA   NA
## 177     3030   Male 0.11 0.29   NA
## 178     3031   Male 2.17   NA   NA
## 179     3032 Female 1.67   NA   NA
## 180     3033 Female 1.00   NA   NA
## 181     3034 Female 0.88   NA   NA
## 182     3035   Male 2.20   NA   NA
## 183     3048   Male 2.17   NA   NA
## 184     3049 Female 2.32   NA   NA
## 185     3051   Male 1.64   NA   NA
## 186     3052 Female 3.00   NA   NA
## 187     3053 Female 2.38 0.85   NA
## 188     3054 Female 1.60 1.02   NA
## 189     3055 Female 1.58   NA   NA
## 190     3056 Female 2.61   NA   NA
## 191     3057   Male 1.44 0.05 0.20
## 192     3064   Male 1.57   NA   NA
## 193     3065 Female 2.32   NA   NA
## 194     3068 Female 1.14   NA   NA
## 195     3069   Male 1.93   NA   NA
## 196     3070   Male 2.47   NA   NA
## 197     3072 Female 2.29   NA   NA
## 198     3073 Female 1.00   NA   NA
## 199     3092 Female 1.58   NA   NA
## 200     3093   Male 2.44   NA   NA
## 201     3094 Female 0.83   NA   NA
## 202     3095   Male 2.71 0.78 0.33
## 203     3096 Female 1.73   NA   NA
## 204     3097   Male 1.58   NA   NA
## 205     3098   Male 1.50   NA   NA
## 206     3100 Female 1.05   NA   NA
## 207     3106   Male 2.05   NA   NA
## 208     3107 Female 2.63   NA   NA
## 209     3109   Male 2.55 2.29   NA
## 210     3111 Female 2.00   NA   NA
## 211     3112   Male 2.00   NA   NA
## 212     3114 Female 1.32   NA   NA
## 213     3116 Female 3.14   NA   NA
## 214     3118   Male 1.44   NA   NA
## 215     3129   Male 1.85 0.23   NA
## 216     3131 Female 1.41 0.44   NA
## 217     3132 Female 1.94   NA   NA
## 218     3133   Male 2.91   NA   NA
## 219     3135 Female 1.85   NA   NA
## 220     3136   Male 1.70   NA   NA
## 221     3137 Female 2.23   NA   NA
## 222     3138   Male 1.11   NA   NA
## 223     3139 Female 1.47   NA   NA
## 224     3146 Female 2.20   NA   NA
## 225     3147   Male 1.82   NA   NA
## 226     3148 Female 1.42   NA   NA
## 227     3168 Female 2.44   NA   NA
## 228     3171 Female 2.66   NA   NA
## 229     3172   Male 1.52   NA   NA
## 230     3173 Female 1.35 0.47 0.73
## 231     3180 Female 1.29   NA   NA
## 232     3182 Female 2.32   NA   NA
## 233     3190   Male 0.78   NA   NA
## 234     3192 Female 2.84   NA   NA
## 235     3202   Male 0.97   NA   NA
## 236     3236 Female 1.52   NA   NA
## 237     3245 Female 1.70   NA   NA
## 238     3246 Female 0.94 1.17 1.29
## 239     3247   Male 1.41   NA   NA
## 240     3248   Male 1.79   NA   NA
## 241     3249   Male 1.08 0.44 0.44
## 242     3250   Male 1.47   NA   NA
## 243     3251 Female 1.79 0.47   NA
## 244     3253   Male 2.00   NA   NA
## 245     3254 Female 0.76   NA   NA
## 246     3255   Male 2.20   NA   NA
## 247     3256 Female 0.94 0.17   NA
## 248     3257 Female 1.38   NA   NA
## 249     3258 Female 1.38 0.85   NA
## 250     3260   Male 0.32   NA   NA
## 251     3261   Male 2.58   NA   NA
## 252     3262   Male 0.51   NA   NA
## 253     3264 Female 0.32   NA   NA
## 254     3267 Female 0.91 1.11 1.70
## 255     3273   Male 1.51   NA   NA
## 256     3275 Female 1.47   NA   NA
## 257     3276 Female 2.50   NA   NA
## 258     3277 Female 2.26   NA   NA
## 259     3278 Female 2.81   NA   NA
## 260     3279 Female 1.87   NA   NA
## 261     3281 Female 2.00   NA   NA
## 262     3282 Female 2.23 0.41 1.02
## 263     3284   Male 2.00 0.76   NA
## 264     3290 Female 1.41   NA   NA
## 265     3291   Male 1.64   NA   NA
## 266     3292   Male 1.64   NA   NA
## 267     3296   Male 1.26   NA   NA
## 268     3306 Female 1.52 0.55 1.88
## 269     3307   Male 2.44 1.02 0.76
## 270     3308 Female 2.18   NA   NA
## 271     3309 Female 3.02   NA   NA
## 272     3310 Female 1.02   NA   NA
## 273     3311 Female 2.88   NA   NA
## 274     3312   Male 1.54   NA   NA
## 275     3313 Female 1.64   NA   NA
## 276     3314 Female 2.44 2.50 1.70
## 277     3315 Female 1.29   NA   NA
## 278     3316 Female 1.61 0.32 0.26
## 279     3321 Female 1.77   NA   NA
## 280     3325   Male 0.91 0.17   NA
## 281     3326 Female 0.85 0.20 0.38
## 282     3327   Male 0.85 0.52 0.44
## 283     3328 Female 1.50   NA   NA
## 284     3329   Male 1.05 0.23   NA
## 285     3338 Female 3.38   NA   NA
## 286     3340 Female 1.42 0.52 2.00
## 287     3341 Female 1.85   NA   NA
## 288     3348 Female 1.91 0.84   NA
## 289     3349   Male 0.82 0.26   NA
## 290     3350 Female 1.32 0.76   NA
## 291     3351 Female 2.23 0.85 0.39
## 292     3352 Female 1.47 1.52 0.17
## 293     3363 Female 2.70   NA   NA
## 294     3365   Male 1.58   NA   NA
## 295     3366   Male 1.00   NA   NA
## 296     3367 Female 1.44   NA   NA
## 297     3368 Female 2.00   NA   NA
## 298     3369   Male 1.60   NA   NA
## 299     3370 Female 2.32 2.53 1.67
## 300     3371 Female 3.41   NA   NA
## 301     3372 Female 2.02   NA   NA
## 302     3373   Male 0.64 0.52   NA
## 303     3374   Male 3.58 3.35   NA
## 304     3375   Male 1.50   NA   NA
## 305     3376   Male 1.08   NA   NA
## 306     3377 Female 1.52   NA   NA
## 307     3378   Male 1.26   NA   NA
## 308     3379 Female 1.68   NA   NA
## 309     3380   Male 1.47 1.08 0.58
## 310     3390 Female 1.47   NA   NA
## 311     3391 Female 1.67 1.55   NA
## 312     3392 Female 2.47 1.97   NA
## 313     3393   Male 1.82   NA   NA
## 314     3394 Female 2.17   NA   NA
## 315     3395 Female 3.21   NA   NA
## 316     3397 Female 1.60 1.38 1.02
## 317     3398 Female 0.32   NA   NA
## 318     3407 Female 0.55   NA   NA
## 319     3411 Female 1.42   NA   NA
## 320     3412   Male 1.14   NA   NA
## 321     3413 Female 2.64   NA   NA
## 322     3416 Female 2.58   NA   NA
## 323     3418 Female 2.02   NA   NA
## 324     3419   Male 2.00   NA   NA
## 325     3420 Female 2.90   NA   NA
## 326     3423   Male 1.82   NA   NA
## 327     3429 Female 0.50   NA   NA
## 328     3431   Male 1.53   NA   NA
## 329     3449 Female 2.48   NA   NA
## 330     3450 Female 2.05   NA   NA
## 331     3453   Male 2.52   NA   NA
## 332     3454 Female 1.88   NA   NA
## 333     3455   Male 2.73   NA   NA
## 334     3456 Female 2.88   NA   NA
## 335     3457 Female 1.67   NA   NA
## 336     3458 Female 1.93   NA   NA
## 337     3460 Female 1.67   NA   NA
## 338     3461   Male 1.20   NA   NA
## 339     3464 Female 2.75   NA   NA
## 340     3467 Female 1.94 0.97   NA
## 341     3468 Female 0.59   NA   NA
## 342     3469 Female 1.50   NA   NA
## 343     3477   Male 1.58 0.94 0.94
## 344     3480 Female 2.23 0.11 0.17
## 345     3483 Female 2.35   NA   NA
## 346     3490 Female 2.55 0.82 0.29
## 347     3493   Male 1.55   NA   NA
## 348     3494 Female 2.31   NA   NA
## 349     3495   Male 2.23   NA   NA
## 350     3500 Female 0.67 0.50   NA
## 351     3501   Male 2.51   NA   NA
## 352     3503   Male 1.08 0.58 0.61
## 353     3510 Female 2.44   NA   NA
## 354     3511 Female 0.23 0.14   NA
## 355     3512 Female 2.17   NA   NA
## 356     3518   Male 1.90 1.17   NA
## 357     3519 Female 1.67 0.44   NA
## 358     3521 Female 2.00 0.58 0.52
## 359     3522 Female 2.44   NA   NA
## 360     3523   Male 1.44   NA   NA
## 361     3524   Male 0.82   NA   NA
## 362     3525 Female 2.50   NA   NA
## 363     3526   Male 1.82   NA   NA
## 364     3536 Female 1.97   NA   NA
## 365     3539 Female 2.52   NA   NA
## 366     3540 Female 0.05   NA   NA
## 367     3542 Female 2.08   NA   NA
## 368     3545 Female 2.39   NA   NA
## 369     3546   Male 1.45 0.82   NA
## 370     3551   Male 2.58   NA   NA
## 371     3557 Female 2.12   NA   NA
## 372     3565 Female 2.02 0.76 0.55
## 373     3567   Male 1.78 1.14 0.44
## 374     3568   Male 0.73 0.17 0.76
## 375     3569 Female 2.26 0.90 1.85
## 376     3570 Female 2.79   NA   NA
## 377     3571   Male 0.43 0.67 0.14
## 378     3572   Male 0.52 0.38 0.70
## 379     3573 Female 2.32   NA   NA
## 380     3574   Male 2.22   NA   NA
## 381     3575   Male 0.58   NA   NA
## 382     3576   Male 2.00   NA   NA
## 383     3577 Female 0.70   NA   NA
## 384     3586   Male 1.00   NA   NA
## 385     3587   Male 0.30   NA   NA
## 386     3588   Male 1.52   NA   NA
## 387     3593 Female 1.58 0.35   NA
## 388     3594   Male 2.34   NA   NA
## 389     3595 Female 0.79   NA   NA
## 390     3596 Female 2.26   NA   NA
## 391     3597   Male 2.35   NA   NA
## 392     3598 Female 1.70   NA   NA
## 393     3599 Female 3.09   NA   NA
## 394     3600 Female 1.52   NA   NA
## 395     3601 Female 0.35   NA   NA
## 396     3602 Female 2.70   NA   NA
## 397     3603 Female 1.64   NA   NA
## 398     3605   Male 0.82   NA   NA
## 399     3606   Male 2.73   NA   NA
## 400     3607 Female 2.23   NA   NA
## 401     3609 Female 1.06   NA   NA
## 402     3610   Male 2.05 0.20 0.35
## 403     3613 Female 1.73 1.44   NA
## 404     3614 Female 0.93 0.91   NA
## 405     3620 Female 2.50 2.44   NA
## 406     3621   Male 1.44   NA   NA
## 407     3622 Female 2.88   NA   NA
## 408     3625 Female 0.67 0.23 0.44
## 409     3626   Male 1.85 0.35   NA
## 410     3627 Female 1.21 0.79   NA
## 411     3628   Male 1.06 0.76 0.70
## 412     3629   Male 0.61 0.26 0.33
## 413     3631 Female 2.00   NA   NA
## 414     3634 Female 1.17 0.73 1.17
## 415     3635 Female 1.48 0.79 1.55
## 416     3645 Female 1.55   NA   NA
## 417     3646 Female 3.29   NA   NA
## 418     3647   Male 1.47   NA   NA
## 419     3648   Male 0.96   NA   NA
## 420     3649   Male 1.00 1.11 1.20
## 421     3652   Male 1.47   NA   NA
## 422     3653 Female 2.55 2.38   NA
## 423     3654 Female 0.44 0.06   NA
## 424     3655 Female 2.35 2.41   NA
## 425     3656 Female 1.71 0.85   NA
## 426     3659   Male 1.84 0.58 0.70
## 427     3660 Female 1.11 0.23 0.55
## 428     3667 Female 1.38   NA   NA
## 429     3669   Male 0.88   NA   NA
## 430     3670 Female 0.94   NA   NA
## 431     3672 Female 1.91   NA   NA
## 432     3676   Male 2.76   NA   NA
## 433     3677   Male 1.55 0.32 0.47
## 434     3678   Male 2.67   NA   NA
## 435     3680 Female 1.03 0.29 0.72
## 436     3696 Female 2.50   NA   NA
## 437     3697   Male 1.64   NA   NA
## 438     3698 Female 2.26   NA   NA
## 439     3709 Female 2.14   NA   NA
## 440     3710   Male 0.52   NA   NA
## 441     3711   Male 1.08   NA   NA
## 442     3712   Male 1.69   NA   NA
## 443     3713   Male 2.73   NA   NA
## 444     3714   Male 1.91   NA   NA
## 445     3715   Male 1.73   NA   NA
## 446     3716 Female 3.21   NA   NA
## 447     3717 Female 2.11   NA   NA
## 448     3718 Female 2.05   NA   NA
## 449     3719   Male 2.17   NA   NA
## 450     3720 Female 2.17   NA   NA
## 451     3721 Female 2.30   NA   NA
## 452     3722 Female 2.56   NA   NA
## 453     3723 Female 2.11 0.41 0.47
## 454     3724   Male 1.70   NA   NA
## 455     3725 Female 1.23   NA   NA
## 456     3726 Female 3.20   NA   NA
## 457     3727 Female 2.02   NA   NA
## 458     3728 Female 2.64   NA   NA
## 459     3729   Male 2.52 0.14   NA
## 460     3730   Male 1.61   NA   NA
## 461     3731   Male 1.50 1.20 0.91
## 462     3734   Male 1.15 0.45 0.44
## 463     3736   Male 1.82   NA   NA
## 464     3753 Female 1.50   NA   NA
## 465     3754 Female 2.32   NA   NA
## 466     3759 Female 2.92   NA   NA
## 467     3764   Male 1.41   NA   NA
## 468     3769   Male 1.35   NA   NA
## 469     3778   Male 0.61 0.14   NA
## 470     3779 Female 0.73   NA   NA
## 471     3782 Female 2.23 1.88   NA
## 472     3783 Female 1.32 0.91   NA
## 473     3784 Female 2.94 1.79   NA
## 474     3785   Male 1.61   NA   NA
## 475     3786 Female 1.00   NA   NA
## 476     3787 Female 3.15 3.00   NA
## 477     3788 Female 2.88   NA   NA
## 478     3791 Female 2.09 1.21   NA
## 479     3792 Female 1.32 1.70 2.29
## 480     3793   Male 1.47 0.35   NA
## 481     3794   Male 1.61   NA   NA
## 482     3796   Male 2.20 1.50 1.88
## 483     3797 Female 2.78   NA   NA
## 484     3798 Female 2.06   NA   NA
## 485     3799 Female 0.47   NA   NA
## 486     3804 Female 2.87   NA   NA
## 487     3826   Male 1.14   NA   NA
## 488     3828 Female 3.32 3.21   NA
## 489     3831 Female 2.08 1.38   NA
## 490     3836 Female 2.38 2.50   NA
## 491     3837 Female 2.08   NA   NA
## 492     3838   Male 1.85   NA   NA
## 493     3840   Male 1.38   NA   NA
## 494     3844   Male 1.14   NA   NA
## 495     3846   Male 1.58   NA   NA
## 496     3849   Male 1.23 0.70 1.02
## 497     3850 Female 2.53   NA   NA
## 498     3851   Male 0.67   NA   NA
## 499     3854 Female 0.73   NA   NA
## 500     3855 Female 1.34   NA   NA
## 501     3856 Female 2.14 0.70   NA
## 502     3857 Female 1.00   NA   NA
## 503     3858   Male 1.35   NA   NA
## 504     3859   Male 1.94 0.79   NA
## 505     3860   Male 0.50   NA   NA
## 506     3869 Female 3.08   NA   NA
## 507     3870 Female 2.88   NA   NA
## 508     3871 Female 1.91   NA   NA
## 509     3872   Male 1.41   NA   NA
## 510     3873   Male 2.02   NA   NA
## 511     3874 Female 0.76   NA   NA
## 512     3875   Male 1.94   NA   NA
## 513     3876   Male 0.67 0.28   NA
## 514     3879 Female 2.41   NA   NA
## 515     3880 Female 2.17   NA   NA
## 516     3882 Female 2.67 0.41   NA
## 517     3883   Male 1.94 0.64   NA
## 518     3888 Female 2.05 0.85 1.70
## 519     3890   Male 2.17   NA   NA
## 520     3909   Male 0.47   NA   NA
## 521     3912   Male 0.62 0.76   NA
## 522     3913 Female 2.00   NA   NA
## 523     3914   Male 0.45   NA   NA
## 524     3916 Female 2.29 0.91 1.38
## 525     3917 Female 2.55   NA   NA
## 526     3918 Female 0.82   NA   NA
## 527     3919 Female 3.12 2.20   NA
## 528     3920   Male 2.50 2.23 0.41
## 529     3921   Male 1.79   NA   NA
## 530     3922 Female 2.28   NA   NA
## 531     3923   Male 0.58 1.05 0.58
## 532     3924 Female 2.50 1.29   NA
## 533     3925 Female 1.41   NA   NA
## 534     3926 Female 2.14   NA   NA
## 535     3927   Male 0.76 0.26   NA
## 536     3929   Male 1.79 1.11   NA
## 537     3930   Male 1.02 0.35   NA
## 538     3932 Female 2.62   NA   NA
## 539     3942   Male 0.88   NA   NA
## 540     3944   Male 1.58 0.20 0.20
## 541     3945 Female 2.20   NA   NA
## 542     3946 Female 1.14   NA   NA
## 543     3947   Male 1.47 0.52 0.35
## 544     3948 Female 1.41 0.23 0.38
## 545     3949   Male 1.44 1.76 1.18
## 546     3950 Female 1.23 1.17   NA
## 547     3951 Female 1.82   NA   NA
## 548     3952 Female 2.44   NA   NA
## 549     3954 Female 1.94 1.20   NA
## 550     3955   Male 2.41   NA   NA
## 551     3964 Female 2.27   NA   NA
## 552     3965   Male 1.79 0.23   NA
## 553     3966   Male 1.88   NA   NA
## 554     3972   Male 1.85 0.64 1.32
## 555     3973 Female 2.21   NA   NA
## 556     3986   Male 1.97 1.94   NA
## 557     3987 Female 2.51   NA   NA
## 558     3988 Female 2.05   NA   NA
## 559     3989 Female 1.29 1.00 0.82
## 560     3990 Female 2.05   NA   NA
## 561     3991 Female 2.23   NA   NA
## 562     3992 Female 1.76   NA   NA
## 563     3993   Male 1.05   NA   NA
## 564     3994   Male 1.79   NA   NA
## 565     3995 Female 1.02 0.73 0.47
## 566     3996 Female 2.76 1.58   NA
## 567     3997 Female 1.67 0.55   NA
## 568     4010   Male 2.85   NA   NA
## 569     4011 Female 0.23 0.84   NA
## 570     4012 Female 1.90   NA   NA
## 571     4013   Male 1.23 0.52   NA
## 572     4014   Male 1.97   NA   NA
## 573     4015 Female 1.50   NA   NA
## 574     4016 Female 3.69   NA   NA
## 575     4017 Female 0.50   NA   NA
## 576     4022 Female 2.18   NA   NA
## 577     4023 Female 2.17   NA   NA
## 578     4024 Female 1.58   NA   NA
## 579     4025   Male 2.88   NA   NA
## 580     4030   Male 2.52   NA   NA
## 581     4032 Female 2.20   NA   NA
## 582     4033   Male 1.73   NA   NA
## 583     4034 Female 2.23   NA   NA
## 584     4079   Male 1.97   NA   NA
## 585     4089   Male 1.20 0.67 0.91
## 586     4090   Male 2.00   NA   NA
## 587     4092 Female 1.91   NA   NA
## 588     4093   Male 0.81   NA   NA
## 589     4095 Female 1.31   NA   NA
## 590     4096 Female 0.38   NA   NA
## 591     4101 Female 1.97   NA   NA
## 592     4104 Female 0.38   NA   NA
## 593     4105 Female 2.11 0.76 0.85
## 594     4106 Female 3.20   NA   NA
## 595     4107 Female 0.02   NA   NA
## 596     4109 Female 2.56   NA   NA
## 597     4110 Female 2.02   NA   NA
## 598     4111 Female 2.30   NA   NA
## 599     4114   Male 2.02   NA   NA
## 600     4147 Female 2.05   NA   NA
## 601     4148 Female 1.70   NA   NA
## 602     4149   Male 1.61   NA   NA
## 603     4150   Male 0.73   NA   NA
## 604     4151   Male 2.50 1.64   NA
## 605     4152 Female 2.18 1.75 1.91
## 606     4153 Female 2.46 1.08 1.91
## 607     4154 Female 1.50 0.91   NA
## 608     4155 Female 1.73 0.94   NA
## 609     4156   Male 1.44   NA   NA
## 610     4157   Male 1.64 0.32 0.52
## 611     4158 Female 2.02 2.44   NA
## 612     4159 Female 1.20 0.17 0.20
## 613     4160   Male 0.38 0.02   NA
## 614     4161 Female 1.58 1.54 1.76
## 615     4162 Female 1.67 0.50   NA
## 616     4163 Female 1.00 0.48   NA
## 617     4164 Female 2.58 1.35   NA
## 618     4165 Female 2.82 2.61 3.02
## 619     4166 Female 2.29 2.05   NA
## 620     4167 Female 1.14   NA   NA
## 621     4168 Female 1.64 0.76   NA
## 622     4171 Female 1.82 0.08   NA
## 623     4172 Female 3.32 2.91 3.02
## 624     4193   Male 3.32   NA   NA
## 625     4194   Male 1.85 1.00 1.50
## 626     4195   Male 2.29   NA   NA
## 627     4196 Female 1.47 0.47 1.58
## 628     4197   Male 2.08 0.70 0.67
## 629     4198   Male 2.20   NA   NA
## 630     4199 Female 1.06 1.45   NA
## 631     4216 Female 0.97 0.14 0.02
## 632     4218   Male 2.00   NA   NA
## 633     4219 Female 1.67 0.38   NA
## 634     4220 Female 2.94   NA   NA
## 635     4221 Female 1.55   NA   NA
## 636     4222   Male 0.88 0.26   NA
## 637     4223 Female 1.35 2.32   NA
## 638     4224 Female 0.61 0.20 0.17
## 639     4225   Male 1.00   NA   NA
## 640     4226 Female 1.52 2.72 0.52
## 641     4234   Male 1.00   NA   NA
## 642     4235   Male 1.76 0.41 0.88
## 643     4236 Female 2.52   NA   NA
## 644     4237 Female 2.00 0.88 1.44
## 645     4238 Female 2.63   NA   NA
## 646     4239 Female 0.73 0.85   NA
## 647     4240   Male 1.58 0.23 1.66
## 648     4241 Female 0.58   NA   NA
## 649     4245 Female 1.67   NA   NA
## 650     4247 Female 1.47   NA   NA
## 651     4249 Female 1.81   NA   NA
## 652     4256   Male 1.91   NA   NA
## 653     4257   Male 1.06   NA   NA
## 654     4259   Male 1.47 1.23 0.94
## 655     4262   Male 2.52   NA   NA
## 656     4263 Female 1.85 0.20   NA
## 657     4264   Male 3.44   NA   NA
## 658     4265 Female 1.55 1.32 1.19
## 659     4266 Female 2.29 2.70   NA
## 660     4267 Female 1.76   NA   NA
## 661     4268   Male 1.90   NA   NA
## 662     4269   Male 2.52 2.55   NA
## 663     4271 Female 2.52   NA   NA
## 664     4274 Female 2.82 0.17   NA
## 665     4281 Female 2.02   NA   NA
## 666     4284 Female 1.29   NA   NA
## 667     4285   Male 1.26   NA   NA
## 668     4287 Female 0.94   NA   NA
## 669     4291 Female 2.00 1.13 0.53
## 670     4292 Female 0.73   NA   NA
## 671     4294   Male 2.26 0.79   NA
## 672     4295 Female 2.23   NA   NA
## 673     4297   Male 2.35   NA   NA
## 674     4298 Female 0.55 0.38   NA
## 675     4301 Female 1.85   NA   NA
## 676     4302   Male 0.67   NA   NA
## 677     4305   Male 1.85 1.00   NA
## 678     4306   Male 1.23 0.20   NA
## 679     4308 Female 2.35   NA   NA
## 680     4309   Male 1.35   NA   NA
## 681     4310 Female 1.94   NA   NA
## 682     4311   Male 1.55   NA   NA
## 683     4312 Female 1.29   NA   NA
## 684     4313   Male 2.17   NA   NA
## 685     4314 Female 1.91   NA   NA
## 686     4333 Female 2.88   NA   NA
## 687     4334 Female 2.36   NA   NA
## 688     4335 Female 2.36   NA   NA
## 689     4336 Female 2.20   NA   NA
## 690     4349 Female 2.17 0.47 0.38
## 691     4351   Male 0.52   NA   NA
## 692     4352   Male 0.32   NA   NA
## 693     4353 Female 1.52 0.55   NA
## 694     4354   Male 2.00 0.94 0.08
## 695     4355   Male 1.32 1.02 1.20
## 696     4356 Female 2.05   NA   NA
## 697     4357 Female 1.73   NA   NA
## 698     4358   Male 1.94   NA   NA
## 699     4359 Female 1.81   NA   NA
## 700     4360   Male 0.90 0.64   NA
## 701     4361   Male 1.58 0.67   NA
## 702     4363 Female 2.29 1.87 0.91
## 703     4366 Female 2.57   NA   NA
## 704     4370 Female 1.58   NA   NA
## 705     4382 Female 2.33 0.82   NA
## 706     4383 Female 3.15   NA   NA
## 707     4384 Female 2.29   NA   NA
## 708     4385 Female 0.82   NA   NA
## 709     4386 Female 1.93   NA   NA
## 710     4387 Female 1.82   NA   NA
## 711     4388 Female 1.96   NA   NA
## 712     4389 Female 1.32 0.64 0.47
## 713     4391 Female 1.02   NA   NA
## 714     4392 Female 1.14   NA   NA
## 715     4393 Female 2.32   NA   NA
## 716     4394   Male 2.16   NA   NA
## 717     4396 Female 2.42 1.70   NA
## 718     4397 Female 1.14   NA   NA
## 719     4398   Male 1.55 0.79 1.76
## 720     4404 Female 1.17   NA   NA
## 721     4405 Female 1.00 0.58 0.32
## 722     4410 Female 1.05 0.11   NA
## 723     4419 Female 1.38   NA   NA
## 724     4428 Female 1.93 2.42   NA
## 725     4430 Female 2.73   NA   NA
## 726     4435 Female 2.02   NA   NA
## 727     4436 Female 2.81   NA   NA
## 728     4437 Female 2.47   NA   NA
## 729     4439   Male 1.35   NA   NA
## 730     4440   Male 2.08   NA   NA
## 731     4441 Female 2.50   NA   NA
## 732     4442 Female 2.45   NA   NA
## 733     4443 Female 2.17   NA   NA
## 734     4444   Male 1.70 0.00   NA
## 735     4445   Male 0.70 0.23 0.45
## 736     4446   Male 1.51   NA   NA
## 737     4447 Female 1.23   NA   NA
## 738     4448 Female 2.14 0.85   NA
## 739     4452   Male 1.14 1.14   NA
## 740     4453 Female 0.96   NA   NA
## 741     4454   Male 1.52 1.14 1.26
## 742     4455   Male 0.52   NA   NA
## 743     4456   Male 1.56   NA   NA
## 744     4459 Female 3.29 0.26   NA
## 745     4464   Male 0.45   NA   NA
## 746     4467 Female 2.63   NA   NA
## 747     4468 Female 1.70   NA   NA
## 748     4470   Male 3.11   NA   NA
## 749     4479 Female 1.82   NA   NA
## 750     4481 Female 1.58 0.14   NA
## 751     4482 Female 2.73   NA   NA
## 752     4485 Female 1.50 1.14   NA
## 753     4486   Male 1.78 1.02   NA
## 754     4488 Female 2.02   NA   NA
## 755     4507   Male 0.67 0.94 0.91
## 756     4509 Female 1.41 0.55   NA
## 757     4510   Male 0.90   NA   NA
## 758     4511 Female 1.23 1.11 0.76
## 759     4512 Female 2.70   NA   NA
## 760     4514 Female 1.97   NA   NA
## 761     4515   Male 0.84   NA   NA
## 762     4516   Male 1.79   NA   NA
## 763     4517 Female 2.84   NA   NA
## 764     4518   Male 2.02   NA   NA
## 765     4519   Male 1.64 0.70   NA
## 766     4525 Female 1.08   NA   NA
## 767     4533 Female 2.97   NA   NA
## 768     4552 Female 0.94   NA   NA
## 769     4553 Female 2.97 1.94   NA
## 770     4554 Female 0.97   NA   NA
## 771     4555   Male 1.47   NA   NA
## 772     4559 Female 2.61   NA   NA
## 773     4563 Female 1.73 0.20 0.91
## 774     4564 Female 3.38 3.44 3.41
## 775     4569   Male 3.17 1.00 1.73
## 776     4570 Female 2.20 0.91   NA
## 777     4571 Female 2.14   NA   NA
## 778     4582   Male 1.29 1.58 0.81
## 779     4590 Female 3.21 2.85   NA
## 780     4597   Male 2.67   NA   NA
## 781     4598   Male 1.85 0.79   NA
## 782     4601 Female 1.35   NA   NA
## 783     4607 Female 2.14 0.76   NA
## 784     4611   Male 1.24 0.56   NA
## 785     4654 Female 2.02 1.78 2.55
## 786     4655 Female 2.32   NA   NA
## 787     4663   Male 1.08 0.23   NA
## 788     4666   Male 1.14 1.35 1.02
## 789     4690 Female 2.14 1.82   NA
## 790     4692 Female 2.88   NA   NA
## 791     4693   Male 1.35 0.17 0.44
## 792     4694   Male 1.00 1.70   NA
## 793     4695   Male 2.02   NA   NA
## 794     4696 Female 0.64 1.32   NA
## 795     4697   Male 0.29 0.14   NA
## 796     4698 Female 1.73 0.94   NA
## 797     4699 Female 1.82 1.52   NA
## 798     4704 Female 2.11   NA   NA
## 799     4710 Female 1.23   NA   NA
## 800     4711   Male 0.64   NA   NA
## 801     4724   Male 2.23 1.41 2.11
## 802     4725 Female 2.44 0.32   NA
## 803     4740   Male 1.17 0.58   NA
## 804     4744 Female 0.61 0.44   NA
## 805     4749 Female 0.52   NA   NA
## 806     4756 Female 2.91 0.94   NA
## 807     4758 Female 2.61 1.44   NA
## 808     4759 Female 1.47   NA   NA
## 809     4760   Male 1.28   NA   NA
## 810     4765 Female 1.26   NA   NA
# Density-scaled histogram of the day 1 hygiene scores (bins 0.2 wide) with a
# semi-transparent kernel density estimate drawn on top.
hist_day1 <- ggplot(data = dlf, mapping = aes(x = day1)) +
  geom_histogram(
    mapping = aes(y = ..density..),
    binwidth = 0.2,
    fill = "white",
    color = "black"
  ) +
  geom_density(fill = "#FF6666", alpha = 0.2) +
  labs(x = "Hygiene score on day 1", y = "Density")

# Render the plot.
hist_day1

# Overlay a normal curve on the day 1 histogram. The curve's mean and standard
# deviation are estimated from the day 1 scores, so it shows how a normal
# distribution with the same location and spread would look.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned, whereas `TRUE` is a reserved word.
hist_day1 +
  stat_function(
    fun = dnorm,
    args = list(
      mean = mean(dlf$day1, na.rm = TRUE),
      sd = sd(dlf$day1, na.rm = TRUE)
    ),
    color = "black",
    size = 1
  )

# Density-scaled histogram of the day-2 hygiene scores (missing values removed)
# with a normal curve that uses the sample mean and standard deviation.
hist_day2 <- dlf %>%
  filter(!is.na(day2)) %>%
  ggplot(aes(x = day2)) +
  geom_histogram(aes(y = ..density..),
                 binwidth = 0.1,
                 color = "black",
                 fill = "white") +
  stat_function(fun = dnorm,
                # TRUE rather than T: T can be reassigned
                args = list(mean = mean(dlf$day2, na.rm = TRUE),
                            sd = sd(dlf$day2, na.rm = TRUE)),
                color = "black",
                size = 1) +
  labs(x = "Hygiene score on day 2",
       y = "Density")

hist_day2

# Density-scaled histogram of the day-3 hygiene scores (missing values removed)
# with a normal curve that uses the sample mean and standard deviation.
hist_day3 <- dlf %>%
  filter(!is.na(day3)) %>%
  ggplot(aes(x = day3)) +
  geom_histogram(aes(y = ..density..),
                 binwidth = 0.25,
                 color = "black",
                 fill = "white") +
  stat_function(fun = dnorm,
                # TRUE rather than T: T can be reassigned
                args = list(mean = mean(dlf$day3, na.rm = TRUE),
                            sd = sd(dlf$day3, na.rm = TRUE)),
                color = "black",
                size = 1) +
  labs(x = "Hygiene score on day 3",
       y = "Density")

hist_day3

# Normal Q-Q plots of the hygiene scores for each day. The y axis shows the
# sample quantiles, so it is labelled "Sample" (not "Density").
dlf %>% filter(!is.na(day1)) %>%
  ggplot(aes(sample = day1)) +
  stat_qq() +
  stat_qq_line() +
  labs(x = "Theoretical",
       y = "Sample")

dlf %>% filter(!is.na(day2)) %>%
  ggplot(aes(sample = day2)) +
  stat_qq() +
  stat_qq_line() +
  labs(x = "Theoretical",
       y = "Sample")

dlf %>% filter(!is.na(day3)) %>%
  ggplot(aes(sample = day3)) +
  stat_qq() +
  stat_qq_line() +
  labs(x = "Theoretical",
       y = "Sample")

# Summary of the day-1 scores: n, missing, mean, quantiles and extreme values
describe(dlf$day1)
## dlf$day1 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##      810        0      199        1    1.771   0.7896   0.5945   0.8490 
##      .25      .50      .75      .90      .95 
##   1.3125   1.7900   2.2300   2.6700   2.9055 
## 
## lowest : 0.02 0.05 0.11 0.23 0.26, highest: 3.38 3.41 3.44 3.58 3.69
# Descriptive and normality statistics for day 1, skipping the basic counts.
# TRUE/FALSE are spelled out because T and F can be reassigned.
pastecs::stat.desc(dlf$day1, basic = FALSE, norm = TRUE)
##       median         mean      SE.mean CI.mean.0.95          var 
##     1.790000     1.771136     0.024368     0.047833     0.480996 
##      std.dev     coef.var     skewness     skew.2SE     kurtosis 
##     0.693539     0.391579    -0.004428    -0.025774    -0.421594 
##     kurt.2SE   normtest.W   normtest.p 
##    -1.228385     0.995915     0.031985
# The same summary for all three days at once
describe(dlf[,c('day1', 'day2', 'day3')])
## dlf[, c("day1", "day2", "day3")] 
## 
##  3  Variables      810  Observations
## ---------------------------------------------------------------------------
## day1 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##      810        0      199        1    1.771   0.7896   0.5945   0.8490 
##      .25      .50      .75      .90      .95 
##   1.3125   1.7900   2.2300   2.6700   2.9055 
## 
## lowest : 0.02 0.05 0.11 0.23 0.26, highest: 3.38 3.41 3.44 3.58 3.69
## ---------------------------------------------------------------------------
## day2 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##      264      546      102        1   0.9609   0.7818    0.140    0.200 
##      .25      .50      .75      .90      .95 
##    0.410    0.790    1.350    2.026    2.440 
## 
## lowest : 0.00 0.02 0.05 0.06 0.08, highest: 2.91 3.00 3.21 3.35 3.44
## ---------------------------------------------------------------------------
## day3 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##      123      687       65    0.999   0.9765   0.7754    0.170    0.266 
##      .25      .50      .75      .90      .95 
##    0.440    0.760    1.525    1.904    2.146 
## 
## lowest : 0.02 0.08 0.11 0.14 0.17, highest: 2.29 2.55 2.70 3.02 3.41
## ---------------------------------------------------------------------------
# Descriptive and normality statistics for all three days, one column per day
pastecs::stat.desc(dlf[, c("day1", "day2", "day3")],
                   basic = FALSE, norm = TRUE)
##                   day1      day2      day3
## median        1.790000 7.900e-01 7.600e-01
## mean          1.771136 9.609e-01 9.765e-01
## SE.mean       0.024368 4.436e-02 6.404e-02
## CI.mean.0.95  0.047833 8.735e-02 1.268e-01
## var           0.480996 5.195e-01 5.045e-01
## std.dev       0.693539 7.208e-01 7.103e-01
## coef.var      0.391579 7.501e-01 7.274e-01
## skewness     -0.004428 1.083e+00 1.008e+00
## skew.2SE     -0.025774 3.612e+00 2.309e+00
## kurtosis     -0.421594 7.555e-01 5.945e-01
## kurt.2SE     -1.228385 1.265e+00 6.863e-01
## normtest.W    0.995915 9.083e-01 9.078e-01
## normtest.p    0.031985 1.282e-11 3.804e-07
# Same table rounded to three decimals, which avoids the scientific notation
round(
  pastecs::stat.desc(dlf[, c("day1", "day2", "day3")],
                     basic = FALSE, norm = TRUE),
  digits = 3
)
##                day1  day2  day3
## median        1.790 0.790 0.760
## mean          1.771 0.961 0.977
## SE.mean       0.024 0.044 0.064
## CI.mean.0.95  0.048 0.087 0.127
## var           0.481 0.520 0.504
## std.dev       0.694 0.721 0.710
## coef.var      0.392 0.750 0.727
## skewness     -0.004 1.083 1.008
## skew.2SE     -0.026 3.612 2.309
## kurtosis     -0.422 0.755 0.595
## kurt.2SE     -1.228 1.265 0.686
## normtest.W    0.996 0.908 0.908
## normtest.p    0.032 0.000 0.000
# Read the exam data; the outer parentheses also print the assigned data frame
(rexam <- read.delim("RExam.dat", header = TRUE))
##     exam computer lectures numeracy uni
## 1     18       54     75.0        7   0
## 2     30       47      8.5        1   0
## 3     40       58     69.5        6   0
## 4     30       37     67.0        6   0
## 5     40       53     44.5        2   0
## 6     15       48     76.5        8   0
## 7     36       49     70.0        3   0
## 8     40       49     18.5        7   0
## 9     63       45     43.5        4   0
## 10    31       62    100.0        6   0
## 11    22       67     48.0        3   0
## 12    47       62     10.5        3   0
## 13    38       38     57.5        1   0
## 14    34       37     61.5        8   0
## 15    54       54     54.0        4   0
## 16    35       48     71.0        5   0
## 17    33       48     14.0        9   0
## 18    38       42     55.5        3   0
## 19    29       57     72.5        2   0
## 20    36       55     38.0        4   0
## 21    59       41     40.0        1   0
## 22    31       42     85.5        4   0
## 23    34       48     52.0        4   0
## 24    28       44      8.0        3   0
## 25    50       42     62.5        6   0
## 26    59       42     70.5        3   0
## 27    33       40     98.0        4   0
## 28    57       52     34.5        2   0
## 29    25       56     62.5        3   0
## 30    53       54     91.5        2   0
## 31    65       52     97.5        7   0
## 32    47       55     31.5        2   0
## 33    28       61     80.5        7   0
## 34    43       56     66.5        4   0
## 35    47       52     57.5        2   0
## 36    60       49     67.0        4   0
## 37    45       43     48.5        4   0
## 38    22       51     61.0        4   0
## 39    39       49     76.0        3   0
## 40    43       56     30.5        2   0
## 41    66       41     45.0        2   0
## 42    36       67     21.5        4   0
## 43    26       35     72.5        5   0
## 44    58       49     66.0        8   0
## 45    53       62     90.5        6   0
## 46    37       66     48.5        4   0
## 47    48       48     62.0        3   0
## 48    32       46     49.0        1   0
## 49    42       46     60.0        5   0
## 50    34       58     21.0        5   0
## 51    56       30     84.5        7   1
## 52    76       48     51.0        8   1
## 53    72       54     58.5        5   1
## 54    77       44     42.0        6   1
## 55    77       54     65.5        9   1
## 56    66       58     56.0        7   1
## 57    62       59     71.5        2   1
## 58    86       54     48.5        5   1
## 59    97       35     84.5        5   1
## 60    72       56     47.5        2   1
## 61    69       53     54.0        3   1
## 62    87       56     70.5        6   1
## 63    88       65     73.0        5   1
## 64    72       50     79.0       12   1
## 65    75       39     82.5        8   1
## 66    74       40     74.5        3   1
## 67    68       50     85.0        2   1
## 68    81       57     69.5       10   1
## 69    77       39     42.0        7   1
## 70    71       41     43.0        8   1
## 71    60       48     46.5        6   1
## 72    74       46     36.5        8   1
## 73    80       54     72.5        4   1
## 74    68       55     62.0        4   1
## 75    64       27     81.5        5   1
## 76    94       57    100.0       13   1
## 77    65       73     27.0       14   1
## 78    72       54     59.5        2   1
## 79    75       54     75.0        3   1
## 80    92       50     34.0        2   1
## 81    89       56     78.0        4   1
## 82    83       57     80.5        5   1
## 83    80       54     84.0        2   1
## 84    95       55     37.5        4   1
## 85    99       54     57.0        3   1
## 86    80       52     66.0        8   1
## 87    81       67     59.0       10   1
## 88    75       44     68.5        5   1
## 89    78       57     88.5        3   1
## 90    65       54     55.0        8   1
## 91    80       51     86.0        5   1
## 92    86       55     68.5       10   1
## 93    73       51     64.0        7   1
## 94    81       45     12.5        1   1
## 95    69       59     52.5        7   1
## 96    60       43     37.0        5   1
## 97    69       57     46.0        2   1
## 98    71       50     97.5        2   1
## 99    82       50     70.5        4   1
## 100   58       47     78.0        3   1
# Turn the 0/1 university code into a labelled factor, then preview the data
rexam$uni <- factor(
  rexam$uni,
  levels = 0:1,
  labels = c("Duncetown University", "Sussex University")
)
head(rexam)
##   exam computer lectures numeracy                  uni
## 1   18       54     75.0        7 Duncetown University
## 2   30       47      8.5        1 Duncetown University
## 3   40       58     69.5        6 Duncetown University
## 4   30       37     67.0        6 Duncetown University
## 5   40       53     44.5        2 Duncetown University
## 6   15       48     76.5        8 Duncetown University
# Check the column types after the conversion (uni should now be a factor)
str(rexam)
## 'data.frame':    100 obs. of  5 variables:
##  $ exam    : int  18 30 40 30 40 15 36 40 63 31 ...
##  $ computer: int  54 47 58 37 53 48 49 49 45 62 ...
##  $ lectures: num  75 8.5 69.5 67 44.5 76.5 70 18.5 43.5 100 ...
##  $ numeracy: int  7 1 6 6 2 8 3 7 4 6 ...
##  $ uni     : Factor w/ 2 levels "Duncetown University",..: 1 1 1 1 1 1 1 1 1 1 ...
# Descriptive and normality statistics for the four numeric exam variables,
# rounded to three decimals. TRUE/FALSE replace the reassignable T/F.
round(
  pastecs::stat.desc(rexam[, c("exam", "computer", "lectures", "numeracy")],
                     basic = FALSE,
                     norm = TRUE),
  digits = 3
)
##                 exam computer lectures numeracy
## median        60.000   51.500   62.000    4.000
## mean          58.100   50.710   59.765    4.850
## SE.mean        2.132    0.826    2.168    0.271
## CI.mean.0.95   4.229    1.639    4.303    0.537
## var          454.354   68.228  470.230    7.321
## std.dev       21.316    8.260   21.685    2.706
## coef.var       0.367    0.163    0.363    0.558
## skewness      -0.104   -0.169   -0.410    0.933
## skew.2SE      -0.215   -0.350   -0.849    1.932
## kurtosis      -1.148    0.221   -0.285    0.763
## kurt.2SE      -1.200    0.231   -0.298    0.798
## normtest.W     0.961    0.987    0.977    0.924
## normtest.p     0.005    0.441    0.077    0.000

We came across these measures earlier on and found that we can interpret absolute values of kurt.2SE and skew.2SE greater than 1, 1.29, and 1.65 as significant at p < .05, p < .01, and p < .001, respectively. Looking at skew, numeracy scores are significantly positively skewed (skew.2SE = 1.932, p < .001), indicating a pile-up of scores on the left of the distribution (so most students got low scores). For kurtosis, the exam scores are significant (kurt.2SE = −1.200, p < .05).

# Density-scaled histogram of the exam scores with a normal curve that uses
# the sample mean and standard deviation.
rexam %>%
  filter(!is.na(exam)) %>%
  ggplot(aes(exam)) +
  geom_histogram(aes(y = ..density..),
                 binwidth = 6,
                 color = "black",
                 fill = "white") +
  stat_function(fun = dnorm,
                # TRUE rather than T: T can be reassigned
                args = list(mean = mean(rexam$exam, na.rm = TRUE),
                            sd = sd(rexam$exam, na.rm = TRUE)),
                color = "black",
                size = 1) +
  labs(x = "First Year Exam Scores",
       y = "Density")

# Density-scaled histogram of computer literacy with a normal curve that uses
# the sample mean and standard deviation.
rexam %>%
  filter(!is.na(computer)) %>%
  ggplot(aes(computer)) +
  geom_histogram(aes(y = ..density..),
                 binwidth = 6,
                 color = "black",
                 fill = "white") +
  stat_function(fun = dnorm,
                # TRUE rather than T: T can be reassigned
                args = list(mean = mean(rexam$computer, na.rm = TRUE),
                            sd = sd(rexam$computer, na.rm = TRUE)),
                color = "black",
                size = 1) +
  labs(x = "Computer Literacy",
       y = "Density")

# Density-scaled histogram of lecture attendance with a normal curve that uses
# the sample mean and standard deviation.
rexam %>%
  filter(!is.na(lectures)) %>%
  ggplot(aes(lectures)) +
  geom_histogram(aes(y = ..density..),
                 binwidth = 6,
                 color = "black",
                 fill = "white") +
  stat_function(fun = dnorm,
                # TRUE rather than T: T can be reassigned
                args = list(mean = mean(rexam$lectures, na.rm = TRUE),
                            sd = sd(rexam$lectures, na.rm = TRUE)),
                color = "black",
                size = 1) +
  labs(x = "Percentage of Lectures Attended",
       y = "Density")

# Density-scaled histogram of numeracy with a normal curve that uses the
# sample mean and standard deviation.
rexam %>%
  filter(!is.na(numeracy)) %>%
  ggplot(aes(numeracy)) +
  geom_histogram(aes(y = ..density..),
                 binwidth = 1,
                 color = "black",
                 fill = "white") +
  stat_function(fun = dnorm,
                # TRUE rather than T: T can be reassigned
                args = list(mean = mean(rexam$numeracy, na.rm = TRUE),
                            sd = sd(rexam$numeracy, na.rm = TRUE)),
                color = "black",
                size = 1) +
  labs(x = "Numeracy",
       y = "Density")

# Run describe() separately on the rows belonging to each university
by(rexam, INDICES = rexam$uni, FUN = describe)
## rexam$uni: Duncetown University
## data[x, , drop = FALSE] 
## 
##  5  Variables      50  Observations
## ---------------------------------------------------------------------------
## exam 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##       50        0       33    0.999    40.18    14.41    22.00    25.90 
##      .25      .50      .75      .90      .95 
##    31.25    38.00    47.75    59.00    61.65 
## 
## lowest : 15 18 22 25 26, highest: 59 60 63 65 66
## ---------------------------------------------------------------------------
## computer 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##       50        0       25    0.997    50.26    9.264    37.45    40.90 
##      .25      .50      .75      .90      .95 
##    44.25    49.00    55.75    62.00    64.20 
## 
## lowest : 35 37 38 40 41, highest: 58 61 62 66 67
## ---------------------------------------------------------------------------
## lectures 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##       50        0       45        1    56.26    27.08    12.08    20.75 
##      .25      .50      .75      .90      .95 
##    43.75    60.50    70.88    86.00    94.80 
## 
## lowest :   8.0   8.5  10.5  14.0  18.5, highest:  90.5  91.5  97.5  98.0 100.0
## ---------------------------------------------------------------------------
## numeracy 
##        n  missing distinct     Info     Mean      Gmd 
##       50        0        9    0.974     4.12    2.335 
##                                                        
## Value         1    2    3    4    5    6    7    8    9
## Frequency     4    8    9   12    4    5    4    3    1
## Proportion 0.08 0.16 0.18 0.24 0.08 0.10 0.08 0.06 0.02
## ---------------------------------------------------------------------------
## uni 
##                    n              missing             distinct 
##                   50                    0                    1 
##                value 
## Duncetown University 
##                                
## Value      Duncetown University
## Frequency                    50
## Proportion                    1
## ---------------------------------------------------------------------------
## -------------------------------------------------------- 
## rexam$uni: Sussex University
## data[x, , drop = FALSE] 
## 
##  5  Variables      50  Observations
## ---------------------------------------------------------------------------
## exam 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##       50        0       30    0.998    76.02    11.64    60.00    63.80 
##      .25      .50      .75      .90      .95 
##    69.00    75.00    81.00    89.30    94.55 
## 
## lowest : 56 58 60 62 64, highest: 92 94 95 97 99
## ---------------------------------------------------------------------------
## computer 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##       50        0       25    0.992    51.16    9.151    36.80    39.90 
##      .25      .50      .75      .90      .95 
##    47.25    54.00    56.00    58.10    62.30 
## 
## lowest : 27 30 35 39 40, highest: 58 59 65 67 73
## ---------------------------------------------------------------------------
## lectures 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##       50        0       45        1    63.27     21.7    35.12    37.45 
##      .25      .50      .75      .90      .95 
##    49.12    65.75    78.00    84.55    87.38 
## 
## lowest :  12.5  27.0  34.0  36.5  37.0, highest:  85.0  86.0  88.5  97.5 100.0
## ---------------------------------------------------------------------------
## numeracy 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##       50        0       13    0.985     5.58    3.429     2.00     2.00 
##      .25      .50      .75      .90      .95 
##     3.00     5.00     7.75    10.00    11.10 
##                                                                       
## Value         1    2    3    4    5    6    7    8    9   10   12   13
## Frequency     1    8    6    5    9    3    5    6    1    3    1    1
## Proportion 0.02 0.16 0.12 0.10 0.18 0.06 0.10 0.12 0.02 0.06 0.02 0.02
##                
## Value        14
## Frequency     1
## Proportion 0.02
## ---------------------------------------------------------------------------
## uni 
##                 n           missing          distinct             value 
##                50                 0                 1 Sussex University 
##                             
## Value      Sussex University
## Frequency                 50
## Proportion                 1
## ---------------------------------------------------------------------------
# stat.desc() per university; basic and norm are forwarded to stat.desc().
# TRUE/FALSE are spelled out because T and F can be reassigned.
by(rexam, INDICES = rexam$uni,
   FUN = pastecs::stat.desc, basic = FALSE, norm = TRUE)
## rexam$uni: Duncetown University
##                  exam computer lectures numeracy uni
## median        38.0000  49.0000  60.5000  4.00000  NA
## mean          40.1800  50.2600  56.2600  4.12000  NA
## SE.mean        1.7803   1.1410   3.3619  0.29227  NA
## CI.mean.0.95   3.5777   2.2929   6.7561  0.58733  NA
## var          158.4771  65.0943 565.1351  4.27102  NA
## std.dev       12.5888   8.0681  23.7726  2.06664  NA
## coef.var       0.3133   0.1605   0.4225  0.50161  NA
## skewness       0.2907   0.2121  -0.2904  0.48166  NA
## skew.2SE       0.4318   0.3151  -0.4314  0.71548  NA
## kurtosis      -0.7231  -0.6779  -0.5635 -0.65166  NA
## kurt.2SE      -0.5462  -0.5121  -0.4257 -0.49226  NA
## normtest.W     0.9722   0.9776   0.9697  0.94082  NA
## normtest.p     0.2829   0.4571   0.2259  0.01452  NA
## -------------------------------------------------------- 
## rexam$uni: Sussex University
##                  exam computer lectures  numeracy uni
## median        75.0000 54.00000  65.7500  5.000000  NA
## mean          76.0200 51.16000  63.2700  5.580000  NA
## SE.mean        1.4432  1.20284   2.6827  0.434333  NA
## CI.mean.0.95   2.9002  2.41720   5.3911  0.872824  NA
## var          104.1424 72.34122 359.8491  9.432245  NA
## std.dev       10.2050  8.50536  18.9697  3.071196  NA
## coef.var       0.1342  0.16625   0.2998  0.550394  NA
## skewness       0.2560 -0.50635  -0.3429  0.746369  NA
## skew.2SE       0.3803 -0.75216  -0.5094  1.108686  NA
## kurtosis      -0.4610  0.96405  -0.4234 -0.006440  NA
## kurt.2SE      -0.3482  0.72823  -0.3198 -0.004865  NA
## normtest.W     0.9837  0.94392   0.9817  0.932346  NA
## normtest.p     0.7151  0.01931   0.6263  0.006787  NA
# stat.desc() per university, restricted to the exam and numeracy columns.
# TRUE/FALSE are spelled out because T and F can be reassigned.
by(rexam[, c("exam", "numeracy")],
   INDICES = rexam$uni,
   FUN = pastecs::stat.desc, basic = FALSE, norm = TRUE)
## rexam$uni: Duncetown University
##                  exam numeracy
## median        38.0000  4.00000
## mean          40.1800  4.12000
## SE.mean        1.7803  0.29227
## CI.mean.0.95   3.5777  0.58733
## var          158.4771  4.27102
## std.dev       12.5888  2.06664
## coef.var       0.3133  0.50161
## skewness       0.2907  0.48166
## skew.2SE       0.4318  0.71548
## kurtosis      -0.7231 -0.65166
## kurt.2SE      -0.5462 -0.49226
## normtest.W     0.9722  0.94082
## normtest.p     0.2829  0.01452
## -------------------------------------------------------- 
## rexam$uni: Sussex University
##                  exam  numeracy
## median        75.0000  5.000000
## mean          76.0200  5.580000
## SE.mean        1.4432  0.434333
## CI.mean.0.95   2.9002  0.872824
## var          104.1424  9.432245
## std.dev       10.2050  3.071196
## coef.var       0.1342  0.550394
## skewness       0.2560  0.746369
## skew.2SE       0.3803  1.108686
## kurtosis      -0.4610 -0.006440
## kurt.2SE      -0.3482 -0.004865
## normtest.W     0.9837  0.932346
## normtest.p     0.7151  0.006787
# Numeracy histogram for Duncetown students only, with a normal curve that
# uses that group's own mean and standard deviation.
duncetown <- rexam %>%
  filter(!is.na(numeracy), uni == "Duncetown University")

ggplot(duncetown, aes(x = numeracy)) +
  geom_histogram(
    aes(y = ..density..),
    binwidth = 1,
    color = "black",
    fill = "white"
  ) +
  stat_function(
    fun = dnorm,
    args = with(duncetown, list(mean = mean(numeracy), sd = sd(numeracy))),
    color = "black",
    size = 1
  ) +
  labs(x = "Numeracy Score", y = "Density")

# Numeracy histogram for Sussex students only, with a normal curve that uses
# that group's own mean and standard deviation.
sussex <- rexam %>%
  filter(!is.na(numeracy), uni == "Sussex University")

ggplot(sussex, aes(x = numeracy)) +
  geom_histogram(
    aes(y = ..density..),
    binwidth = 1,
    color = "black",
    fill = "white"
  ) +
  stat_function(
    fun = dnorm,
    args = with(sussex, list(mean = mean(numeracy), sd = sd(numeracy))),
    color = "black",
    size = 1
  ) +
  labs(x = "Numeracy Score", y = "Density")

The Shapiro–Wilk test does just this: it compares the scores in the sample to a normally distributed set of scores with the same mean and standard deviation. If the test is non-significant (p > .05) it tells us that the distribution of the sample is not significantly different from a normal distribution. If, however, the test is significant (p < .05) then the distribution in question is significantly different from a normal distribution (i.e., it is non-normal). This test seems great: in one easy procedure it tells us whether our scores are normally distributed (nice!). However, it has limitations because with large sample sizes it is very easy to get significant results from small deviations from normality, and so a significant test doesn’t necessarily tell us whether the deviation from normality is enough to bias any statistical procedures that we apply to the data.

# Shapiro-Wilk test on the exam scores of the whole sample
shapiro.test(rexam$exam)
## 
##  Shapiro-Wilk normality test
## 
## data:  rexam$exam
## W = 0.96, p-value = 0.005
# Shapiro-Wilk test on the numeracy scores of the whole sample
shapiro.test(rexam$numeracy)
## 
##  Shapiro-Wilk normality test
## 
## data:  rexam$numeracy
## W = 0.92, p-value = 2e-05
Shapiro–Wilk tests for the two universities
# Shapiro-Wilk test on the exam scores, run separately for each university
by(rexam$exam, INDICES = rexam$uni, FUN = shapiro.test)
## rexam$uni: Duncetown University
## 
##  Shapiro-Wilk normality test
## 
## data:  dd[x, ]
## W = 0.97, p-value = 0.3
## 
## -------------------------------------------------------- 
## rexam$uni: Sussex University
## 
##  Shapiro-Wilk normality test
## 
## data:  dd[x, ]
## W = 0.98, p-value = 0.7

Within each of the two groups, the exam scores do not differ significantly from a normal distribution (both p-values are greater than .05).

# Shapiro-Wilk test on the numeracy scores, run separately for each university
by(rexam$numeracy, INDICES = rexam$uni, FUN = shapiro.test)
## rexam$uni: Duncetown University
## 
##  Shapiro-Wilk normality test
## 
## data:  dd[x, ]
## W = 0.94, p-value = 0.01
## 
## -------------------------------------------------------- 
## rexam$uni: Sussex University
## 
##  Shapiro-Wilk normality test
## 
## data:  dd[x, ]
## W = 0.93, p-value = 0.007

For numeracy scores the tests are still significant indicating non-normal distributions both for Duncetown University (p = .015), and Sussex University (p = .007).

# Normal Q-Q plot of the exam scores
ggplot(filter(rexam, !is.na(exam)), aes(sample = exam)) +
  stat_qq() +
  stat_qq_line() +
  labs(x = "Theoretical", y = "Sample")

# Normal Q-Q plot of the numeracy scores. The y label is capitalised to match
# the exam Q-Q plot.
rexam %>%
  filter(!is.na(numeracy)) %>%
  ggplot(aes(sample = numeracy)) +
  stat_qq() +
  stat_qq_line() +
  labs(x = "Theoretical",
       y = "Sample")

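The test statistic for the Shapiro–Wilk test is denoted by \(W\); we can report the results in Output 5.5 in the following way: the percentage on the R exam, \(W = 0.96\), \(p = .005\), and the numeracy scores, \(W = 0.92\), \(p < .001\), were both significantly non-normal.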

Normality tests

# Show the structure of rexam again (column names and types)
str(rexam)
## 'data.frame':    100 obs. of  5 variables:
##  $ exam    : int  18 30 40 30 40 15 36 40 63 31 ...
##  $ computer: int  54 47 58 37 53 48 49 49 45 62 ...
##  $ lectures: num  75 8.5 69.5 67 44.5 76.5 70 18.5 43.5 100 ...
##  $ numeracy: int  7 1 6 6 2 8 3 7 4 6 ...
##  $ uni     : Factor w/ 2 levels "Duncetown University",..: 1 1 1 1 1 1 1 1 1 1 ...

Levene’s test with R

Levene’s test tests the null hypothesis that the variances in different groups are equal (i.e., the difference between the variances is zero). For now, all we need to know is that if Levene’s test is significant at p ≤ .05 then we can conclude that the null hypothesis is incorrect and that the variances are significantly different – therefore, the assumption of homogeneity of variances has been violated. If, however, Levene’s test is non-significant (i.e., p > .05) then the variances are roughly equal and the assumption is tenable.

To use Levene’s test, we use the leveneTest() function from the car package.

# Levene's test of exam scores across the two universities.
# With no center argument the deviations are taken from the group medians.
car::leveneTest(rexam$exam, rexam$uni)
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  1    2.09   0.15
##       98
# Same test with deviations taken from the group means instead
car::leveneTest(rexam$exam, rexam$uni, center = mean)
## Levene's Test for Homogeneity of Variance (center = mean)
##       Df F value Pr(>F)
## group  1    2.58   0.11
##       98
# Levene's test of numeracy scores across the two universities (median-centred)
car::leveneTest(rexam$numeracy, rexam$uni)
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  1    5.37  0.023
##       98

For the percentage on the R exam, the variances were similar for Duncetown and Sussex University students, \(F(1, 98) = 2.09\), ns, but for numeracy scores the variances were significantly different in the two groups, \(F(1, 98) = 5.37\), \(p = .023\).

# Record the R version and package versions used to produce this document
sessionInfo()
## R version 3.5.1 (2018-07-02)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 18.04.1 LTS
## 
## Matrix products: default
## BLAS: /home/michael/anaconda3/lib/R/lib/libRblas.so
## LAPACK: /home/michael/anaconda3/lib/R/lib/libRlapack.so
## 
## locale:
## [1] en_CA.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] bindrcpp_0.2.2       Hmisc_4.1-1          Formula_1.2-3       
## [4] survival_2.42-3      lattice_0.20-35      ggplot2_3.0.0       
## [7] dplyr_0.7.6          RevoUtils_11.0.1     RevoUtilsMath_11.0.0
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_0.12.18        assertthat_0.2.0    rprojroot_1.3-2    
##  [4] digest_0.6.15       cellranger_1.1.0    R6_2.2.2           
##  [7] plyr_1.8.4          backports_1.1.2     acepack_1.4.1      
## [10] evaluate_0.11       blogdown_0.9.8      pillar_1.3.0       
## [13] rlang_0.2.1         readxl_1.1.0        lazyeval_0.2.1     
## [16] curl_3.2            rstudioapi_0.7      data.table_1.11.4  
## [19] car_3.0-0           rpart_4.1-13        Matrix_1.2-14      
## [22] checkmate_1.8.5     rmarkdown_1.10      labeling_0.3       
## [25] splines_3.5.1       stringr_1.3.1       foreign_0.8-70     
## [28] htmlwidgets_1.2     munsell_0.5.0       compiler_3.5.1     
## [31] xfun_0.4.11         pkgconfig_2.0.1     base64enc_0.1-3    
## [34] htmltools_0.3.6     nnet_7.3-12         tidyselect_0.2.4   
## [37] tibble_1.4.2        gridExtra_2.3       htmlTable_1.12     
## [40] bookdown_0.7        rio_0.5.10          codetools_0.2-15   
## [43] crayon_1.3.4        withr_2.1.2         grid_3.5.1         
## [46] gtable_0.2.0        magrittr_1.5        scales_0.5.0       
## [49] zip_1.0.0           carData_3.0-1       stringi_1.2.4      
## [52] latticeExtra_0.6-28 openxlsx_4.1.0      boot_1.3-20        
## [55] RColorBrewer_1.1-2  tools_3.5.1         forcats_0.3.0      
## [58] glue_1.3.0          purrr_0.2.5         hms_0.4.2          
## [61] abind_1.4-5         yaml_2.2.0          colorspace_1.3-2   
## [64] cluster_2.0.7-1     knitr_1.20          bindr_0.1.1        
## [67] haven_1.1.2         pastecs_1.3.21

References

# Write bibliography entries for the attached packages to packages.bib
knitr::write_bib(.packages(), "packages.bib") 
## tweaking Hmisc

Harrell, Frank E, Jr. 2018. Hmisc: Harrell Miscellaneous. https://CRAN.R-project.org/package=Hmisc.

R Core Team. 2018. R: A Language and Environment for Statistical Computing. Vienna, Austria: R Foundation for Statistical Computing. https://www.R-project.org/.

Wickham, Hadley, Winston Chang, Lionel Henry, Thomas Lin Pedersen, Kohske Takahashi, Claus Wilke, and Kara Woo. 2018. Ggplot2: Create Elegant Data Visualisations Using the Grammar of Graphics. https://CRAN.R-project.org/package=ggplot2.

Wickham, Hadley, Romain François, Lionel Henry, and Kirill Müller. 2018. Dplyr: A Grammar of Data Manipulation. https://CRAN.R-project.org/package=dplyr.