# ch 5

## 2018/08/22

library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
##     filter, lag
## The following objects are masked from 'package:base':
##
##     intersect, setdiff, setequal, union
library(ggplot2)
library(Hmisc)
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
##
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:dplyr':
##
##     src, summarize
## The following objects are masked from 'package:base':
##
##     format.pval, units
# Cache knitr chunk results so re-knitting does not recompute every chunk.
knitr::opts_chunk$set(cache = TRUE)

# Load the festival hygiene data (tab-delimited); the surrounding parentheses
# make the assignment print the data frame as well as store it in `dlf`.
(dlf <- read.delim("DownloadFestival(No Outlier).dat"))
# Echoed console output of the data frame (wrapped; continues on the following lines):
## ticknumb gender day1 day2 day3 ## 1 2111 Male 2.64 1.35 1.61 ## 2 2229 Female 0.97 1.41 0.29 ## 3 2338 Male 0.84 NA NA ## 4 2384 Female 3.03 NA NA ## 5 2401 Female 0.88 0.08 NA ## 6 2405 Male 0.85 NA NA ## 7 2467 Female 1.56 NA NA ## 8 2478 Female 3.02 NA NA ## 9 2490 Male 2.29 NA NA ## 10 2504 Female 1.11 0.44 0.55 ## 11 2509 Male 2.17 NA NA ## 12 2510 Female 0.82 0.20 0.47 ## 13 2514 Male 1.41 NA NA ## 14 2515 Female 1.76 1.64 1.58 ## 15 2520 Male 1.38 0.02 NA ## 16 2521 Female 2.79 NA NA ## 17 2529 Male 1.50 NA NA ## 18 2533 Female 1.91 2.05 NA ## 19 2535 Female 2.32 NA NA ## 20 2538 Male 2.05 NA NA ## 21 2549 Male 2.17 0.70 0.76 ## 22 2551 Female 2.05 NA NA ## 23 2558 Female 1.61 NA NA ## 24 2562 Female 1.66 0.85 NA ## 25 2565 Female 2.30 NA NA ## 26 2566 Female 2.76 NA NA ## 27 2568 Female 1.44 NA NA ## 28 2586 Female 1.06 NA NA ## 29 2601 Male 3.23 NA NA ## 30 2602 Male 0.97 0.38 0.76 ## 31 2604 Male 2.57 0.11 0.02 ## 32 2606 Female 0.26 NA NA ## 33 2609 Female 0.47 NA NA ## 34 2611 Female 1.73 NA NA ## 35 2612 Male 1.94 0.82 1.67 ## 36 2616 Male 1.91 NA NA ## 37 2624 Female 2.08 0.91 0.96 ## 38 2633 Female 1.91 NA NA ## 39 2642 Female 1.42 NA NA ## 40 2644 Male 1.50 NA NA ## 41 2662 Female 0.11 NA NA ## 42 2663 Male 1.67 NA NA ## 43 2664 Male 2.08 NA NA ## 44 2669 Female 2.05 NA NA ## 45 2670 Male 2.00 NA NA ## 46 2675 Male 1.52 NA NA ## 47 2676 Female 1.58 NA NA ## 48 2677 Male 1.28 0.38 0.14 ## 49 2681 Male 1.88 NA NA ## 50 2685 Female 1.32 NA NA ## 51 2686 Female 2.09 NA NA ## 52 2687 Male 2.00 NA NA ## 53 2688 Female 2.64 NA NA ## 54 2689 Male 0.85 0.32 0.52 ## 55 2692 Female 2.47 0.23 0.38 ## 56 2698 Female 1.79 NA NA ## 57 2709 Female 1.64 NA NA ## 58 2710 Male 1.32 NA NA ## 59 2712 Female 2.97 NA NA ## 60 2726 Female 1.44 0.14 NA ## 61 2727 Female 2.02 NA NA ## 62 2728 Male 1.79 NA NA ## 63 2730 Male 1.34 NA NA ## 64 2731 Female 2.29 1.90 NA ## 65 2732 Female 
1.66 NA NA ## 66 2734 Female 0.60 NA NA ## 67 2735 Female 1.76 0.76 0.29 ## 68 2736 Female 1.50 NA NA ## 69 2737 Female 2.08 0.70 NA ## 70 2748 Female 1.00 0.55 NA ## 71 2752 Male 1.73 NA NA ## 72 2756 Female 1.05 0.38 NA ## 73 2758 Male 2.81 NA NA ## 74 2759 Female 1.52 NA NA ## 75 2768 Male 1.47 NA NA ## 76 2769 Female 2.64 NA NA ## 77 2770 Female 2.20 1.18 NA ## 78 2771 Male 0.55 0.79 NA ## 79 2772 Male 2.29 NA NA ## 80 2773 Female 2.00 NA NA ## 81 2775 Male 2.23 NA NA ## 82 2779 Female 2.45 NA NA ## 83 2780 Male 1.20 NA NA ## 84 2783 Male 2.91 2.08 2.11 ## 85 2784 Female 1.14 1.00 NA ## 86 2788 Male 1.88 NA NA ## 87 2789 Male 0.94 NA NA ## 88 2791 Female 1.85 NA NA ## 89 2794 Female 2.58 NA NA ## 90 2796 Female 0.61 NA NA ## 91 2799 Male 0.70 NA NA ## 92 2806 Male 1.38 NA NA ## 93 2807 Female 1.94 NA NA ## 94 2811 Female 2.29 NA NA ## 95 2812 Male 1.59 NA NA ## 96 2813 Female 2.46 NA NA ## 97 2814 Female 1.67 0.14 NA ## 98 2825 Female 2.02 0.58 0.50 ## 99 2827 Male 1.50 NA NA ## 100 2828 Female 2.70 1.70 1.91 ## 101 2829 Female 1.61 NA NA ## 102 2830 Male 2.29 NA NA ## 103 2831 Female 0.97 1.06 0.76 ## 104 2839 Male 1.85 NA NA ## 105 2842 Female 2.76 NA NA ## 106 2857 Male 1.64 NA NA ## 107 2858 Male 1.17 NA NA ## 108 2859 Male 1.57 NA NA ## 109 2862 Female 2.23 NA NA ## 110 2863 Female 2.05 1.58 2.15 ## 111 2875 Female 2.05 NA NA ## 112 2876 Female 2.94 NA NA ## 113 2883 Female 2.39 NA NA ## 114 2890 Male 1.94 NA NA ## 115 2894 Female 2.12 NA NA ## 116 2895 Female 1.11 NA NA ## 117 2897 Female 0.97 NA NA ## 118 2898 Female 1.35 NA NA ## 119 2899 Female 2.81 2.08 NA ## 120 2900 Female 2.50 NA NA ## 121 2901 Male 1.87 NA NA ## 122 2920 Male 1.33 NA NA ## 123 2921 Female 1.26 NA NA ## 124 2922 Female 1.44 NA NA ## 125 2923 Male 0.55 NA NA ## 126 2924 Female 1.75 NA NA ## 127 2925 Female 2.08 NA NA ## 128 2932 Male 0.85 NA NA ## 129 2933 Female 2.52 NA NA ## 130 2935 Female 3.00 NA NA ## 131 2936 Female 1.41 NA NA ## 132 2937 Female 1.08 NA NA ## 133 2938 Male 
1.20 1.38 1.50 ## 134 2940 Male 1.94 1.44 NA ## 135 2941 Female 2.26 1.73 1.73 ## 136 2942 Male 1.41 NA NA ## 137 2948 Female 2.50 NA NA ## 138 2952 Male 2.17 NA NA ## 139 2953 Male 1.82 1.11 NA ## 140 2954 Female 1.44 1.14 NA ## 141 2956 Female 1.66 NA NA ## 142 2957 Male 1.82 NA NA ## 143 2958 Male 1.26 NA NA ## 144 2959 Female 2.67 NA NA ## 145 2961 Female 1.47 NA NA ## 146 2962 Female 1.84 NA NA ## 147 2964 Female 2.58 NA NA ## 148 2966 Male 1.73 NA NA ## 149 2967 Male 1.23 NA NA ## 150 2968 Male 2.32 NA NA ## 151 2972 Female 2.67 NA NA ## 152 2974 Female 1.02 NA NA ## 153 2975 Female 1.66 2.12 2.70 ## 154 2976 Female 1.88 NA NA ## 155 2977 Female 1.91 NA NA ## 156 2978 Female 1.64 NA NA ## 157 2979 Male 1.34 NA NA ## 158 2982 Male 1.85 NA NA ## 159 2983 Male 2.08 NA NA ## 160 2984 Male 1.02 NA NA ## 161 2985 Female 1.79 NA NA ## 162 2988 Female 1.94 NA NA ## 163 2989 Female 3.26 1.97 1.67 ## 164 2990 Male 1.14 0.58 0.11 ## 165 3008 Female 1.50 0.70 0.38 ## 166 3009 Female 2.03 NA NA ## 167 3010 Female 2.24 NA NA ## 168 3013 Female 1.11 NA NA ## 169 3014 Female 2.21 NA NA ## 170 3016 Male 1.94 NA NA ## 171 3017 Male 2.41 NA NA ## 172 3018 Male 0.88 NA NA ## 173 3019 Female 1.17 1.35 NA ## 174 3027 Male 2.23 NA NA ## 175 3028 Female 1.64 NA NA ## 176 3029 Female 2.14 NA NA ## 177 3030 Male 0.11 0.29 NA ## 178 3031 Male 2.17 NA NA ## 179 3032 Female 1.67 NA NA ## 180 3033 Female 1.00 NA NA ## 181 3034 Female 0.88 NA NA ## 182 3035 Male 2.20 NA NA ## 183 3048 Male 2.17 NA NA ## 184 3049 Female 2.32 NA NA ## 185 3051 Male 1.64 NA NA ## 186 3052 Female 3.00 NA NA ## 187 3053 Female 2.38 0.85 NA ## 188 3054 Female 1.60 1.02 NA ## 189 3055 Female 1.58 NA NA ## 190 3056 Female 2.61 NA NA ## 191 3057 Male 1.44 0.05 0.20 ## 192 3064 Male 1.57 NA NA ## 193 3065 Female 2.32 NA NA ## 194 3068 Female 1.14 NA NA ## 195 3069 Male 1.93 NA NA ## 196 3070 Male 2.47 NA NA ## 197 3072 Female 2.29 NA NA ## 198 3073 Female 1.00 NA NA ## 199 3092 Female 1.58 NA NA ## 200 3093 Male 
2.44 NA NA ## 201 3094 Female 0.83 NA NA ## 202 3095 Male 2.71 0.78 0.33 ## 203 3096 Female 1.73 NA NA ## 204 3097 Male 1.58 NA NA ## 205 3098 Male 1.50 NA NA ## 206 3100 Female 1.05 NA NA ## 207 3106 Male 2.05 NA NA ## 208 3107 Female 2.63 NA NA ## 209 3109 Male 2.55 2.29 NA ## 210 3111 Female 2.00 NA NA ## 211 3112 Male 2.00 NA NA ## 212 3114 Female 1.32 NA NA ## 213 3116 Female 3.14 NA NA ## 214 3118 Male 1.44 NA NA ## 215 3129 Male 1.85 0.23 NA ## 216 3131 Female 1.41 0.44 NA ## 217 3132 Female 1.94 NA NA ## 218 3133 Male 2.91 NA NA ## 219 3135 Female 1.85 NA NA ## 220 3136 Male 1.70 NA NA ## 221 3137 Female 2.23 NA NA ## 222 3138 Male 1.11 NA NA ## 223 3139 Female 1.47 NA NA ## 224 3146 Female 2.20 NA NA ## 225 3147 Male 1.82 NA NA ## 226 3148 Female 1.42 NA NA ## 227 3168 Female 2.44 NA NA ## 228 3171 Female 2.66 NA NA ## 229 3172 Male 1.52 NA NA ## 230 3173 Female 1.35 0.47 0.73 ## 231 3180 Female 1.29 NA NA ## 232 3182 Female 2.32 NA NA ## 233 3190 Male 0.78 NA NA ## 234 3192 Female 2.84 NA NA ## 235 3202 Male 0.97 NA NA ## 236 3236 Female 1.52 NA NA ## 237 3245 Female 1.70 NA NA ## 238 3246 Female 0.94 1.17 1.29 ## 239 3247 Male 1.41 NA NA ## 240 3248 Male 1.79 NA NA ## 241 3249 Male 1.08 0.44 0.44 ## 242 3250 Male 1.47 NA NA ## 243 3251 Female 1.79 0.47 NA ## 244 3253 Male 2.00 NA NA ## 245 3254 Female 0.76 NA NA ## 246 3255 Male 2.20 NA NA ## 247 3256 Female 0.94 0.17 NA ## 248 3257 Female 1.38 NA NA ## 249 3258 Female 1.38 0.85 NA ## 250 3260 Male 0.32 NA NA ## 251 3261 Male 2.58 NA NA ## 252 3262 Male 0.51 NA NA ## 253 3264 Female 0.32 NA NA ## 254 3267 Female 0.91 1.11 1.70 ## 255 3273 Male 1.51 NA NA ## 256 3275 Female 1.47 NA NA ## 257 3276 Female 2.50 NA NA ## 258 3277 Female 2.26 NA NA ## 259 3278 Female 2.81 NA NA ## 260 3279 Female 1.87 NA NA ## 261 3281 Female 2.00 NA NA ## 262 3282 Female 2.23 0.41 1.02 ## 263 3284 Male 2.00 0.76 NA ## 264 3290 Female 1.41 NA NA ## 265 3291 Male 1.64 NA NA ## 266 3292 Male 1.64 NA NA ## 267 3296 Male 1.26 NA 
NA ## 268 3306 Female 1.52 0.55 1.88 ## 269 3307 Male 2.44 1.02 0.76 ## 270 3308 Female 2.18 NA NA ## 271 3309 Female 3.02 NA NA ## 272 3310 Female 1.02 NA NA ## 273 3311 Female 2.88 NA NA ## 274 3312 Male 1.54 NA NA ## 275 3313 Female 1.64 NA NA ## 276 3314 Female 2.44 2.50 1.70 ## 277 3315 Female 1.29 NA NA ## 278 3316 Female 1.61 0.32 0.26 ## 279 3321 Female 1.77 NA NA ## 280 3325 Male 0.91 0.17 NA ## 281 3326 Female 0.85 0.20 0.38 ## 282 3327 Male 0.85 0.52 0.44 ## 283 3328 Female 1.50 NA NA ## 284 3329 Male 1.05 0.23 NA ## 285 3338 Female 3.38 NA NA ## 286 3340 Female 1.42 0.52 2.00 ## 287 3341 Female 1.85 NA NA ## 288 3348 Female 1.91 0.84 NA ## 289 3349 Male 0.82 0.26 NA ## 290 3350 Female 1.32 0.76 NA ## 291 3351 Female 2.23 0.85 0.39 ## 292 3352 Female 1.47 1.52 0.17 ## 293 3363 Female 2.70 NA NA ## 294 3365 Male 1.58 NA NA ## 295 3366 Male 1.00 NA NA ## 296 3367 Female 1.44 NA NA ## 297 3368 Female 2.00 NA NA ## 298 3369 Male 1.60 NA NA ## 299 3370 Female 2.32 2.53 1.67 ## 300 3371 Female 3.41 NA NA ## 301 3372 Female 2.02 NA NA ## 302 3373 Male 0.64 0.52 NA ## 303 3374 Male 3.58 3.35 NA ## 304 3375 Male 1.50 NA NA ## 305 3376 Male 1.08 NA NA ## 306 3377 Female 1.52 NA NA ## 307 3378 Male 1.26 NA NA ## 308 3379 Female 1.68 NA NA ## 309 3380 Male 1.47 1.08 0.58 ## 310 3390 Female 1.47 NA NA ## 311 3391 Female 1.67 1.55 NA ## 312 3392 Female 2.47 1.97 NA ## 313 3393 Male 1.82 NA NA ## 314 3394 Female 2.17 NA NA ## 315 3395 Female 3.21 NA NA ## 316 3397 Female 1.60 1.38 1.02 ## 317 3398 Female 0.32 NA NA ## 318 3407 Female 0.55 NA NA ## 319 3411 Female 1.42 NA NA ## 320 3412 Male 1.14 NA NA ## 321 3413 Female 2.64 NA NA ## 322 3416 Female 2.58 NA NA ## 323 3418 Female 2.02 NA NA ## 324 3419 Male 2.00 NA NA ## 325 3420 Female 2.90 NA NA ## 326 3423 Male 1.82 NA NA ## 327 3429 Female 0.50 NA NA ## 328 3431 Male 1.53 NA NA ## 329 3449 Female 2.48 NA NA ## 330 3450 Female 2.05 NA NA ## 331 3453 Male 2.52 NA NA ## 332 3454 Female 1.88 NA NA ## 333 3455 Male 2.73 
NA NA ## 334 3456 Female 2.88 NA NA ## 335 3457 Female 1.67 NA NA ## 336 3458 Female 1.93 NA NA ## 337 3460 Female 1.67 NA NA ## 338 3461 Male 1.20 NA NA ## 339 3464 Female 2.75 NA NA ## 340 3467 Female 1.94 0.97 NA ## 341 3468 Female 0.59 NA NA ## 342 3469 Female 1.50 NA NA ## 343 3477 Male 1.58 0.94 0.94 ## 344 3480 Female 2.23 0.11 0.17 ## 345 3483 Female 2.35 NA NA ## 346 3490 Female 2.55 0.82 0.29 ## 347 3493 Male 1.55 NA NA ## 348 3494 Female 2.31 NA NA ## 349 3495 Male 2.23 NA NA ## 350 3500 Female 0.67 0.50 NA ## 351 3501 Male 2.51 NA NA ## 352 3503 Male 1.08 0.58 0.61 ## 353 3510 Female 2.44 NA NA ## 354 3511 Female 0.23 0.14 NA ## 355 3512 Female 2.17 NA NA ## 356 3518 Male 1.90 1.17 NA ## 357 3519 Female 1.67 0.44 NA ## 358 3521 Female 2.00 0.58 0.52 ## 359 3522 Female 2.44 NA NA ## 360 3523 Male 1.44 NA NA ## 361 3524 Male 0.82 NA NA ## 362 3525 Female 2.50 NA NA ## 363 3526 Male 1.82 NA NA ## 364 3536 Female 1.97 NA NA ## 365 3539 Female 2.52 NA NA ## 366 3540 Female 0.05 NA NA ## 367 3542 Female 2.08 NA NA ## 368 3545 Female 2.39 NA NA ## 369 3546 Male 1.45 0.82 NA ## 370 3551 Male 2.58 NA NA ## 371 3557 Female 2.12 NA NA ## 372 3565 Female 2.02 0.76 0.55 ## 373 3567 Male 1.78 1.14 0.44 ## 374 3568 Male 0.73 0.17 0.76 ## 375 3569 Female 2.26 0.90 1.85 ## 376 3570 Female 2.79 NA NA ## 377 3571 Male 0.43 0.67 0.14 ## 378 3572 Male 0.52 0.38 0.70 ## 379 3573 Female 2.32 NA NA ## 380 3574 Male 2.22 NA NA ## 381 3575 Male 0.58 NA NA ## 382 3576 Male 2.00 NA NA ## 383 3577 Female 0.70 NA NA ## 384 3586 Male 1.00 NA NA ## 385 3587 Male 0.30 NA NA ## 386 3588 Male 1.52 NA NA ## 387 3593 Female 1.58 0.35 NA ## 388 3594 Male 2.34 NA NA ## 389 3595 Female 0.79 NA NA ## 390 3596 Female 2.26 NA NA ## 391 3597 Male 2.35 NA NA ## 392 3598 Female 1.70 NA NA ## 393 3599 Female 3.09 NA NA ## 394 3600 Female 1.52 NA NA ## 395 3601 Female 0.35 NA NA ## 396 3602 Female 2.70 NA NA ## 397 3603 Female 1.64 NA NA ## 398 3605 Male 0.82 NA NA ## 399 3606 Male 2.73 NA NA ## 400 
3607 Female 2.23 NA NA ## 401 3609 Female 1.06 NA NA ## 402 3610 Male 2.05 0.20 0.35 ## 403 3613 Female 1.73 1.44 NA ## 404 3614 Female 0.93 0.91 NA ## 405 3620 Female 2.50 2.44 NA ## 406 3621 Male 1.44 NA NA ## 407 3622 Female 2.88 NA NA ## 408 3625 Female 0.67 0.23 0.44 ## 409 3626 Male 1.85 0.35 NA ## 410 3627 Female 1.21 0.79 NA ## 411 3628 Male 1.06 0.76 0.70 ## 412 3629 Male 0.61 0.26 0.33 ## 413 3631 Female 2.00 NA NA ## 414 3634 Female 1.17 0.73 1.17 ## 415 3635 Female 1.48 0.79 1.55 ## 416 3645 Female 1.55 NA NA ## 417 3646 Female 3.29 NA NA ## 418 3647 Male 1.47 NA NA ## 419 3648 Male 0.96 NA NA ## 420 3649 Male 1.00 1.11 1.20 ## 421 3652 Male 1.47 NA NA ## 422 3653 Female 2.55 2.38 NA ## 423 3654 Female 0.44 0.06 NA ## 424 3655 Female 2.35 2.41 NA ## 425 3656 Female 1.71 0.85 NA ## 426 3659 Male 1.84 0.58 0.70 ## 427 3660 Female 1.11 0.23 0.55 ## 428 3667 Female 1.38 NA NA ## 429 3669 Male 0.88 NA NA ## 430 3670 Female 0.94 NA NA ## 431 3672 Female 1.91 NA NA ## 432 3676 Male 2.76 NA NA ## 433 3677 Male 1.55 0.32 0.47 ## 434 3678 Male 2.67 NA NA ## 435 3680 Female 1.03 0.29 0.72 ## 436 3696 Female 2.50 NA NA ## 437 3697 Male 1.64 NA NA ## 438 3698 Female 2.26 NA NA ## 439 3709 Female 2.14 NA NA ## 440 3710 Male 0.52 NA NA ## 441 3711 Male 1.08 NA NA ## 442 3712 Male 1.69 NA NA ## 443 3713 Male 2.73 NA NA ## 444 3714 Male 1.91 NA NA ## 445 3715 Male 1.73 NA NA ## 446 3716 Female 3.21 NA NA ## 447 3717 Female 2.11 NA NA ## 448 3718 Female 2.05 NA NA ## 449 3719 Male 2.17 NA NA ## 450 3720 Female 2.17 NA NA ## 451 3721 Female 2.30 NA NA ## 452 3722 Female 2.56 NA NA ## 453 3723 Female 2.11 0.41 0.47 ## 454 3724 Male 1.70 NA NA ## 455 3725 Female 1.23 NA NA ## 456 3726 Female 3.20 NA NA ## 457 3727 Female 2.02 NA NA ## 458 3728 Female 2.64 NA NA ## 459 3729 Male 2.52 0.14 NA ## 460 3730 Male 1.61 NA NA ## 461 3731 Male 1.50 1.20 0.91 ## 462 3734 Male 1.15 0.45 0.44 ## 463 3736 Male 1.82 NA NA ## 464 3753 Female 1.50 NA NA ## 465 3754 Female 2.32 NA NA ## 466 
3759 Female 2.92 NA NA ## 467 3764 Male 1.41 NA NA ## 468 3769 Male 1.35 NA NA ## 469 3778 Male 0.61 0.14 NA ## 470 3779 Female 0.73 NA NA ## 471 3782 Female 2.23 1.88 NA ## 472 3783 Female 1.32 0.91 NA ## 473 3784 Female 2.94 1.79 NA ## 474 3785 Male 1.61 NA NA ## 475 3786 Female 1.00 NA NA ## 476 3787 Female 3.15 3.00 NA ## 477 3788 Female 2.88 NA NA ## 478 3791 Female 2.09 1.21 NA ## 479 3792 Female 1.32 1.70 2.29 ## 480 3793 Male 1.47 0.35 NA ## 481 3794 Male 1.61 NA NA ## 482 3796 Male 2.20 1.50 1.88 ## 483 3797 Female 2.78 NA NA ## 484 3798 Female 2.06 NA NA ## 485 3799 Female 0.47 NA NA ## 486 3804 Female 2.87 NA NA ## 487 3826 Male 1.14 NA NA ## 488 3828 Female 3.32 3.21 NA ## 489 3831 Female 2.08 1.38 NA ## 490 3836 Female 2.38 2.50 NA ## 491 3837 Female 2.08 NA NA ## 492 3838 Male 1.85 NA NA ## 493 3840 Male 1.38 NA NA ## 494 3844 Male 1.14 NA NA ## 495 3846 Male 1.58 NA NA ## 496 3849 Male 1.23 0.70 1.02 ## 497 3850 Female 2.53 NA NA ## 498 3851 Male 0.67 NA NA ## 499 3854 Female 0.73 NA NA ## 500 3855 Female 1.34 NA NA ## 501 3856 Female 2.14 0.70 NA ## 502 3857 Female 1.00 NA NA ## 503 3858 Male 1.35 NA NA ## 504 3859 Male 1.94 0.79 NA ## 505 3860 Male 0.50 NA NA ## 506 3869 Female 3.08 NA NA ## 507 3870 Female 2.88 NA NA ## 508 3871 Female 1.91 NA NA ## 509 3872 Male 1.41 NA NA ## 510 3873 Male 2.02 NA NA ## 511 3874 Female 0.76 NA NA ## 512 3875 Male 1.94 NA NA ## 513 3876 Male 0.67 0.28 NA ## 514 3879 Female 2.41 NA NA ## 515 3880 Female 2.17 NA NA ## 516 3882 Female 2.67 0.41 NA ## 517 3883 Male 1.94 0.64 NA ## 518 3888 Female 2.05 0.85 1.70 ## 519 3890 Male 2.17 NA NA ## 520 3909 Male 0.47 NA NA ## 521 3912 Male 0.62 0.76 NA ## 522 3913 Female 2.00 NA NA ## 523 3914 Male 0.45 NA NA ## 524 3916 Female 2.29 0.91 1.38 ## 525 3917 Female 2.55 NA NA ## 526 3918 Female 0.82 NA NA ## 527 3919 Female 3.12 2.20 NA ## 528 3920 Male 2.50 2.23 0.41 ## 529 3921 Male 1.79 NA NA ## 530 3922 Female 2.28 NA NA ## 531 3923 Male 0.58 1.05 0.58 ## 532 3924 Female 
2.50 1.29 NA ## 533 3925 Female 1.41 NA NA ## 534 3926 Female 2.14 NA NA ## 535 3927 Male 0.76 0.26 NA ## 536 3929 Male 1.79 1.11 NA ## 537 3930 Male 1.02 0.35 NA ## 538 3932 Female 2.62 NA NA ## 539 3942 Male 0.88 NA NA ## 540 3944 Male 1.58 0.20 0.20 ## 541 3945 Female 2.20 NA NA ## 542 3946 Female 1.14 NA NA ## 543 3947 Male 1.47 0.52 0.35 ## 544 3948 Female 1.41 0.23 0.38 ## 545 3949 Male 1.44 1.76 1.18 ## 546 3950 Female 1.23 1.17 NA ## 547 3951 Female 1.82 NA NA ## 548 3952 Female 2.44 NA NA ## 549 3954 Female 1.94 1.20 NA ## 550 3955 Male 2.41 NA NA ## 551 3964 Female 2.27 NA NA ## 552 3965 Male 1.79 0.23 NA ## 553 3966 Male 1.88 NA NA ## 554 3972 Male 1.85 0.64 1.32 ## 555 3973 Female 2.21 NA NA ## 556 3986 Male 1.97 1.94 NA ## 557 3987 Female 2.51 NA NA ## 558 3988 Female 2.05 NA NA ## 559 3989 Female 1.29 1.00 0.82 ## 560 3990 Female 2.05 NA NA ## 561 3991 Female 2.23 NA NA ## 562 3992 Female 1.76 NA NA ## 563 3993 Male 1.05 NA NA ## 564 3994 Male 1.79 NA NA ## 565 3995 Female 1.02 0.73 0.47 ## 566 3996 Female 2.76 1.58 NA ## 567 3997 Female 1.67 0.55 NA ## 568 4010 Male 2.85 NA NA ## 569 4011 Female 0.23 0.84 NA ## 570 4012 Female 1.90 NA NA ## 571 4013 Male 1.23 0.52 NA ## 572 4014 Male 1.97 NA NA ## 573 4015 Female 1.50 NA NA ## 574 4016 Female 3.69 NA NA ## 575 4017 Female 0.50 NA NA ## 576 4022 Female 2.18 NA NA ## 577 4023 Female 2.17 NA NA ## 578 4024 Female 1.58 NA NA ## 579 4025 Male 2.88 NA NA ## 580 4030 Male 2.52 NA NA ## 581 4032 Female 2.20 NA NA ## 582 4033 Male 1.73 NA NA ## 583 4034 Female 2.23 NA NA ## 584 4079 Male 1.97 NA NA ## 585 4089 Male 1.20 0.67 0.91 ## 586 4090 Male 2.00 NA NA ## 587 4092 Female 1.91 NA NA ## 588 4093 Male 0.81 NA NA ## 589 4095 Female 1.31 NA NA ## 590 4096 Female 0.38 NA NA ## 591 4101 Female 1.97 NA NA ## 592 4104 Female 0.38 NA NA ## 593 4105 Female 2.11 0.76 0.85 ## 594 4106 Female 3.20 NA NA ## 595 4107 Female 0.02 NA NA ## 596 4109 Female 2.56 NA NA ## 597 4110 Female 2.02 NA NA ## 598 4111 Female 2.30 NA 
NA ## 599 4114 Male 2.02 NA NA ## 600 4147 Female 2.05 NA NA ## 601 4148 Female 1.70 NA NA ## 602 4149 Male 1.61 NA NA ## 603 4150 Male 0.73 NA NA ## 604 4151 Male 2.50 1.64 NA ## 605 4152 Female 2.18 1.75 1.91 ## 606 4153 Female 2.46 1.08 1.91 ## 607 4154 Female 1.50 0.91 NA ## 608 4155 Female 1.73 0.94 NA ## 609 4156 Male 1.44 NA NA ## 610 4157 Male 1.64 0.32 0.52 ## 611 4158 Female 2.02 2.44 NA ## 612 4159 Female 1.20 0.17 0.20 ## 613 4160 Male 0.38 0.02 NA ## 614 4161 Female 1.58 1.54 1.76 ## 615 4162 Female 1.67 0.50 NA ## 616 4163 Female 1.00 0.48 NA ## 617 4164 Female 2.58 1.35 NA ## 618 4165 Female 2.82 2.61 3.02 ## 619 4166 Female 2.29 2.05 NA ## 620 4167 Female 1.14 NA NA ## 621 4168 Female 1.64 0.76 NA ## 622 4171 Female 1.82 0.08 NA ## 623 4172 Female 3.32 2.91 3.02 ## 624 4193 Male 3.32 NA NA ## 625 4194 Male 1.85 1.00 1.50 ## 626 4195 Male 2.29 NA NA ## 627 4196 Female 1.47 0.47 1.58 ## 628 4197 Male 2.08 0.70 0.67 ## 629 4198 Male 2.20 NA NA ## 630 4199 Female 1.06 1.45 NA ## 631 4216 Female 0.97 0.14 0.02 ## 632 4218 Male 2.00 NA NA ## 633 4219 Female 1.67 0.38 NA ## 634 4220 Female 2.94 NA NA ## 635 4221 Female 1.55 NA NA ## 636 4222 Male 0.88 0.26 NA ## 637 4223 Female 1.35 2.32 NA ## 638 4224 Female 0.61 0.20 0.17 ## 639 4225 Male 1.00 NA NA ## 640 4226 Female 1.52 2.72 0.52 ## 641 4234 Male 1.00 NA NA ## 642 4235 Male 1.76 0.41 0.88 ## 643 4236 Female 2.52 NA NA ## 644 4237 Female 2.00 0.88 1.44 ## 645 4238 Female 2.63 NA NA ## 646 4239 Female 0.73 0.85 NA ## 647 4240 Male 1.58 0.23 1.66 ## 648 4241 Female 0.58 NA NA ## 649 4245 Female 1.67 NA NA ## 650 4247 Female 1.47 NA NA ## 651 4249 Female 1.81 NA NA ## 652 4256 Male 1.91 NA NA ## 653 4257 Male 1.06 NA NA ## 654 4259 Male 1.47 1.23 0.94 ## 655 4262 Male 2.52 NA NA ## 656 4263 Female 1.85 0.20 NA ## 657 4264 Male 3.44 NA NA ## 658 4265 Female 1.55 1.32 1.19 ## 659 4266 Female 2.29 2.70 NA ## 660 4267 Female 1.76 NA NA ## 661 4268 Male 1.90 NA NA ## 662 4269 Male 2.52 2.55 NA ## 663 4271 
Female 2.52 NA NA ## 664 4274 Female 2.82 0.17 NA ## 665 4281 Female 2.02 NA NA ## 666 4284 Female 1.29 NA NA ## 667 4285 Male 1.26 NA NA ## 668 4287 Female 0.94 NA NA ## 669 4291 Female 2.00 1.13 0.53 ## 670 4292 Female 0.73 NA NA ## 671 4294 Male 2.26 0.79 NA ## 672 4295 Female 2.23 NA NA ## 673 4297 Male 2.35 NA NA ## 674 4298 Female 0.55 0.38 NA ## 675 4301 Female 1.85 NA NA ## 676 4302 Male 0.67 NA NA ## 677 4305 Male 1.85 1.00 NA ## 678 4306 Male 1.23 0.20 NA ## 679 4308 Female 2.35 NA NA ## 680 4309 Male 1.35 NA NA ## 681 4310 Female 1.94 NA NA ## 682 4311 Male 1.55 NA NA ## 683 4312 Female 1.29 NA NA ## 684 4313 Male 2.17 NA NA ## 685 4314 Female 1.91 NA NA ## 686 4333 Female 2.88 NA NA ## 687 4334 Female 2.36 NA NA ## 688 4335 Female 2.36 NA NA ## 689 4336 Female 2.20 NA NA ## 690 4349 Female 2.17 0.47 0.38 ## 691 4351 Male 0.52 NA NA ## 692 4352 Male 0.32 NA NA ## 693 4353 Female 1.52 0.55 NA ## 694 4354 Male 2.00 0.94 0.08 ## 695 4355 Male 1.32 1.02 1.20 ## 696 4356 Female 2.05 NA NA ## 697 4357 Female 1.73 NA NA ## 698 4358 Male 1.94 NA NA ## 699 4359 Female 1.81 NA NA ## 700 4360 Male 0.90 0.64 NA ## 701 4361 Male 1.58 0.67 NA ## 702 4363 Female 2.29 1.87 0.91 ## 703 4366 Female 2.57 NA NA ## 704 4370 Female 1.58 NA NA ## 705 4382 Female 2.33 0.82 NA ## 706 4383 Female 3.15 NA NA ## 707 4384 Female 2.29 NA NA ## 708 4385 Female 0.82 NA NA ## 709 4386 Female 1.93 NA NA ## 710 4387 Female 1.82 NA NA ## 711 4388 Female 1.96 NA NA ## 712 4389 Female 1.32 0.64 0.47 ## 713 4391 Female 1.02 NA NA ## 714 4392 Female 1.14 NA NA ## 715 4393 Female 2.32 NA NA ## 716 4394 Male 2.16 NA NA ## 717 4396 Female 2.42 1.70 NA ## 718 4397 Female 1.14 NA NA ## 719 4398 Male 1.55 0.79 1.76 ## 720 4404 Female 1.17 NA NA ## 721 4405 Female 1.00 0.58 0.32 ## 722 4410 Female 1.05 0.11 NA ## 723 4419 Female 1.38 NA NA ## 724 4428 Female 1.93 2.42 NA ## 725 4430 Female 2.73 NA NA ## 726 4435 Female 2.02 NA NA ## 727 4436 Female 2.81 NA NA ## 728 4437 Female 2.47 NA NA ## 729 4439 
Male 1.35 NA NA ## 730 4440 Male 2.08 NA NA ## 731 4441 Female 2.50 NA NA ## 732 4442 Female 2.45 NA NA ## 733 4443 Female 2.17 NA NA ## 734 4444 Male 1.70 0.00 NA ## 735 4445 Male 0.70 0.23 0.45 ## 736 4446 Male 1.51 NA NA ## 737 4447 Female 1.23 NA NA ## 738 4448 Female 2.14 0.85 NA ## 739 4452 Male 1.14 1.14 NA ## 740 4453 Female 0.96 NA NA ## 741 4454 Male 1.52 1.14 1.26 ## 742 4455 Male 0.52 NA NA ## 743 4456 Male 1.56 NA NA ## 744 4459 Female 3.29 0.26 NA ## 745 4464 Male 0.45 NA NA ## 746 4467 Female 2.63 NA NA ## 747 4468 Female 1.70 NA NA ## 748 4470 Male 3.11 NA NA ## 749 4479 Female 1.82 NA NA ## 750 4481 Female 1.58 0.14 NA ## 751 4482 Female 2.73 NA NA ## 752 4485 Female 1.50 1.14 NA ## 753 4486 Male 1.78 1.02 NA ## 754 4488 Female 2.02 NA NA ## 755 4507 Male 0.67 0.94 0.91 ## 756 4509 Female 1.41 0.55 NA ## 757 4510 Male 0.90 NA NA ## 758 4511 Female 1.23 1.11 0.76 ## 759 4512 Female 2.70 NA NA ## 760 4514 Female 1.97 NA NA ## 761 4515 Male 0.84 NA NA ## 762 4516 Male 1.79 NA NA ## 763 4517 Female 2.84 NA NA ## 764 4518 Male 2.02 NA NA ## 765 4519 Male 1.64 0.70 NA ## 766 4525 Female 1.08 NA NA ## 767 4533 Female 2.97 NA NA ## 768 4552 Female 0.94 NA NA ## 769 4553 Female 2.97 1.94 NA ## 770 4554 Female 0.97 NA NA ## 771 4555 Male 1.47 NA NA ## 772 4559 Female 2.61 NA NA ## 773 4563 Female 1.73 0.20 0.91 ## 774 4564 Female 3.38 3.44 3.41 ## 775 4569 Male 3.17 1.00 1.73 ## 776 4570 Female 2.20 0.91 NA ## 777 4571 Female 2.14 NA NA ## 778 4582 Male 1.29 1.58 0.81 ## 779 4590 Female 3.21 2.85 NA ## 780 4597 Male 2.67 NA NA ## 781 4598 Male 1.85 0.79 NA ## 782 4601 Female 1.35 NA NA ## 783 4607 Female 2.14 0.76 NA ## 784 4611 Male 1.24 0.56 NA ## 785 4654 Female 2.02 1.78 2.55 ## 786 4655 Female 2.32 NA NA ## 787 4663 Male 1.08 0.23 NA ## 788 4666 Male 1.14 1.35 1.02 ## 789 4690 Female 2.14 1.82 NA ## 790 4692 Female 2.88 NA NA ## 791 4693 Male 1.35 0.17 0.44 ## 792 4694 Male 1.00 1.70 NA ## 793 4695 Male 2.02 NA NA ## 794 4696 Female 0.64 1.32 NA ## 795 
## 4697 Male 0.29 0.14 NA ## 796 4698 Female 1.73 0.94 NA ## 797 4699 Female 1.82 1.52 NA ## 798 4704 Female 2.11 NA NA ## 799 4710 Female 1.23 NA NA ## 800 4711 Male 0.64 NA NA ## 801 4724 Male 2.23 1.41 2.11 ## 802 4725 Female 2.44 0.32 NA ## 803 4740 Male 1.17 0.58 NA ## 804 4744 Female 0.61 0.44 NA ## 805 4749 Female 0.52 NA NA ## 806 4756 Female 2.91 0.94 NA ## 807 4758 Female 2.61 1.44 NA ## 808 4759 Female 1.47 NA NA ## 809 4760 Male 1.28 NA NA ## 810 4765 Female 1.26 NA NA

# NOTE(review): `..density..` and `size` on line layers are kept as written;
# ggplot2 >= 3.4.0 prefers `after_stat(density)` and `linewidth`, so switch
# once the minimum ggplot2 version for these notes is confirmed.

# Day 1: density-scaled histogram with a kernel density overlay.
hist_day1 <- dlf %>%
  ggplot(aes(x = day1)) +
  geom_histogram(
    aes(y = ..density..),
    binwidth = 0.2, color = "black", fill = "white"
  ) +
  geom_density(alpha = 0.2, fill = "#FF6666") +
  labs(x = "Hygiene score on day 1", y = "Density")
hist_day1

# Same plot plus a normal curve built from the sample mean and SD of day1.
hist_day1 +
  stat_function(
    fun = dnorm,
    args = list(
      mean = mean(dlf$day1, na.rm = TRUE),
      sd = sd(dlf$day1, na.rm = TRUE)
    ),
    color = "black", size = 1
  )

# Day 2: rows with a missing day2 score are dropped before plotting; the
# normal curve uses the mean and SD of the non-missing day2 values.
hist_day2 <- dlf %>%
  filter(!is.na(day2)) %>%
  ggplot(aes(x = day2)) +
  geom_histogram(
    aes(y = ..density..),
    binwidth = 0.1, color = "black", fill = "white"
  ) +
  stat_function(
    fun = dnorm,
    args = list(
      mean = mean(dlf$day2, na.rm = TRUE),
      sd = sd(dlf$day2, na.rm = TRUE)
    ),
    color = "black", size = 1
  ) +
  labs(x = "Hygiene score on day 2", y = "Density")
hist_day2

# Day 3: same construction as day 2 with a wider bin.
hist_day3 <- dlf %>%
  filter(!is.na(day3)) %>%
  ggplot(aes(x = day3)) +
  geom_histogram(
    aes(y = ..density..),
    binwidth = 0.25, color = "black", fill = "white"
  ) +
  stat_function(
    fun = dnorm,
    args = list(
      mean = mean(dlf$day3, na.rm = TRUE),
      sd = sd(dlf$day3, na.rm = TRUE)
    ),
    color = "black", size = 1
  ) +
  labs(x = "Hygiene score on day 3", y = "Density")
hist_day3

# Normal Q-Q plots for each day. The y axis of a Q-Q plot shows the sample
# quantiles, so it is labelled "Sample" (it was mislabelled "Density").
dlf %>%
  filter(!is.na(day1)) %>%
  ggplot(aes(sample = day1)) +
  stat_qq() +
  stat_qq_line() +
  labs(x = "Theoretical", y = "Sample")

dlf %>%
  filter(!is.na(day2)) %>%
  ggplot(aes(sample = day2)) +
  stat_qq() +
  stat_qq_line() +
  labs(x = "Theoretical", y = "Sample")

dlf %>%
  filter(!is.na(day3)) %>%
  ggplot(aes(sample = day3)) +
  stat_qq() +
  stat_qq_line() +
  labs(x = "Theoretical", y = "Sample")

# Descriptive summary of the day 1 scores.
describe(dlf$day1)
## dlf$day1
##        n  missing distinct     Info     Mean      Gmd      .05      .10
##      810        0      199        1    1.771   0.7896   0.5945   0.8490
##      .25      .50      .75      .90      .95
##   1.3125   1.7900   2.2300   2.6700   2.9055
##
## lowest : 0.02 0.05 0.11 0.23 0.26, highest: 3.38 3.41 3.44 3.58 3.69

# Descriptive and normality statistics for day 1. `basic = FALSE` and
# `norm = TRUE` give the median-to-coef.var rows plus skewness, kurtosis and
# the normality test shown in the output below.
pastecs::stat.desc(dlf$day1, basic = FALSE, norm = TRUE)
##       median         mean      SE.mean CI.mean.0.95          var
##     1.790000     1.771136     0.024368     0.047833     0.480996
##      std.dev     coef.var     skewness     skew.2SE     kurtosis
##     0.693539     0.391579    -0.004428    -0.025774    -0.421594
##     kurt.2SE   normtest.W   normtest.p
##    -1.228385     0.995915     0.031985
# describe() summary for the three daily hygiene-score columns at once.
describe(dlf[, c("day1", "day2", "day3")])
## dlf[, c("day1", "day2", "day3")]
##
##  3  Variables      810  Observations
## ---------------------------------------------------------------------------
## day1
##        n  missing distinct     Info     Mean      Gmd      .05      .10
##      810        0      199        1    1.771   0.7896   0.5945   0.8490
##      .25      .50      .75      .90      .95
##   1.3125   1.7900   2.2300   2.6700   2.9055
##
## lowest : 0.02 0.05 0.11 0.23 0.26, highest: 3.38 3.41 3.44 3.58 3.69
## ---------------------------------------------------------------------------
## day2
##        n  missing distinct     Info     Mean      Gmd      .05      .10
##      264      546      102        1   0.9609   0.7818    0.140    0.200
##      .25      .50      .75      .90      .95
##    0.410    0.790    1.350    2.026    2.440
##
## lowest : 0.00 0.02 0.05 0.06 0.08, highest: 2.91 3.00 3.21 3.35 3.44
## ---------------------------------------------------------------------------
## day3
##        n  missing distinct     Info     Mean      Gmd      .05      .10
##      123      687       65    0.999   0.9765   0.7754    0.170    0.266
##      .25      .50      .75      .90      .95
##    0.440    0.760    1.525    1.904    2.146
##
## lowest : 0.02 0.08 0.11 0.14 0.17, highest: 2.29 2.55 2.70 3.02 3.41
## ---------------------------------------------------------------------------
# Descriptive and normality statistics for all three days, one column per day.
# TRUE/FALSE are spelled out because T and F are ordinary, reassignable names.
pastecs::stat.desc(
  dlf[, c("day1", "day2", "day3")],
  basic = FALSE, norm = TRUE
)
##                   day1      day2      day3
## median        1.790000 7.900e-01 7.600e-01
## mean          1.771136 9.609e-01 9.765e-01
## SE.mean       0.024368 4.436e-02 6.404e-02
## CI.mean.0.95  0.047833 8.735e-02 1.268e-01
## var           0.480996 5.195e-01 5.045e-01
## std.dev       0.693539 7.208e-01 7.103e-01
## coef.var      0.391579 7.501e-01 7.274e-01
## skewness     -0.004428 1.083e+00 1.008e+00
## skew.2SE     -0.025774 3.612e+00 2.309e+00
## kurtosis     -0.421594 7.555e-01 5.945e-01
## kurt.2SE     -1.228385 1.265e+00 6.863e-01
## normtest.W    0.995915 9.083e-01 9.078e-01
## normtest.p    0.031985 1.282e-11 3.804e-07
# Same statistics table as above, rounded to three decimals for readability.
# TRUE/FALSE are spelled out because T and F are ordinary, reassignable names.
round(
  pastecs::stat.desc(
    dlf[, c("day1", "day2", "day3")],
    basic = FALSE, norm = TRUE
  ),
  digits = 3
)
##                day1  day2  day3
## median        1.790 0.790 0.760
## mean          1.771 0.961 0.977
## SE.mean       0.024 0.044 0.064
## CI.mean.0.95  0.048 0.087 0.127
## var           0.481 0.520 0.504
## std.dev       0.694 0.721 0.710
## coef.var      0.392 0.750 0.727
## skewness     -0.004 1.083 1.008
## skew.2SE     -0.026 3.612 2.309
## kurtosis     -0.422 0.755 0.595
## kurt.2SE     -1.228 1.265 0.686
## normtest.W    0.996 0.908 0.908
## normtest.p    0.032 0.000 0.000
# Read the exam data (tab-delimited, first row holds the column names),
# keep it as `rexam`, and print it.
rexam <- read.delim("RExam.dat", header = TRUE)
rexam
##     exam computer lectures numeracy uni
## 1     18       54     75.0        7   0
## 2     30       47      8.5        1   0
## 3     40       58     69.5        6   0
## 4     30       37     67.0        6   0
## 5     40       53     44.5        2   0
## 6     15       48     76.5        8   0
## 7     36       49     70.0        3   0
## 8     40       49     18.5        7   0
## 9     63       45     43.5        4   0
## 10    31       62    100.0        6   0
## 11    22       67     48.0        3   0
## 12    47       62     10.5        3   0
## 13    38       38     57.5        1   0
## 14    34       37     61.5        8   0
## 15    54       54     54.0        4   0
## 16    35       48     71.0        5   0
## 17    33       48     14.0        9   0
## 18    38       42     55.5        3   0
## 19    29       57     72.5        2   0
## 20    36       55     38.0        4   0
## 21    59       41     40.0        1   0
## 22    31       42     85.5        4   0
## 23    34       48     52.0        4   0
## 24    28       44      8.0        3   0
## 25    50       42     62.5        6   0
## 26    59       42     70.5        3   0
## 27    33       40     98.0        4   0
## 28    57       52     34.5        2   0
## 29    25       56     62.5        3   0
## 30    53       54     91.5        2   0
## 31    65       52     97.5        7   0
## 32    47       55     31.5        2   0
## 33    28       61     80.5        7   0
## 34    43       56     66.5        4   0
## 35    47       52     57.5        2   0
## 36    60       49     67.0        4   0
## 37    45       43     48.5        4   0
## 38    22       51     61.0        4   0
## 39    39       49     76.0        3   0
## 40    43       56     30.5        2   0
## 41    66       41     45.0        2   0
## 42    36       67     21.5        4   0
## 43    26       35     72.5        5   0
## 44    58       49     66.0        8   0
## 45    53       62     90.5        6   0
## 46    37       66     48.5        4   0
## 47    48       48     62.0        3   0
## 48    32       46     49.0        1   0
## 49    42       46     60.0        5   0
## 50    34       58     21.0        5   0
## 51    56       30     84.5        7   1
## 52    76       48     51.0        8   1
## 53    72       54     58.5        5   1
## 54    77       44     42.0        6   1
## 55    77       54     65.5        9   1
## 56    66       58     56.0        7   1
## 57    62       59     71.5        2   1
## 58    86       54     48.5        5   1
## 59    97       35     84.5        5   1
## 60    72       56     47.5        2   1
## 61    69       53     54.0        3   1
## 62    87       56     70.5        6   1
## 63    88       65     73.0        5   1
## 64    72       50     79.0       12   1
## 65    75       39     82.5        8   1
## 66    74       40     74.5        3   1
## 67    68       50     85.0        2   1
## 68    81       57     69.5       10   1
## 69    77       39     42.0        7   1
## 70    71       41     43.0        8   1
## 71    60       48     46.5        6   1
## 72    74       46     36.5        8   1
## 73    80       54     72.5        4   1
## 74    68       55     62.0        4   1
## 75    64       27     81.5        5   1
## 76    94       57    100.0       13   1
## 77    65       73     27.0       14   1
## 78    72       54     59.5        2   1
## 79    75       54     75.0        3   1
## 80    92       50     34.0        2   1
## 81    89       56     78.0        4   1
## 82    83       57     80.5        5   1
## 83    80       54     84.0        2   1
## 84    95       55     37.5        4   1
## 85    99       54     57.0        3   1
## 86    80       52     66.0        8   1
## 87    81       67     59.0       10   1
## 88    75       44     68.5        5   1
## 89    78       57     88.5        3   1
## 90    65       54     55.0        8   1
## 91    80       51     86.0        5   1
## 92    86       55     68.5       10   1
## 93    73       51     64.0        7   1
## 94    81       45     12.5        1   1
## 95    69       59     52.5        7   1
## 96    60       43     37.0        5   1
## 97    69       57     46.0        2   1
## 98    71       50     97.5        2   1
## 99    82       50     70.5        4   1
## 100   58       47     78.0        3   1
# Turn the 0/1 university code into a labelled factor, then preview the
# first rows of the recoded data frame.
rexam$uni <- factor(
  rexam$uni,
  levels = 0:1,
  labels = c("Duncetown University", "Sussex University")
)
head(rexam)
##   exam computer lectures numeracy                  uni
## 1   18       54     75.0        7 Duncetown University
## 2   30       47      8.5        1 Duncetown University
## 3   40       58     69.5        6 Duncetown University
## 4   30       37     67.0        6 Duncetown University
## 5   40       53     44.5        2 Duncetown University
## 6   15       48     76.5        8 Duncetown University
str(rexam)
## 'data.frame':    100 obs. of  5 variables:
##  $ exam    : int  18 30 40 30 40 15 36 40 63 31 ...
##  $ computer: int  54 47 58 37 53 48 49 49 45 62 ...
##  $ lectures: num  75 8.5 69.5 67 44.5 76.5 70 18.5 43.5 100 ...
##  $ numeracy: int  7 1 6 6 2 8 3 7 4 6 ...
##  $ uni     : Factor w/ 2 levels "Duncetown University",..: 1 1 1 1 1 1 1 1 1 1 ...

# Descriptive statistics plus the normality measures (skew, kurtosis,
# Shapiro-Wilk) for the four numeric variables, rounded to three decimals.
round(
  pastecs::stat.desc(
    rexam[, c("exam", "computer", "lectures", "numeracy")],
    basic = FALSE,
    norm = TRUE
  ),
  digits = 3
)
##                 exam computer lectures numeracy
## median        60.000   51.500   62.000    4.000
## mean          58.100   50.710   59.765    4.850
## SE.mean        2.132    0.826    2.168    0.271
## CI.mean.0.95   4.229    1.639    4.303    0.537
## var          454.354   68.228  470.230    7.321
## std.dev       21.316    8.260   21.685    2.706
## coef.var       0.367    0.163    0.363    0.558
## skewness      -0.104   -0.169   -0.410    0.933
## skew.2SE      -0.215   -0.350   -0.849    1.932
## kurtosis      -1.148    0.221   -0.285    0.763
## kurt.2SE      -1.200    0.231   -0.298    0.798
## normtest.W     0.961    0.987    0.977    0.924
## normtest.p     0.005    0.441    0.077    0.000

We came across these measures earlier on and found that we can interpret absolute values of kurt.2SE and skew.2SE greater than 1, 1.29, and 1.65 as significant p < .05, p < .01, and p < .001, respectively. We can see that for skew, numeracy scores are significantly positively skewed (p < .001) indicating a pile-up of scores on the left of the distribution (so most students got low scores). For kurtosis, prior exam scores are significant (p < .05).

# Each plot below is a density-scaled histogram of one variable with a normal
# curve overlaid; the curve uses that variable's own sample mean and SD.
rexam %>%
  filter(!is.na(exam)) %>%
  ggplot(aes(exam)) +
  geom_histogram(
    aes(y = ..density..),
    binwidth = 6,
    color = "black",
    fill = "white"
  ) +
  stat_function(
    fun = dnorm,
    args = list(
      mean = mean(rexam$exam, na.rm = TRUE),
      sd = sd(rexam$exam, na.rm = TRUE)
    ),
    color = "black",
    size = 1
  ) +
  labs(x = "First Year Exam Scores", y = "Density")

rexam %>%
  filter(!is.na(computer)) %>%
  ggplot(aes(computer)) +
  geom_histogram(
    aes(y = ..density..),
    binwidth = 6,
    color = "black",
    fill = "white"
  ) +
  stat_function(
    fun = dnorm,
    args = list(
      mean = mean(rexam$computer, na.rm = TRUE),
      sd = sd(rexam$computer, na.rm = TRUE)
    ),
    color = "black",
    size = 1
  ) +
  labs(x = "Computer Literacy", y = "Density")

rexam %>%
  filter(!is.na(lectures)) %>%
  ggplot(aes(lectures)) +
  geom_histogram(
    aes(y = ..density..),
    binwidth = 6,
    color = "black",
    fill = "white"
  ) +
  stat_function(
    fun = dnorm,
    args = list(
      mean = mean(rexam$lectures, na.rm = TRUE),
      sd = sd(rexam$lectures, na.rm = TRUE)
    ),
    color = "black",
    size = 1
  ) +
  labs(x = "Percentage of Lectures Attended", y = "Density")

# Numeracy is a small integer score, so use one bin per score value.
rexam %>%
  filter(!is.na(numeracy)) %>%
  ggplot(aes(numeracy)) +
  geom_histogram(
    aes(y = ..density..),
    binwidth = 1,
    color = "black",
    fill = "white"
  ) +
  stat_function(
    fun = dnorm,
    args = list(
      mean = mean(rexam$numeracy, na.rm = TRUE),
      sd = sd(rexam$numeracy, na.rm = TRUE)
    ),
    color = "black",
    size = 1
  ) +
  labs(x = "Numeracy", y = "Density")

# Summarise every column separately for each university.
by(rexam, INDICES = rexam$uni, FUN = describe)
## rexam$uni: Duncetown University
## data[x, , drop = FALSE]
##
##  5  Variables      50  Observations
## ---------------------------------------------------------------------------
## exam
##        n  missing distinct     Info     Mean      Gmd      .05      .10
##       50        0       33    0.999    40.18    14.41    22.00    25.90
##      .25      .50      .75      .90      .95
##    31.25    38.00    47.75    59.00    61.65
##
## lowest : 15 18 22 25 26, highest: 59 60 63 65 66
## ---------------------------------------------------------------------------
## computer
##        n  missing distinct     Info     Mean      Gmd      .05      .10
##       50        0       25    0.997    50.26    9.264    37.45    40.90
##      .25      .50      .75      .90      .95
##    44.25    49.00    55.75    62.00    64.20
##
## lowest : 35 37 38 40 41, highest: 58 61 62 66 67
## ---------------------------------------------------------------------------
## lectures
##        n  missing distinct     Info     Mean      Gmd      .05      .10
##       50        0       45        1    56.26    27.08    12.08    20.75
##      .25      .50      .75      .90      .95
##    43.75    60.50    70.88    86.00    94.80
##
## lowest :   8.0   8.5  10.5  14.0  18.5, highest:  90.5  91.5  97.5  98.0 100.0
## ---------------------------------------------------------------------------
## numeracy
##        n  missing distinct     Info     Mean      Gmd
##       50        0        9    0.974     4.12    2.335
##
## Value         1    2    3    4    5    6    7    8    9
## Frequency     4    8    9   12    4    5    4    3    1
## Proportion 0.08 0.16 0.18 0.24 0.08 0.10 0.08 0.06 0.02
## ---------------------------------------------------------------------------
## uni
##                    n              missing             distinct
##                   50                    0                    1
##                value
## Duncetown University
##
## Value      Duncetown University
## Frequency                    50
## Proportion                    1
## ---------------------------------------------------------------------------
## --------------------------------------------------------
## rexam$uni: Sussex University
## data[x, , drop = FALSE]
##
##  5  Variables      50  Observations
## ---------------------------------------------------------------------------
## exam
##        n  missing distinct     Info     Mean      Gmd      .05      .10
##       50        0       30    0.998    76.02    11.64    60.00    63.80
##      .25      .50      .75      .90      .95
##    69.00    75.00    81.00    89.30    94.55
##
## lowest : 56 58 60 62 64, highest: 92 94 95 97 99
## ---------------------------------------------------------------------------
## computer
##        n  missing distinct     Info     Mean      Gmd      .05      .10
##       50        0       25    0.992    51.16    9.151    36.80    39.90
##      .25      .50      .75      .90      .95
##    47.25    54.00    56.00    58.10    62.30
##
## lowest : 27 30 35 39 40, highest: 58 59 65 67 73
## ---------------------------------------------------------------------------
## lectures
##        n  missing distinct     Info     Mean      Gmd      .05      .10
##       50        0       45        1    63.27     21.7    35.12    37.45
##      .25      .50      .75      .90      .95
##    49.12    65.75    78.00    84.55    87.38
##
## lowest :  12.5  27.0  34.0  36.5  37.0, highest:  85.0  86.0  88.5  97.5 100.0
## ---------------------------------------------------------------------------
## numeracy
##        n  missing distinct     Info     Mean      Gmd      .05      .10
##       50        0       13    0.985     5.58    3.429     2.00     2.00
##      .25      .50      .75      .90      .95
##     3.00     5.00     7.75    10.00    11.10
##
## Value         1    2    3    4    5    6    7    8    9   10   12   13
## Frequency     1    8    6    5    9    3    5    6    1    3    1    1
## Proportion 0.02 0.16 0.12 0.10 0.18 0.06 0.10 0.12 0.02 0.06 0.02 0.02
##
## Value        14
## Frequency     1
## Proportion 0.02
## ---------------------------------------------------------------------------
## uni
##                 n           missing          distinct             value
##                50                 0                 1 Sussex University
##
## Value      Sussex University
## Frequency                 50
## Proportion                 1
## ---------------------------------------------------------------------------
# Descriptive and normality statistics for every column of rexam, computed
# separately per university. The uni factor itself is not numeric, so its
# column is all NA in the output below.
by(
  rexam,
  INDICES = rexam$uni,
  FUN = pastecs::stat.desc,
  basic = FALSE,
  norm = TRUE
)
## rexam$uni: Duncetown University
##                  exam computer lectures numeracy uni
## median        38.0000  49.0000  60.5000  4.00000  NA
## mean          40.1800  50.2600  56.2600  4.12000  NA
## SE.mean        1.7803   1.1410   3.3619  0.29227  NA
## CI.mean.0.95   3.5777   2.2929   6.7561  0.58733  NA
## var          158.4771  65.0943 565.1351  4.27102  NA
## std.dev       12.5888   8.0681  23.7726  2.06664  NA
## coef.var       0.3133   0.1605   0.4225  0.50161  NA
## skewness       0.2907   0.2121  -0.2904  0.48166  NA
## skew.2SE       0.4318   0.3151  -0.4314  0.71548  NA
## kurtosis      -0.7231  -0.6779  -0.5635 -0.65166  NA
## kurt.2SE      -0.5462  -0.5121  -0.4257 -0.49226  NA
## normtest.W     0.9722   0.9776   0.9697  0.94082  NA
## normtest.p     0.2829   0.4571   0.2259  0.01452  NA
## --------------------------------------------------------
## rexam$uni: Sussex University
##                  exam computer lectures  numeracy uni
## median        75.0000 54.00000  65.7500  5.000000  NA
## mean          76.0200 51.16000  63.2700  5.580000  NA
## SE.mean        1.4432  1.20284   2.6827  0.434333  NA
## CI.mean.0.95   2.9002  2.41720   5.3911  0.872824  NA
## var          104.1424 72.34122 359.8491  9.432245  NA
## std.dev       10.2050  8.50536  18.9697  3.071196  NA
## coef.var       0.1342  0.16625   0.2998  0.550394  NA
## skewness       0.2560 -0.50635  -0.3429  0.746369  NA
## skew.2SE       0.3803 -0.75216  -0.5094  1.108686  NA
## kurtosis      -0.4610  0.96405  -0.4234 -0.006440  NA
## kurt.2SE      -0.3482  0.72823  -0.3198 -0.004865  NA
## normtest.W     0.9837  0.94392   0.9817  0.932346  NA
## normtest.p     0.7151  0.01931   0.6263  0.006787  NA

# Same per-university statistics, restricted to the exam and numeracy columns.
by(
  rexam[, c("exam", "numeracy")],
  INDICES = rexam$uni,
  FUN = pastecs::stat.desc,
  basic = FALSE,
  norm = TRUE
)
## rexam$uni: Duncetown University
##                  exam numeracy
## median        38.0000  4.00000
## mean          40.1800  4.12000
## SE.mean        1.7803  0.29227
## CI.mean.0.95   3.5777  0.58733
## var          158.4771  4.27102
## std.dev       12.5888  2.06664
## coef.var       0.3133  0.50161
## skewness       0.2907  0.48166
## skew.2SE       0.4318  0.71548
## kurtosis      -0.7231 -0.65166
## kurt.2SE      -0.5462 -0.49226
## normtest.W     0.9722  0.94082
## normtest.p     0.2829  0.01452
## --------------------------------------------------------
## rexam$uni: Sussex University
##                  exam  numeracy
## median        75.0000  5.000000
## mean          76.0200  5.580000
## SE.mean        1.4432  0.434333
## CI.mean.0.95   2.9002  0.872824
## var          104.1424  9.432245
## std.dev       10.2050  3.071196
## coef.var       0.1342  0.550394
## skewness       0.2560  0.746369
## skew.2SE       0.3803  1.108686
## kurtosis      -0.4610 -0.006440
## kurt.2SE      -0.3482 -0.004865
## normtest.W     0.9837  0.932346
## normtest.p     0.7151  0.006787
# Duncetown students with a recorded numeracy score.
duncetown <- filter(
  rexam,
  !is.na(numeracy),
  uni == "Duncetown University"
)

# Density histogram of their numeracy scores, overlaid with the normal curve
# that has the same mean and standard deviation as this subsample.
ggplot(data = duncetown, mapping = aes(x = numeracy)) +
  geom_histogram(
    mapping = aes(y = ..density..),
    binwidth = 1,
    color = "black",
    fill = "white"
  ) +
  stat_function(
    fun = dnorm,
    args = list(
      mean = mean(duncetown[["numeracy"]]),
      sd = sd(duncetown[["numeracy"]])
    ),
    color = "black",
    size = 1
  ) +
  labs(x = "Numeracy Score", y = "Density")

# Sussex students with a recorded numeracy score.
sussex <- filter(
  rexam,
  !is.na(numeracy),
  uni == "Sussex University"
)

# Density histogram of their numeracy scores, overlaid with the normal curve
# that has the same mean and standard deviation as this subsample.
ggplot(data = sussex, mapping = aes(x = numeracy)) +
  geom_histogram(
    mapping = aes(y = ..density..),
    binwidth = 1,
    color = "black",
    fill = "white"
  ) +
  stat_function(
    fun = dnorm,
    args = list(
      mean = mean(sussex[["numeracy"]]),
      sd = sd(sussex[["numeracy"]])
    ),
    color = "black",
    size = 1
  ) +
  labs(x = "Numeracy Score", y = "Density")

The Shapiro–Wilk test does just this: it compares the scores in the sample to a normally distributed set of scores with the same mean and standard deviation. If the test is non-significant (p > .05) it tells us that the distribution of the sample is not significantly different from a normal distribution. If, however, the test is significant (p < .05) then the distribution in question is significantly different from a normal distribution (i.e., it is non-normal). This test seems great: in one easy procedure it tells us whether our scores are normally distributed (nice!). However, it has limitations because with large sample sizes it is very easy to get significant results from small deviations from normality, and so a significant test doesn’t necessarily tell us whether the deviation from normality is enough to bias any statistical procedures that we apply to the data.

# Shapiro-Wilk normality test on exam scores, pooling both universities.
shapiro.test(rexam$exam)
##
##  Shapiro-Wilk normality test
##
## data:  rexam$exam
## W = 0.96, p-value = 0.005
# Shapiro-Wilk normality test on numeracy scores, pooling both universities.
shapiro.test(rexam$numeracy)
##
##  Shapiro-Wilk normality test
##
## data:  rexam$numeracy
## W = 0.92, p-value = 2e-05
##### Shapiro–Wilk tests for the two universities
# Run the Shapiro-Wilk test on exam scores separately for each university.
by(data = rexam$exam, INDICES = rexam$uni, FUN = shapiro.test)
## rexam$uni: Duncetown University
##
##  Shapiro-Wilk normality test
##
## data:  dd[x, ]
## W = 0.97, p-value = 0.3
##
## --------------------------------------------------------
## rexam$uni: Sussex University
##
##  Shapiro-Wilk normality test
##
## data:  dd[x, ]
## W = 0.98, p-value = 0.7

Within each university, the exam scores do not differ significantly from a normal distribution (both p-values are greater than .05).

# Run the Shapiro-Wilk test on numeracy scores separately for each university.
by(data = rexam$numeracy, INDICES = rexam$uni, FUN = shapiro.test)
## rexam$uni: Duncetown University
##
##  Shapiro-Wilk normality test
##
## data:  dd[x, ]
## W = 0.94, p-value = 0.01
##
## --------------------------------------------------------
## rexam$uni: Sussex University
##
##  Shapiro-Wilk normality test
##
## data:  dd[x, ]
## W = 0.93, p-value = 0.007

For numeracy scores the tests are still significant indicating non-normal distributions both for Duncetown University (p = .015), and Sussex University (p = .007).

# Normal Q-Q plot of exam scores (rows with a missing exam score dropped),
# with a reference line added by stat_qq_line().
ggplot(
  data = filter(rexam, !is.na(exam)),
  mapping = aes(sample = exam)
) +
  stat_qq() +
  stat_qq_line() +
  labs(x = "Theoretical", y = "Sample")

# Normal Q-Q plot of numeracy scores (rows with a missing numeracy score
# dropped), with a reference line added by stat_qq_line().
ggplot(
  data = filter(rexam, !is.na(numeracy)),
  mapping = aes(sample = numeracy)
) +
  stat_qq() +
  stat_qq_line() +
  labs(x = "Theoretical", y = "sample")

The test statistic for the Shapiro–Wilk test is denoted by $$W$$; we can report the results in Output 5.5 in the following way:

• The percentage on the R exam, W = 0.96, p = .005, and the numeracy scores, W = 0.92, p < .001, were both significantly non-normal.

### Normality tests

• The Shapiro–Wilk test can be used to see if a distribution of scores significantly differs from a normal distribution.
• If the Shapiro–Wilk test is significant (p-value less than .05) then the scores are significantly different from a normal distribution.
• Otherwise, scores are approximately normally distributed.
• Warning: In large samples this test can be significant even when the scores are only slightly different from a normal distribution. Therefore, it should always be interpreted in conjunction with histograms, or Q-Q plots, and the values of skew and kurtosis.
str(rexam)
## 'data.frame':    100 obs. of  5 variables:
##  $ exam    : int  18 30 40 30 40 15 36 40 63 31 ...
##  $ computer: int  54 47 58 37 53 48 49 49 45 62 ...
##  $ lectures: num  75 8.5 69.5 67 44.5 76.5 70 18.5 43.5 100 ...
##  $ numeracy: int  7 1 6 6 2 8 3 7 4 6 ...
##  $ uni     : Factor w/ 2 levels "Duncetown University",..: 1 1 1 1 1 1 1 1 1 1 ...

### Levene’s test with R

Levene’s test tests the null hypothesis that the variances in different groups are equal (i.e., the difference between the variances is zero). For now, all we need to know is that if Levene’s test is significant at p ≤ .05 then we can conclude that the null hypothesis is incorrect and that the variances are significantly different – therefore, the assumption of homogeneity of variances has been violated. If, however, Levene’s test is non-significant (i.e., p > .05) then the variances are roughly equal and the assumption is tenable.

To use Levene’s test, we use the leveneTest() function from the car package.

# default centre median
car::leveneTest(rexam$exam, rexam$uni)
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  1    2.09   0.15
##       98
# Same test on exam scores, centring on the group means instead.
car::leveneTest(rexam$exam, rexam$uni, center = mean)
## Levene's Test for Homogeneity of Variance (center = mean)
##       Df F value Pr(>F)
## group  1    2.58   0.11
##       98
# Numeracy scores, default (median) centring.
car::leveneTest(rexam$numeracy, rexam$uni)
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  1    5.37  0.023
##       98

For the percentage on the R exam, the variances were similar for Duncetown and Sussex University students, F(1, 98) = 2.09, ns, but for numeracy scores the variances were significantly different in the two groups, F(1, 98) = 5.37, p = .023.

# Print the R version, platform, locale and package versions used to render
# this document.
sessionInfo()
## R version 3.5.1 (2018-07-02)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 18.04.1 LTS
##
## Matrix products: default
## BLAS: /home/michael/anaconda3/lib/R/lib/libRblas.so
## LAPACK: /home/michael/anaconda3/lib/R/lib/libRlapack.so
##
## locale:
## [1] en_CA.UTF-8
##
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base
##
## other attached packages:
## [1] bindrcpp_0.2.2       Hmisc_4.1-1          Formula_1.2-3
## [4] survival_2.42-3      lattice_0.20-35      ggplot2_3.0.0
## [7] dplyr_0.7.6          RevoUtils_11.0.1     RevoUtilsMath_11.0.0
##
## loaded via a namespace (and not attached):
##  [1] Rcpp_0.12.18        assertthat_0.2.0    rprojroot_1.3-2
##  [4] digest_0.6.15       cellranger_1.1.0    R6_2.2.2
##  [7] plyr_1.8.4          backports_1.1.2     acepack_1.4.1
## [10] evaluate_0.11       blogdown_0.9.8      pillar_1.3.0
## [16] curl_3.2            rstudioapi_0.7      data.table_1.11.4
## [19] car_3.0-0           rpart_4.1-13        Matrix_1.2-14
## [22] checkmate_1.8.5     rmarkdown_1.10      labeling_0.3
## [25] splines_3.5.1       stringr_1.3.1       foreign_0.8-70
## [28] htmlwidgets_1.2     munsell_0.5.0       compiler_3.5.1
## [31] xfun_0.4.11         pkgconfig_2.0.1     base64enc_0.1-3
## [34] htmltools_0.3.6     nnet_7.3-12         tidyselect_0.2.4
## [37] tibble_1.4.2        gridExtra_2.3       htmlTable_1.12
## [40] bookdown_0.7        rio_0.5.10          codetools_0.2-15
## [43] crayon_1.3.4        withr_2.1.2         grid_3.5.1
## [46] gtable_0.2.0        magrittr_1.5        scales_0.5.0
## [49] zip_1.0.0           carData_3.0-1       stringi_1.2.4
## [52] latticeExtra_0.6-28 openxlsx_4.1.0      boot_1.3-20
## [55] RColorBrewer_1.1-2  tools_3.5.1         forcats_0.3.0
## [58] glue_1.3.0          purrr_0.2.5         hms_0.4.2
## [61] abind_1.4-5         yaml_2.2.0          colorspace_1.3-2
## [64] cluster_2.0.7-1     knitr_1.20          bindr_0.1.1
## [67] haven_1.1.2         pastecs_1.3.21

# References

# Write BibTeX entries for the currently attached packages to "packages.bib".
knitr::write_bib(.packages(), "packages.bib") 
## tweaking Hmisc

Harrell, Frank E, Jr. 2018. Hmisc: Harrell Miscellaneous. https://CRAN.R-project.org/package=Hmisc.

R Core Team. 2018. R: A Language and Environment for Statistical Computing. Vienna, Austria: R Foundation for Statistical Computing. https://www.R-project.org/.

Wickham, Hadley, Winston Chang, Lionel Henry, Thomas Lin Pedersen, Kohske Takahashi, Claus Wilke, and Kara Woo. 2018. Ggplot2: Create Elegant Data Visualisations Using the Grammar of Graphics. https://CRAN.R-project.org/package=ggplot2.

Wickham, Hadley, Romain François, Lionel Henry, and Kirill Müller. 2018. Dplyr: A Grammar of Data Manipulation. https://CRAN.R-project.org/package=dplyr.