From grades to distribution

GCSE grades are usually reported in a very compact form: we see only the percentage of students who got A* or A, and the percentage who got A* to C.

This doesn’t tell us much about the distribution of grades or the true average in a school. Here I use simulation to enrich and compare the rankings. I take the national grade distribution from here and the school figures from the Telegraph.

# Load the school league table (one row per school) and print a per-column
# summary. The columns used below are Rank, School, Type, Gender, ac and asa;
# ac and asa are percentages on a 0-100 scale at this point.
# `FALSE` is spelled out because `F` is an ordinary variable that can be
# reassigned, whereas `FALSE` is a reserved word.
df <- read.csv("~/R/schools gcse - Sheet1.csv", stringsAsFactors = FALSE)
summary(df)
##       Rank           School              Type              Gender         
##  Min.   :  1.00   Length:182         Length:182         Length:182        
##  1st Qu.: 46.25   Class :character   Class :character   Class :character  
##  Median : 91.50   Mode  :character   Mode  :character   Mode  :character  
##  Mean   : 91.50                                                           
##  3rd Qu.:136.75                                                           
##  Max.   :182.00                                                           
##        ac             asa       
##  Min.   :47.30   Min.   : 4.19  
##  1st Qu.:69.62   1st Qu.:23.50  
##  Median :76.00   Median :29.08  
##  Mean   :75.37   Mean   :30.34  
##  3rd Qu.:82.27   3rd Qu.:36.07  
##  Max.   :98.00   Max.   :68.56
# Preview the first six rows of the raw table.
head(df, n = 6L)
##   Rank                       School Type Gender    ac   asa
## 1    1               Thomas Telford    C      M 98.00 42.00
## 2    2                     Gordon's    C      M 96.60 45.50
## 3    3            Dame Alice Owen's   PS      M 94.55 68.56
## 4    4                    Edgbarrow    C      M 92.38 34.61
## 5    5              King David High    C      M 91.94 49.68
## 6    6 Hertfordshire and Essex High    C      G 91.93 44.40
# Convert the two published percentages (0-100) into proportions (0-1).
# Not idempotent: running this twice divides by 100 twice.
df[c("ac", "asa")] <- lapply(df[c("ac", "asa")], function(pct) pct / 100)

# National 2016 row as published. The first nine figures are the percentage
# of entries at each grade, in the order A*, A, B, C, D, E, F, G, U; the
# 66.9 that follows equals the sum of the first four (A* to C).
# NOTE(review): the trailing 5240796 is presumably the number of entries.
# total  2016   6.5 14.0 21.4 25.0 16.9  8.3  4.2  2.1  1.6  66.9 5240796

# Score assigned to each grade in the simulation:
# A*=90%    A=80%    B=70%    C=60%    D=50%    E=40%    F=30%    G=20%.

# Share that the first grade of a group takes within that whole group.
share_of_first <- function(p) p[1] / sum(p)

nat <- c(6.5, 14.0, 21.4, 25.0, 16.9, 8.3, 4.2, 2.1, 1.6) / 100

# National conditional shares, used to split each school's grouped figures:
asamonga <- share_of_first(nat[1:2])     # A* among A* and A
bamongbc <- share_of_first(nat[3:4])     # B among B and C
damongdefgu <- share_of_first(nat[5:9])  # D among D, E, F, G and U

# Break each school's two published figures (ac = A* to C, asa = A* to A)
# into per-band shares, splitting every group with the national conditional
# shares computed above. Columns are added in this order:
#   bc    B or C            as   A*          a     A
#   b     B                 c    C           defgu D or below
#   d     D                 efgu E or below
df[["bc"]] <- with(df, ac - asa)
df[["as"]] <- with(df, asamonga * asa)
df[["a"]] <- with(df, asa - as)
df[["b"]] <- with(df, bc * bamongbc)
df[["c"]] <- with(df, bc - b)
df[["defgu"]] <- with(df, 1 - ac)
df[["d"]] <- with(df, damongdefgu * defgu)
df[["efgu"]] <- with(df, defgu - d)

# For each school, simulate 1000 pupils and summarise their scores.
#
# Every pupil first gets a grade band drawn from the school's band shares
# (as, a, b, c, d; whatever is left falls in the E-or-below band) and then a
# score drawn uniformly inside that band's score range.
#
# Result: `stats` is a 3 x nrow(df) numeric matrix, one column per school:
#   row 1  mean of the simulated scores
#   row 2  standard deviation of the simulated scores
#   row 3  share of simulated pupils scoring above 0.7 (B or higher)
# The same three numbers are printed per school as a progress log.
#
# The order of random draws (band draw, then score draw, pupil by pupil) is
# deliberately kept as is, so a given seed keeps producing the same numbers.

set.seed(100)

n_pupils <- 1000

# Score range of each band from A* down to D. The bounds are literals rather
# than lower + 0.1, because e.g. 0.7 + 0.1 is not exactly 0.8 in floating
# point and would shift the draws.
band_lower <- c(.9, .8, .7, .6, .5)
band_upper <- c(1, .9, .8, .7, .6)

# vapply() pins the result shape (3 numbers per school), and seq_len() copes
# with an empty data frame, where 1:nrow(df) would iterate over c(1, 0).
stats <- vapply(seq_len(nrow(df)), function(i) {
  # Fetch the school's row and build the cumulative cut-offs once per school
  # rather than once per pupil. `[[` is used instead of `$` so that similarly
  # named columns (a / as / asa) can never be picked up by partial matching.
  school <- df[i, ]
  cut_as <- school[["as"]]
  cut_a <- cut_as + school[["a"]]
  cut_b <- cut_a + school[["b"]]
  cut_c <- cut_b + school[["c"]]
  cut_d <- cut_c + school[["d"]]
  cuts <- c(cut_as, cut_a, cut_b, cut_c, cut_d)

  r <- vapply(seq_len(n_pupils), function(j) {
    u <- runif(1)
    # The first cut-off that u falls below decides the band.
    for (k in seq_along(cuts)) {
      if (u < cuts[k]) {
        return(runif(1, band_lower[k], band_upper[k]))
      }
    }
    # E or below: one wide band covering scores 0.2 to 0.5.
    runif(1, .2, .5)
  }, numeric(1))
  # hist(r, main = i)
  cat(i, mean(r), sd(r), mean(r > .7), '\n')
  c(mean(r), sd(r), mean(r > .7))
}, numeric(3))
## 1 0.7683686 0.1223946 0.672 
## 2 0.7715759 0.1232042 0.694 
## 3 0.8056843 0.1406709 0.8 
## 4 0.7420039 0.1321523 0.616 
## 5 0.7710724 0.1384488 0.696 
## 6 0.7525169 0.1378484 0.647 
## 7 0.7788966 0.1497558 0.731 
## 8 0.7666975 0.1449658 0.683 
## 9 0.7351966 0.1492665 0.61 
## 10 0.7355491 0.1375408 0.598 
## 11 0.7516287 0.1423408 0.658 
## 12 0.7542215 0.1496314 0.662 
## 13 0.7346187 0.1549356 0.615 
## 14 0.7398732 0.1609199 0.612 
## 15 0.7073247 0.1388268 0.516 
## 16 0.7122295 0.1407795 0.522 
## 17 0.7572616 0.1616497 0.668 
## 18 0.7538176 0.1577389 0.676 
## 19 0.710365 0.1366636 0.529 
## 20 0.7017176 0.1451331 0.508 
## 21 0.7447685 0.1539364 0.634 
## 22 0.7304486 0.1593665 0.595 
## 23 0.7316733 0.1587863 0.605 
## 24 0.7163373 0.1585425 0.572 
## 25 0.7310683 0.154584 0.6 
## 26 0.715369 0.1612976 0.561 
## 27 0.7619853 0.1660202 0.68 
## 28 0.7213348 0.1689937 0.582 
## 29 0.7092299 0.1605229 0.54 
## 30 0.7397647 0.1712905 0.608 
## 31 0.7311732 0.1574371 0.597 
## 32 0.7452909 0.1676219 0.641 
## 33 0.7415877 0.1655457 0.638 
## 34 0.7163421 0.1674189 0.583 
## 35 0.7569299 0.1786507 0.686 
## 36 0.7278115 0.16386 0.604 
## 37 0.7311789 0.1737038 0.614 
## 38 0.6966158 0.1551365 0.514 
## 39 0.7496687 0.1698115 0.654 
## 40 0.7373526 0.175278 0.632 
## 41 0.7328526 0.1712844 0.627 
## 42 0.7078175 0.1667427 0.543 
## 43 0.7251205 0.166848 0.593 
## 44 0.7113706 0.1672249 0.541 
## 45 0.7355042 0.1765673 0.62 
## 46 0.7052244 0.156039 0.541 
## 47 0.7002309 0.159446 0.529 
## 48 0.7100308 0.1643818 0.565 
## 49 0.663536 0.127215 0.401 
## 50 0.7211366 0.1779479 0.59 
## 51 0.6988231 0.1565482 0.529 
## 52 0.7141091 0.1729384 0.574 
## 53 0.7230838 0.1691468 0.578 
## 54 0.7180372 0.175405 0.59 
## 55 0.7048383 0.1647505 0.528 
## 56 0.6894581 0.1614176 0.484 
## 57 0.7018428 0.1643696 0.503 
## 58 0.7146036 0.1688164 0.558 
## 59 0.722388 0.1764017 0.592 
## 60 0.7073688 0.1543529 0.516 
## 61 0.6932736 0.1574285 0.514 
## 62 0.6914036 0.1585331 0.501 
## 63 0.7117699 0.1697212 0.564 
## 64 0.7216955 0.1721083 0.591 
## 65 0.7188876 0.1733361 0.567 
## 66 0.7125153 0.1723711 0.554 
## 67 0.7181997 0.1723422 0.561 
## 68 0.6963783 0.1806988 0.529 
## 69 0.7095326 0.1803215 0.567 
## 70 0.7126233 0.1728029 0.555 
## 71 0.7024768 0.1848395 0.549 
## 72 0.6975642 0.1708655 0.529 
## 73 0.6978556 0.1700575 0.517 
## 74 0.6999218 0.1719639 0.516 
## 75 0.6969481 0.1746662 0.525 
## 76 0.724945 0.1804165 0.599 
## 77 0.7115715 0.1745551 0.542 
## 78 0.6950392 0.1809089 0.534 
## 79 0.6940526 0.1639478 0.52 
## 80 0.6814997 0.1702338 0.475 
## 81 0.7113493 0.1764012 0.559 
## 82 0.6807091 0.1831838 0.489 
## 83 0.6827422 0.1684967 0.491 
## 84 0.7008198 0.1781685 0.547 
## 85 0.6909296 0.1692115 0.501 
## 86 0.6892982 0.1834329 0.484 
## 87 0.7171709 0.1794301 0.574 
## 88 0.6835816 0.164339 0.502 
## 89 0.6878648 0.1724486 0.502 
## 90 0.6864698 0.1704906 0.485 
## 91 0.7115024 0.1817323 0.564 
## 92 0.6757976 0.1689367 0.465 
## 93 0.6866085 0.1760865 0.487 
## 94 0.6999403 0.171834 0.541 
## 95 0.684758 0.1671032 0.476 
## 96 0.6965842 0.1816783 0.534 
## 97 0.6610951 0.1656206 0.422 
## 98 0.6862636 0.1613714 0.479 
## 99 0.6914803 0.1789818 0.499 
## 100 0.6888952 0.173641 0.502 
## 101 0.6693878 0.1570846 0.443 
## 102 0.6985946 0.1785776 0.525 
## 103 0.6835057 0.1720917 0.496 
## 104 0.6992069 0.1806466 0.543 
## 105 0.6919625 0.1818359 0.504 
## 106 0.6809498 0.1693425 0.479 
## 107 0.6956557 0.1796154 0.535 
## 108 0.7061388 0.1844397 0.563 
## 109 0.686091 0.1757979 0.504 
## 110 0.694592 0.1774861 0.527 
## 111 0.6882808 0.17066 0.491 
## 112 0.6838849 0.1759428 0.48 
## 113 0.6921637 0.1850556 0.52 
## 114 0.6874704 0.1813795 0.489 
## 115 0.6887098 0.1802252 0.508 
## 116 0.672631 0.1725376 0.462 
## 117 0.6594056 0.1679273 0.426 
## 118 0.6570263 0.1660933 0.422 
## 119 0.6742408 0.1687756 0.471 
## 120 0.6787481 0.167178 0.46 
## 121 0.6742603 0.1790954 0.475 
## 122 0.6908932 0.1862711 0.513 
## 123 0.6661889 0.1782563 0.445 
## 124 0.6787079 0.1800569 0.478 
## 125 0.6887706 0.1788021 0.505 
## 126 0.6883777 0.1923806 0.527 
## 127 0.6714684 0.1837034 0.471 
## 128 0.6928202 0.1807427 0.514 
## 129 0.6661009 0.1815404 0.448 
## 130 0.6869192 0.1805188 0.504 
## 131 0.6550113 0.1692615 0.419 
## 132 0.6705452 0.1824723 0.45 
## 133 0.6705127 0.1718149 0.462 
## 134 0.6786555 0.1882929 0.478 
## 135 0.664383 0.1798818 0.434 
## 136 0.661458 0.1765095 0.437 
## 137 0.6524941 0.1781033 0.414 
## 138 0.6569803 0.1832602 0.428 
## 139 0.6636846 0.1798854 0.447 
## 140 0.6635242 0.1753093 0.44 
## 141 0.6585941 0.1726128 0.418 
## 142 0.6619078 0.18301 0.445 
## 143 0.662795 0.1944851 0.459 
## 144 0.6516961 0.1863328 0.429 
## 145 0.6617119 0.1887102 0.449 
## 146 0.6633577 0.1906342 0.462 
## 147 0.6498617 0.1742126 0.416 
## 148 0.653788 0.1756331 0.404 
## 149 0.6557736 0.1780469 0.429 
## 150 0.6476855 0.1729023 0.393 
## 151 0.6388254 0.1759931 0.385 
## 152 0.661145 0.189371 0.438 
## 153 0.6473794 0.1762792 0.405 
## 154 0.6550975 0.1763963 0.42 
## 155 0.6600462 0.1751826 0.428 
## 156 0.6507472 0.1828157 0.415 
## 157 0.6585622 0.1825717 0.431 
## 158 0.6538436 0.1927439 0.419 
## 159 0.6624928 0.1932488 0.446 
## 160 0.6531746 0.1854843 0.412 
## 161 0.6300412 0.1648541 0.359 
## 162 0.6457217 0.2030982 0.43 
## 163 0.639674 0.1773486 0.382 
## 164 0.6438112 0.1823604 0.398 
## 165 0.6504571 0.1966352 0.433 
## 166 0.7130451 0.2334163 0.618 
## 167 0.6338448 0.1920332 0.4 
## 168 0.6262068 0.1755543 0.361 
## 169 0.6151139 0.1592595 0.303 
## 170 0.639548 0.1965426 0.404 
## 171 0.6424653 0.1899467 0.405 
## 172 0.6198859 0.1779872 0.344 
## 173 0.6270983 0.1825826 0.356 
## 174 0.6004274 0.162012 0.283 
## 175 0.6336433 0.1935467 0.39 
## 176 0.6176384 0.1741328 0.328 
## 177 0.634189 0.1855823 0.372 
## 178 0.6273507 0.1920568 0.377 
## 179 0.6123633 0.1933287 0.343 
## 180 0.6131826 0.1859836 0.339 
## 181 0.6294816 0.190813 0.365 
## 182 0.5936388 0.1871862 0.294
# Turn the simulation output into one row per school and attach it to df:
# mean score, standard deviation, and share scoring above 0.7 ("B and up").
stats <- t(stats)
df[["mean"]] <- stats[, 1]
df[["sd"]] <- stats[, 2]
df[["bup"]] <- stats[, 3]

# Descending ranks: the largest value gets rank 1.
df[["ranknew"]] <- (nrow(df) + 1) - rank(df[["mean"]])
# Positive diff = the school sits higher in the simulated ranking than in
# the published one.
df[["diff"]] <- df[["Rank"]] - df[["ranknew"]]
df[["ranksd"]] <- (nrow(df) + 1) - rank(df[["sd"]])

# Top 6 schools by the new (simulated) rank.
# The school previously ranked 3rd does much better and comes out on top.
head(df[order(df[["ranknew"]]), ], n = 6L)
##   Rank                      School Type Gender     ac    asa     bc
## 3    3           Dame Alice Owen's   PS      M 0.9455 0.6856 0.2599
## 7    7    Watford Grammar for Boys   PS      B 0.9100 0.5700 0.3400
## 2    2                    Gordon's    C      M 0.9660 0.4550 0.5110
## 5    5             King David High    C      M 0.9194 0.4968 0.4226
## 1    1              Thomas Telford    C      M 0.9800 0.4200 0.5600
## 8    8 St John The Baptist RC Comp    C      M 0.9100 0.4720 0.4380
##          as         a         b         c  defgu          d       efgu
## 3 0.2173854 0.4682146 0.1198677 0.1400323 0.0545 0.02782628 0.02667372
## 7 0.1807317 0.3892683 0.1568103 0.1831897 0.0900 0.04595166 0.04404834
## 2 0.1442683 0.3107317 0.2356767 0.2753233 0.0340 0.01735952 0.01664048
## 5 0.1575220 0.3392780 0.1949060 0.2276940 0.0806 0.04115227 0.03944773
## 1 0.1331707 0.2868293 0.2582759 0.3017241 0.0200 0.01021148 0.00978852
## 8 0.1496585 0.3223415 0.2020086 0.2359914 0.0900 0.04595166 0.04404834
##        mean        sd   bup ranknew diff ranksd
## 3 0.8056843 0.1406709 0.800       1    2    173
## 7 0.7788966 0.1497558 0.731       2    5    166
## 2 0.7715759 0.1232042 0.694       3   -1    181
## 5 0.7710724 0.1384488 0.696       4    1    175
## 1 0.7683686 0.1223946 0.672       5   -4    182
## 8 0.7666975 0.1449658 0.683       6    2    170
# Top 6 schools by standard deviation (highest sd first).
# Mixed schools near the bottom of the table have the highest sd; John
# Warner's figures look suspect (ac equals asa, so its bc share is 0).
head(df[order(df[["ranksd"]]), ], n = 6L)
##     Rank            School Type Gender     ac    asa     bc         as
## 166  166       John Warner    C      M 0.6200 0.6200 0.0000 0.19658537
## 162  162    Congleton High    C      M 0.6341 0.2638 0.3703 0.08364390
## 165  165 Trinity CofE High    G      M 0.6230 0.2851 0.3379 0.09039756
## 170  170   King Edward VII    C      M 0.6050 0.2390 0.3660 0.07578049
## 143  143 Knutsford Academy    C      M 0.6730 0.2754 0.3976 0.08732195
## 175  175          Kingdown    C      M 0.5750 0.2211 0.3539 0.07010488
##             a         b         c  defgu         d      efgu      mean
## 166 0.4234146 0.0000000 0.0000000 0.3800 0.1940181 0.1859819 0.7130451
## 162 0.1801561 0.1707849 0.1995151 0.3659 0.1868190 0.1790810 0.6457217
## 165 0.1947024 0.1558418 0.1820582 0.3770 0.1924864 0.1845136 0.6504571
## 170 0.1632195 0.1688017 0.1971983 0.3950 0.2016767 0.1933233 0.6395480
## 143 0.1880780 0.1833759 0.2142241 0.3270 0.1669577 0.1600423 0.6627950
## 175 0.1509951 0.1632211 0.1906789 0.4250 0.2169940 0.2080060 0.6336433
##            sd   bup ranknew diff ranksd
## 166 0.2334163 0.618      49  117      1
## 162 0.2030982 0.430     162    0      2
## 165 0.1966352 0.433     158    7      3
## 170 0.1965426 0.404     166    4      4
## 143 0.1944851 0.459     136    7      5
## 175 0.1935467 0.390     170    5      6
# Published rank and simulated rank, split by the school's Gender code
# (B, G or M): one boxplot each, then the mean published rank per group.
boxplot(Rank ~ Gender, data = df)

boxplot(ranknew ~ Gender, data = df)

aggregate(Rank ~ Gender, data = df, FUN = mean)
##   Gender     Rank
## 1      B 40.44444
## 2      G 64.52174
## 3      M 98.70000
# Mean simulated rank per group: boys' schools get a bigger (worse) average
# rank here than in the published ranking.
aggregate(ranknew ~ Gender, data = df, FUN = mean)
##   Gender  ranknew
## 1      B 46.00000
## 2      G 63.73913
## 3      M 98.48667
# Boys' schools have the highest average, but the median is higher for girls'.
boxplot(mean ~ Gender, data = df)

# Mixed schools have the highest SD, i.e. the most inequality.
boxplot(sd ~ Gender, data = df)

# Boys' and girls' schools are close in the share getting B or higher.
boxplot(bup ~ Gender, data = df)

# The 6 schools whose simulated rank differs most from the published rank,
# in either direction (largest absolute diff first).
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned, whereas `TRUE` is a reserved word.
head(df[order(abs(df$diff), decreasing = TRUE), ])
##     Rank                      School Type Gender     ac    asa     bc
## 166  166                 John Warner    C      M 0.6200 0.6200 0.0000
## 49    49                       Green    C      G 0.8138 0.0759 0.7379
## 20    20 Sacred Heart of Mary Girls'    C      G 0.8629 0.2279 0.6350
## 15    15                Arthur Terry    C      M 0.8755 0.2251 0.6504
## 97    97                      Dallam    C      M 0.7570 0.1940 0.5630
## 87    87            Colston's Girls'    C      G 0.7700 0.3989 0.3711
##             as          a         b         c  defgu          d       efgu
## 166 0.19658537 0.42341463 0.0000000 0.0000000 0.3800 0.19401813 0.18598187
## 49  0.02406585 0.05183415 0.3403246 0.3975754 0.1862 0.09506888 0.09113112
## 20  0.07226098 0.15563902 0.2928664 0.3421336 0.1371 0.06999970 0.06710030
## 15  0.07137317 0.15372683 0.2999690 0.3504310 0.1245 0.06356647 0.06093353
## 97  0.06151220 0.13248780 0.2596595 0.3033405 0.2430 0.12406949 0.11893051
## 87  0.12648049 0.27241951 0.1711539 0.1999461 0.2300 0.11743202 0.11256798
##          mean        sd   bup ranknew diff ranksd
## 166 0.7130451 0.2334163 0.618      49  117      1
## 49  0.6635360 0.1272150 0.401     133  -84    180
## 20  0.7017176 0.1451331 0.508      70  -50    169
## 15  0.7073247 0.1388268 0.516      64  -49    174
## 97  0.6610951 0.1656206 0.422     142  -45    133
## 87  0.7171709 0.1794301 0.574      43   44     55