# Use the `box` data from the package `classdata`.
library(tidyverse)
library(classdata)
# Print the data set, then summarise every column. The "NA's" entries in the
# summary show which variables contain missing values.
box
# Note: a leading `?` would open the help page instead of running summary().
summary(box) # there are missing values!
## Rank Rank.Last.Week Movie Distributor
## Min. : 1.00 Min. : 1.00 Length:32885 Length:32885
## 1st Qu.: 24.00 1st Qu.: 20.00 Class :character Class :character
## Median : 48.00 Median : 41.00 Mode :character Mode :character
## Mean : 49.16 Mean : 43.82
## 3rd Qu.: 73.00 3rd Qu.: 65.00
## Max. :128.00 Max. :128.00
## NA's :137 NA's :6269
## Gross Change Thtrs. Per.Thtr.
## Min. : -2928896 Min. :-1230729.0 Min. : 1.0 Min. :-2928896
## 1st Qu.: 3475 1st Qu.: -57.0 1st Qu.: 4.0 1st Qu.: 530
## Median : 26275 Median : -38.0 Median : 24.0 Median : 956
## Mean : 1563419 Mean : -31.3 Mean : 412.9 Mean : 2070
## 3rd Qu.: 234873 3rd Qu.: -7.0 3rd Qu.: 211.2 3rd Qu.: 1889
## Max. :357115007 Max. : 44504.0 Max. :4802.0 Max. : 243390
## NA's :6294 NA's :1 NA's :1
## Total.Gross Week Date
## Min. : 0 Min. : 1.00 Min. :2013-03-08
## 1st Qu.: 80460 1st Qu.: 3.00 1st Qu.:2014-10-24
## Median : 1258670 Median : 6.00 Median :2016-06-17
## Mean : 34116370 Mean : 45.81 Mean :2016-06-12
## 3rd Qu.: 29358392 3rd Qu.: 11.00 3rd Qu.:2018-01-26
## Max. :936658640 Max. :105065.00 Max. :2019-10-11
##
# What is Rank when Rank.Last.Week is missing?
# Keep only the rows whose Rank.Last.Week is NA and show the first few.
box %>%
  filter(is.na(Rank.Last.Week)) %>%
  head()
## Rank Rank.Last.Week Movie Distributor
## 1 2 NA The Addams Family United Artists
## 2 3 NA Gemini Man Paramount Pictures
## 3 9 NA Jexi Lionsgate
## 4 13 NA Parasite Neon
## 5 16 NA The Sky is Pink Gravitas Ventures
## 6 25 NA Metallica & San Francisco S... Trafalgar Releasing
## Gross Change Thtrs. Per.Thtr. Total.Gross Week Date
## 1 30300007 NA 4007 7562 30300007 1 2019-10-11
## 2 20552372 NA 3642 5643 20552372 1 2019-10-11
## 3 3106730 NA 2332 1332 3106730 1 2019-10-11
## 4 384216 NA 3 128072 384216 1 2019-10-11
## 5 295550 NA 194 1523 295550 1 2019-10-11
## 6 144736 NA 157 922 907163 1 2019-10-11
# What are the dimensions of box, when removing all missing values with the
# function na.omit()?
# Dimensions of the full data set (rows, columns).
dim(box)
## [1] 32885 11
# na.omit() on a data frame drops every row that has an NA in any column,
# so rows are lost even when only one variable is missing.
dim(na.omit(box)) # don't use na.omit
## [1] 26388 11
# Demonstration of why na.omit() should not be assigned back into a column:
# na.omit() drops the NA entries of Rank.Last.Week, so the right-hand side
# has fewer values than box has rows.
box$Rank.Last.Week <- na.omit(box$Rank.Last.Week)
# There is a mismatch in the dimensions: the vector returned by na.omit() is shorter than the number of rows in box.