naniar

naniar 패키지 훑어보기

NA 관련해서 직관적으로 깔끔한 그래프로 훑어볼 수 있게 도와주는 패키지이다.
본 포스팅은 해당 사이트를 적극참고하여 작성하였다.

1
2
library(tidyverse)
library(naniar)

vis_miss

1
vis_miss(airquality)

gg_miss_var

1
gg_miss_var(airquality)

1
gg_miss_var(airquality, show_pct = TRUE)

1
gg_miss_var(airquality, facet = Month)

gg_miss_case

1
gg_miss_case(airquality)

gg_miss_upset

1
gg_miss_upset(riskfactors)

1
n_var_miss(riskfactors)
## [1] 24
1
gg_miss_upset(riskfactors, nsets = n_var_miss(riskfactors)) 

1
gg_miss_upset(riskfactors, nsets = 4) #nsets: 변수 개수

1
gg_miss_upset(riskfactors, nsets = 10, nintersects = 5) #nintersects: 변수조합 수

geom_miss_point

ggplot๊ณผ ์‘์šฉ

1
2
ggplot(airquality, aes(x = Ozone, y = Solar.R)) +
  geom_point()
## Warning: Removed 42 rows containing missing values (geom_point).

1
2
ggplot(airquality, aes(x = Ozone, y = Solar.R)) +
  geom_miss_point()

gg_miss_fct

1
gg_miss_fct(oceanbuoys, year)

miss_var_summary

1
2
3
riskfactors %>%
  group_by(marital) %>%
  miss_var_summary()
## # A tibble: 231 x 4
## # Groups:   marital [7]
##    marital variable      n_miss pct_miss
##    <fct>   <chr>          <int>    <dbl>
##  1 Married smoke_stop       120    91.6 
##  2 Married pregnant         117    89.3 
##  3 Married smoke_last        84    64.1 
##  4 Married smoke_days        73    55.7 
##  5 Married drink_average     68    51.9 
##  6 Married health_poor       67    51.1 
##  7 Married drink_days        67    51.1 
##  8 Married weight_lbs         6     4.58
##  9 Married bmi                6     4.58
## 10 Married diet_fruit         4     3.05
## # ... with 221 more rows

miss_var_span, gg_miss_span

1
miss_var_span(pedestrian, hourly_counts, span_every = 3000)
## # A tibble: 13 x 5
##    span_counter n_miss n_complete prop_miss prop_complete
##           <int>  <int>      <dbl>     <dbl>         <dbl>
##  1            1      0       3000  0                1    
##  2            2      0       3000  0                1    
##  3            3      1       2999  0.000333         1.00 
##  4            4    121       2879  0.0403           0.960
##  5            5    503       2497  0.168            0.832
##  6            6    555       2445  0.185            0.815
##  7            7    190       2810  0.0633           0.937
##  8            8      0       3000  0                1    
##  9            9      1       2999  0.000333         1.00 
## 10           10      0       3000  0                1    
## 11           11      0       3000  0                1    
## 12           12    745       2255  0.248            0.752
## 13           13    432       2568  0.144            0.856
1
gg_miss_span(pedestrian, hourly_counts, span_every = 3000)

1
gg_miss_span(pedestrian, hourly_counts, span_every = 3000, facet = sensor_name)

그외 다양한

1
gg_miss_case_cumsum(airquality)

1
gg_miss_var_cumsum(airquality)

1
gg_miss_which(airquality)