Basic Math and Logic with R
# ---- SETUP
x <- 18
r_is_cool <- 23
# 1
x + r_is_cool
#2
x >= r_is_cool
#3
x < r_is_cool & x + 10 > r_is_cool
#4
sum(c(TRUE, TRUE, FALSE, TRUE))
Indexing Vectors
# ---- SETUP
what_the_vec <- c(seq.int(from = 8, to = 72, by = 2))
# 1
what_the_vec[14]
# 2
new_vec <- what_the_vec[c(2, 8, 20)]
# 3
new_vec[2] < new_vec[1]
# 4
new_vec2 <- new_vec[-1]
Indexing Data Frames
Section 1
# 1
dim(iris)
# --- OR ----
str(iris)
# str gives more info... but technically all we needed was dimensions
# 2
# str(iris) from above gives this answer as well
Section 2
# 1
pet_width <- iris[,4]
# ----- OR -----
pet_width <- iris$Petal.Width # this is same thing as above
# 2
mean(pet_width)
# 3
iris[2,3]
# 4
sum(iris$Sepal.Width > 3.2)
# 5
sum(iris$Sepal.Width > 3.2)/length(iris$Sepal.Width)
# ---- OR
sum(iris$Sepal.Width > 3.2)/nrow(iris)
# Note that because iris$Sepal.Width is a vector, we can use length.
# nrow on a data frame gives a sort of "length" of a df
# So, either will work
# 6
table(iris$Species)
# 7
new_data <- iris[iris$Species != "setosa",]
# This one may have been tricky for you
# != means not equal and the comma tells us that we want
# the iris data frame for all rows where
# iris$Species IS NOT EQUAL to "setosa"
# to check if you did this right - table(new_data$Species)
# 8
mean(new_data$Sepal.Length)
# 9
new_data$large_petal_flag <- ifelse(new_data$Petal.Length > 4.5, 1, 0)
# 10
# sum
sum(new_data$large_petal_flag)
# proportion
sum(new_data$large_petal_flag)/length(new_data$large_petal_flag)
# --- OR ---
mean(new_data$large_petal_flag)
# Notice that the mean of a binary variable is the proportion of '1' values present
# So, both the above works better with mean being easier to read
Dplyr
# ---- SETUP
# install.packages("tidyverse")
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.1 ✓ purrr 0.3.4
## ✓ tibble 3.0.1 ✓ dplyr 1.0.0
## ✓ tidyr 1.1.0 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.5.0
## ── Conflicts ────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# 1 & # 2
fancy_iris <- iris %>%
mutate(double_petal = 2 * Petal.Width) %>% # 1
filter(double_petal > 2.3,
double_petal < 4)
# Note that to filter by multiple things, we can separate by a comma
# If we wanted and "OR", we could've typed "|"
# Also, filter keeps true values. So, to filter OUT the values specified,
# we needed to reverse the signs
# 3
iris %>%
group_by(Species) %>%
summarise(max_Sepal.Length = max(Sepal.Length))
## `summarise()` ungrouping output (override with `.groups` argument)