class: center, middle, inverse, title-slide

# Day Thirty-Five: Iteration
## SDS 192: Introduction to Data Science
### Lindsay Poirier<br>Statistical & Data Sciences, Smith College
### Spring 2022
---

# For loops

```r
for (x in 1:5) {
  print(x)
}
```

```
## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 5
```

---

# For loops

```r
df <- data.frame(
  name = c("obs1", "obs2", "obs3", "obs1"),
  a = c(2, 3, 4, 5),
  b = c(4, 7, 2, 1),
  c = c(4, 9, 3, 2)
)

for (i in df$a) {
  print(i + 1)
}
```

```
## [1] 3
## [1] 4
## [1] 5
## [1] 6
```

---

# For loops

```r
library(tidyverse)
df <- data.frame(
  name = c("obs1", "obs2", "obs3", "obs1"),
  a = c(2, 3, 4, 5),
  b = c(4, 7, 2, 1),
  c = c(4, 9, 3, 2)
)

for (i in df %>% select(a:c)) {
  print(sum(i))
}
```

```
## [1] 14
## [1] 14
## [1] 18
```

---

# Vectorized Functions

```r
library(tidyverse)
df <- data.frame(
  name = c("obs1", "obs2", "obs3", "obs1"),
  a = c(2, 3, 4, 5),
  b = c(4, 7, 2, 1),
  c = c(4, 9, 3, 2)
)

df %>%
  summarize(across(a:c, sum))
```

```
##    a  b  c
## 1 14 14 18
```

---

# Vectorized Functions

```r
library(tidyverse)
df <- data.frame(
  name = c("obs1", "obs2", "obs3", "obs1"),
  a = c(2, 3, 4, 5),
  b = c(4, 7, 2, 1),
  c = c(4, 9, 3, 2)
)

df %>%
  summarize(across(where(is.numeric), sum))
```

```
##    a  b  c
## 1 14 14 18
```

---

# `purrr` package

* Package for working with functions and vectors
* Based on the functional programming paradigm
* Provides a family of `map()` functions

---

# `map()`

.pull-left[

```r
df <- data.frame(
  name = c("obs1", "obs2", "obs3", "obs1"),
  a = c(2, 3, 4, 5),
  b = c(4, 7, 2, 1),
  c = c(4, 9, 3, 2)
)
```

]

.pull-right[

```r
sum_x_in_df <- function(x) {
  summed_vector <- sum(x)
  return(summed_vector)
}
```

```r
map(df %>% select(a:c), sum_x_in_df)
```

```
## $a
## [1] 14
## 
## $b
## [1] 14
## 
## $c
## [1] 18
```

]

---

.pull-left[

```r
df <- data.frame(
  name = c("obs1", "obs2", "obs3", "obs1"),
  a = c(2, 3, 4, 5),
  b = c(4, 7, 2, 1),
  c = c(4, 9, 3, 2)
)
```

]

.pull-right[

```r
sum_x_in_df <- function(x) {
  summed_vector <- sum(x)
  return(summed_vector)
}
```

```r
map_dfc(df %>% select(a:c), sum_x_in_df)
```

```
## # A tibble: 1 × 3
##       a     b     c
##   <dbl> <dbl> <dbl>
## 1    14    14    18
```

]

---

.pull-left[

```r
df <- data.frame(
  name = c("obs1", "obs2", "obs3", "obs1"),
  a = c(2, 3, 4, 5),
  b = c(4, 7, 2, 1),
  c = c(4, 9, 3, 2)
)
```

]

.pull-right[

```r
print_rows <- function(obs = "obs1") {
  df %>%
    filter(name == obs)
}
```

```r
map_dfr(c("obs1", "obs2"), print_rows)
```

```
##   name a b c
## 1 obs1 2 4 4
## 2 obs1 5 1 2
## 3 obs2 3 7 9
```

]

---

# Let's practice!