R Course - Day 3

Functions II

If? Then! Else?

Conditional execution:

if (condition) {
  statement1
}

or

if (condition) {
  statement1
} else {
  statement2
}

Example

x <- 5
if (x < 10) {
  y <- 0
}
y
[1] 0

Another example:

if (x >= 10) {
  y <- 1
} else {
  y <- 0
}
y
[1] 0

If else on one line

Oneliner:

y <- if(x >= 10) 1 else 0
y
[1] 0

Beware!

y <- if(x >= 10) 1 #no else
y
NULL

Nested if() Statements

if inside an if

if(condition1) {
  if(condition2) {
    statement1 
  } else {
    statement2
  }
}

Adding if else to the descriptives function

# Describe a variable after filtering out some of its values.
#
# Args:
#   x: A numeric (double or integer) vector, or a factor.
#
# Returns:
#   For numeric input: summary() of the values greater than 0.
#   For a factor: table() of the values different from "-1", with the
#   unused levels dropped.
#   For any other type: NULL (invisibly), because no branch is taken.
my_descriptives <- function(x) {
  # is.numeric()/is.factor() instead of class(x) == "...": class() reports
  # "integer" for integer vectors (which would then be skipped) and can
  # return several strings (e.g. for ordered factors), which is not a valid
  # if() condition.
  if (is.numeric(x)) {
    x.trim <- x[x > 0]
    summary(x.trim)
  } else if (is.factor(x)) {
    # The name x.trim is kept on purpose: table() prints it as the header
    # of its output.
    x.trim <- droplevels(x[x != -1])
    table(x.trim)
  }
}
my_descriptives(data$Ages)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  28.00   37.50   48.00   46.04   55.50   67.00 
my_descriptives(data$Sex)
x.trim
 0  1 
12 12 

Stringing multiple if else together

# Look up the sound made by an animal.
#
# Args:
#   animal: A single character string naming the animal.
#
# Returns:
#   "Meow!", "Ribbit!" or "Woof!" for "cat", "frog" or "dog"; for any other
#   value, a message saying that the sound is unknown.
whatAnimalSound <- function(animal) {
  # An if / else if chain is itself an expression: the value of the first
  # branch whose condition is TRUE becomes the value of the whole chain.
  sound <- if (animal == "cat") {
    "Meow!"
  } else if (animal == "frog") {
    "Ribbit!"
  } else if (animal == "dog") {
    "Woof!"
  } else {
    paste0("I don't know what sound a '", animal, "' makes...")
  }
  return(sound)
}
whatAnimalSound("dog")
[1] "Woof!"
whatAnimalSound("bird")
[1] "I don't know what sound a 'bird' makes..."

if-else on a vector: The ifelse() Function

height <- c(69, 71, 67, 66, 72, 71, 61, 65, 73, 70, 68, 74)

if (height > 69){
  print("tall") 
} else {
  print("short")
}
Error in if (height > 69) {: the condition has length > 1
htCategory <- ifelse(height > 69, yes = "tall", no = "short")

htCategory
 [1] "short" "tall"  "short" "short" "tall"  "tall"  "short" "short" "tall" 
[10] "tall"  "short" "tall" 

For more complicated cases we can use apply()

Terminating a function with returns, errors, and warnings

The following functions are useful for terminating a function call or just printing a warning message:

return()     # Terminate a function call and return a value
stop()       # Terminate a function call and print an error message
warning()    # Print a warning message (without terminating the function call)

Terminating a Function Call Using return()

# Classify a single number by its sign.
#
# Args:
#   x: A single number.
#
# Returns:
#   "Negative", "Positive" or "Zero".
mySign <- function(x) {
  # Each return() ends the call immediately, so the statements below it
  # only run when the conditions above were FALSE.
  if (x < 0) {
    return("Negative")
  }
  if (x > 0) {
    return("Positive")
  }
  return("Zero")
}

mySign(x = 13)
[1] "Positive"

Note: return("Zero") is not reached, because the call already ended at return("Positive")

Terminating Using stop()

stop() means “error”

# Divide x by y, refusing to divide by zero.
#
# Args:
#   x: The numerator.
#   y: The denominator.
#
# Returns:
#   x / y. Signals an error (via stop()) when y equals 0.
myRatio <- function(x, y) {
  # stop() ends the call with an error, so the division is never attempted.
  if (y == 0) {
    stop("Cannot divide by 0")
  }
  return(x/y)
}
myRatio(x = 3, y = 0)
Error in myRatio(x = 3, y = 0): Cannot divide by 0

Note: return(x/y) is not reached, because stop() already ended the call with an error

Printing a Warning Message Using warning()

warning() just prints a warning message.

# Divide x by y, warning (but not stopping) when dividing by zero.
#
# Args:
#   x: The numerator.
#   y: The denominator.
#
# Returns:
#   x / y. A warning is signalled when y equals 0, and the division is
#   still carried out.
myRatio <- function(x, y) {
  # warning() does not end the call: execution continues with the division.
  if (y == 0) {
    warning("Attempt made to divide by 0")
  }
  return(x/y)
}

myRatio(x = 3, y = 0)
Warning in myRatio(x = 3, y = 0): Attempt made to divide by 0
[1] Inf

R can divide by zero; the result is the special value Inf

Looping

Repeat (iterate) an R statement

for() # Repeat a set of statements a specified number of times
while() # Repeat a set of statements as long as a specified condition is met
repeat # Repeat a set of statements until a break command is encountered

Stopping a loop:

break # Terminate a loop's iterations
next # Skip ahead to the next iteration

Example for, while and repeat loops

for(i in 1:5) {
  print(i^2)
}
[1] 1
[1] 4
[1] 9
[1] 16
[1] 25
i <- 1
while(i <= 5) {
  print(i^2)
  i <- i + 1
}
[1] 1
[1] 4
[1] 9
[1] 16
[1] 25
i <- 1
repeat {
  print(i^2)
  i <- i + 1
  if(i > 5) break
}
[1] 1
[1] 4
[1] 9
[1] 16
[1] 25

for() Loops

for(i in sequence) {
  statement1
  statement2
  .
  .
  .
  statementq
}

“Looping” over a data.frame

coins <- data.frame(Coin = c("penny", "quarter", "nickel", "quarter", "dime", "penny"),
                    Year = c(1943, 1905, 1889, 1960, 1937, 1900),
                    Mint = c("Den", "SF", "Phil", "Den", "SF", "Den"),
                    Condition = c("good", "fair", "excellent", "good", "poor", "good"),
                    Value = c(12.00, 55.00, 300.00, 40.00, 18.00, 28.00),
                    Price = c(15.00, 45.00, 375.00, 25.00, 20.00, 20.00))
coins
     Coin Year Mint Condition Value Price
1   penny 1943  Den      good    12    15
2 quarter 1905   SF      fair    55    45
3  nickel 1889 Phil excellent   300   375
4 quarter 1960  Den      good    40    25
5    dime 1937   SF      poor    18    20
6   penny 1900  Den      good    28    20

Calculate the mean of each column:

colMeans(coins)
Error in colMeans(coins): 'x' must be numeric

Looping Over data.frame Columns

# Collect the mean of every numeric column of coins; non-numeric columns
# are skipped. means stays NULL when there is no numeric column.
means <- NULL
# seq_len() rather than 1:ncol(coins): with zero columns 1:0 would iterate
# over c(1, 0) and index columns that do not exist.
for (i in seq_len(ncol(coins))) {
  # [[i]] extracts column i as a plain vector.
  column <- coins[[i]]
  if (is.numeric(column)) {
    means <- c(means, mean(column))
  }
}

means
[1] 1922.33333   75.50000   83.33333

Looping Over List Elements

myList <- list(
  w = c(4, 4, 5, 5, 6, 6),
  x = c("a", "b", "c"),
  y = c(5, 10, 15),
  z = c("r", "s", "t", "u", "v")
)

lengths <- NULL

for(i in myList) {
  print(i)
  lengths <- c(lengths, length(i))
}
[1] 4 4 5 5 6 6
[1] "a" "b" "c"
[1]  5 10 15
[1] "r" "s" "t" "u" "v"
lengths
[1] 6 3 3 5

Using apply functions

Applying a function to an object

  • apply()
  • lapply()
  • sapply()
  • tapply()

Using apply on matrices

apply() requires 3 arguments:

args(apply)
function (X, MARGIN, FUN, ..., simplify = TRUE) 
NULL
  • X: the data (an array, e.g. a matrix)
  • MARGIN: 1 (rows), 2 (columns), or c(1,2) (every element)
  • FUN: the function to apply (without the ( ))

Applying a function on a matrix

mat <- matrix(1:25,nrow=5)

mat
     [,1] [,2] [,3] [,4] [,5]
[1,]    1    6   11   16   21
[2,]    2    7   12   17   22
[3,]    3    8   13   18   23
[4,]    4    9   14   19   24
[5,]    5   10   15   20   25

Margin 1 to apply to the rows:

apply(mat, 1, max)
[1] 21 22 23 24 25

Margin 2 to apply to the columns:

apply(mat, 2, max)
[1]  5 10 15 20 25

Applying a function to each element:

apply(mat, c(1,2), sqrt)
         [,1]     [,2]     [,3]     [,4]     [,5]
[1,] 1.000000 2.449490 3.316625 4.000000 4.582576
[2,] 1.414214 2.645751 3.464102 4.123106 4.690416
[3,] 1.732051 2.828427 3.605551 4.242641 4.795832
[4,] 2.000000 3.000000 3.741657 4.358899 4.898979
[5,] 2.236068 3.162278 3.872983 4.472136 5.000000

Remember:

sqrt(mat)
         [,1]     [,2]     [,3]     [,4]     [,5]
[1,] 1.000000 2.449490 3.316625 4.000000 4.582576
[2,] 1.414214 2.645751 3.464102 4.123106 4.690416
[3,] 1.732051 2.828427 3.605551 4.242641 4.795832
[4,] 2.000000 3.000000 3.741657 4.358899 4.898979
[5,] 2.236068 3.162278 3.872983 4.472136 5.000000

Using lapply on lists to return lists

mylist <- list(matrix(1:16,nrow=4), matrix(1:9,nrow=3),matrix(1:4,nrow=2))

lapply(mylist, dim)
[[1]]
[1] 4 4

[[2]]
[1] 3 3

[[3]]
[1] 2 2

lapply on a data.frame (a list of column vectors)

df <- data.frame("col1"=c(1,1,1,1), "col2"=c(2,2,2,2), "col3"=c(3,3,3,3))

lapply(df, sum)
$col1
[1] 4

$col2
[1] 8

$col3
[1] 12

Using lapply alternative: sapply

“Simplify” lapply output:

sapply(mylist, dim)
     [,1] [,2] [,3]
[1,]    4    3    2
[2,]    4    3    2
sapply(df, sum)
col1 col2 col3 
   4    8   12 

Using tapply on groups of data

patients <- data.frame("group"=paste('grp',
                          c(1,1,1,1,1,2,2,2,2,2),sep='-'), 
                       "outcome"=rnorm(10)) 
                      #10 random normally distributed values
patients
   group     outcome
1  grp-1  0.36335899
2  grp-1 -2.10322749
3  grp-1 -0.46796604
4  grp-1  0.81349481
5  grp-1 -1.88003678
6  grp-2 -0.48965553
7  grp-2 -0.02886587
8  grp-2 -1.10973330
9  grp-2 -0.18656796
10 grp-2  0.67209573
tapply(patients$outcome, patients$group, mean)
     grp-1      grp-2 
-0.6548753 -0.2285454 

Multiple grouping variables possible

patients <- data.frame("group"=paste('grp',c(1,1,1,1,1,2,2,2,2,2),sep='-'),
                       "serotype"=c("A","B","A","B","A","B","A","B","A","B"),
                       "outcome"=rnorm(10))

tapply(patients$outcome, list(patients$group, patients$serotype), mean)
                A        B
grp-1 -0.17958543 0.744583
grp-2 -0.07263831 1.125893