Thursday, February 20, 2020

Brunsdon and Comber ch 4: basic functions and maps

#BrunsdonComberCh4

#4.2 Introduction
tree.heights <- c(4.3,7.1,6.3,5.2,3.2)
tree.heights
if (tree.heights[1] < 6) { cat('Tree is small\n') } else
   { cat('Tree is large\n')}
#Tree is small

#for(variable in sequence) R expression
for (i in 1:5) {
  if (tree.heights[i] < 6) { cat('Tree',i,' is small\n') }
  else { cat('Tree',i,' is large\n')} }

assess.tree.height <- function(tree.list, thresh=5) #force named argument [with default]
{ for (i in 1:length(tree.list))
{ if(tree.list[i] < thresh) {cat('Tree',i, ' is small\n')}
  else { cat('Tree',i,' is large\n')}
}
}
assess.tree.height(tree.heights, thresh = 6)
assess.tree.height(tree.heights, thresh = 3)

tree.heights2 <- c(8,4.5,6.7,2,4)
assess.tree.height(tree.heights2, thresh = 4.5)

#4.3 Building blocks for Programs
#4.3.1 Conditional Statements
x <- -7
if (x < 0) cat("x is negative")
x <- 8
if (x < 0) cat("x is negative")

x <- -7
if (x < 0) cat("x is negative") else cat("x is positive")

x <- 8
if (x < 0) cat("x is negative") else cat("x is positive")

??is.

x <- c(1,3,6,8,9,5)
if (all(x > 0)) cat("All numbers are positive")
#All numbers are positive

x <- c(1,3,6,-8,9,5)
if (any(x > 0)) cat("Some numbers are positive")
#Some numbers are positive

any(x==0)
#[1] FALSE

#4.3.2 Code Blocks: { and }
x <- c(1,3,6,8,9,5)
if (all(x > 0)) {
  cat("All numbers are positive\n")
  total <- sum(x)
  cat("Their sum is ",total) }
#All numbers are positive
#Their sum is  32

#if condition { consequents }  else { alternatives }
x <- c(1,3,6,8,9,-5)
if (all(x > 0)) {
  cat("All numbers are positive\n")
  total <- sum(x)
  cat("Their sum is ",total)
  } else {
  cat("Not all numbers are positive\n")
  cat("This is probably an error as numbers are rainfall levels")
  }
#Not all numbers are positive
#This is probably an error as numbers are rainfall levels

#4.3.3 Functions

mean.rainfall <- function(rf) {
  if (all(rf> 0))  {
    mean.value <- mean(rf) 
    cat("The mean is ",mean.value)
  } else {
    cat("Warning: Not all values are positive\n") 
  }
}
mean.rainfall(c(8.5,9.3,6.5,9.3,9.4)) 
#The mean is  8.6

mean.rainfall2 <- function(rf) {
  if (all(rf> 0)) {
    return( mean(rf))} else {
      return(NA)}
}
mr <- mean.rainfall2(c(8.5,9.3,6.5,9.3,9.4)) 
mr
#[1] 8.6

rf <- "Tuesday"
mean.rainfall2(c(8.5,9.3,6.5,9.3,9.4))   ##a copy of rf is created and scoped within the function
#[1] 8.6

rf
#[1] "Tuesday"

#4.3.4 Loops and repetition
for (i in 1:5) {
  i.cubed <- i * i * i
  cat("The cube of",i,"is ",i.cubed,"\n")}

#seq(from, to, by = step value)
#seq(from, to, length = sequence length)
for (val in seq(0, 1, by=0.25)) {
  val.squared <- val * val
  cat("The square of",val,"is ",val.squared,"\n")}

i <- 1; n <- 654
repeat{
  i.squared <- i * i
  if (i.squared > n) break
  i <- i + 1}
cat("The first square number exceeding", n, "is ",i.squared,"\n")
#The first square number exceeding 654 is  676

first.bigger.square <- function(n) {
  i <- 1
  repeat{
    i.squared <- i * i
    if (i.squared > n) break
    i <- i + 1 }
  return(i.squared)}

first.bigger.square(76987)
#[1] 77284

#4.3.5 Debugging
cube.root <- function(x) {
  result <- x ^ (1/3)
  return(result)}
cube.root(27)
#[1] 3

#the next 3 lines can be put into functions.R
cube.root <- function(x) {
  result <- x ^ (1/3)
  return(result)}

source('functions.R')
cube.root(343)
cube.root(99)

circle.area <- function(r) {
  result <- pi * r ^ 2
  return(result)}

#source('functions.R')
cube.root(343)
circle.area(10)

#4.4.2 Data Checking
cube.root(-343)
#[1] NaN

cube.root <- function(x) {
  if (x >= 0) {
    result <- x ^ (1/3) }  else {
      result <- -(-x) ^ (1/3) }
  return(result)}

cube.root(3)
#[1] 1.44225

cube.root(-3)
#[1] -1.44225

debug(cube.root)

cube.root(-50)
x > 0

undebug(cube.root)

help(debug)

#4.4.3 More Data Checking
cube.root('Leeds')

is.numeric(77)
is.numeric("Lex")
is.numeric("77")
v <- "Two Sevens Clash"
is.numeric(v)

cube.root <- function(x) {
  if (is.numeric(x)) {
    if (x >= 0) { result <- x^(1/3) } 
    else { result <- -(-x)^(1/3) }
    return(result) } 
  else {
    cat("WARNING: Input must be numerical, not character\n")
    return(NA)}
}

#4.4.4 Loops Revisited
#4.4.4.1 Conditional Loops
gcd <- function(a,b)  {
  divisor <- min(a,b)
  dividend <- max(a,b)
  repeat  {
    remainder <- dividend %% divisor
    dividend <- divisor
    divisor <- remainder
    if (remainder == 0) break
  }
  return(dividend)
}

gcd(6,15)
gcd(25,75)
gcd(31,33)

#4.4.4.2 Deterministic Loops
cube.root.table <- function(n)  {
  for (x in 1:n)  {
    cat("The cube root of", x, "is", cube.root(x),"\n")
  }
}

#4.4.5 Further Activity
cat( sprintf("The cube root of %4.0f is %8.3f \n", x, cube.root(x)) )

library(GISTools)
library(sf)
data(georgia)
adj.list <- list()   # create an empty list for the results
georgia_sf <- st_as_sf(georgia2)  # convert georgia to sf
county.i <- georgia_sf[1,]        # extract a single county
# determine the adjacent counties
# the [-1] removes Appling form its own list
adj.i <- unlist(st_intersects(county.i, georgia_sf))[-1]
# extract their names
adj.names.i <- georgia2$Name[adj.i]
# add to the list
adj.list[[1]] <- adj.i
# name the list elements
names(adj.list[[1]]) <- adj.names.i

adj.list

#[[1]]
#Bacon Jeff Davis     Pierce   Tattnall     Toombs
#3         80        113        132        138

# in sequence
adj.list[[2]] <- sample(1:100, 3)
# or not!
i = 4
adj.list[[i]] <- c("Chris", "and", "Lex")
# have a look!
adj.list

#4.5 Spatial data structures
rm(list = ls())

library(GISTools)
data(georgia)
ls()  #or in RStudio look at Environment pane

class(georgia.polys)
str(georgia.polys)
#[1] "list"
georgia.polys
head(georgia.polys[[1]], 2)

#[,1]    [,2]
#[1,] 1292287 1075896
#[2,] 1292654 1075919

# plot Appling
plot(georgia.polys[[1]], asp=1, type='l', xlab = "Easting", ylab = "Northing")
# plot adjacent county outlines
points(georgia.polys[[3]], asp=1, type='l', col = "red")
points(georgia.polys[[151]], asp=1, type='l', col = "blue", lty = 2)
#Figure 4.1: A simple plot of Appling County and 2 adjacent counties.

head(georgia2@polygons[[1]]@Polygons[[1]]@coords)
head(georgia2@data[, 13:14])

g <- st_as_sf(georgia2)
head(g[,13:14])

#4.6 Apply functions
library(GISTools)
data(newhaven) #many spatial datasets: blocks, breach, places, tracts etc
class(blocks)
## the @data route
head(blocks@data[, 14:17])
## the data frame route
head(data.frame(blocks[, 14:17]))

apply(blocks@data[,14:17], 1, max)

apply(blocks@data[,14:17], 2, max)

# set up vector to hold result
result.vector <- vector()
for (i in 1:nrow(blocks@data))  {
  # for each row determine which column has the max value
  result.i <- which.max(data.frame(blocks[i,14:17]))
  # put into the result vector
  result.vector <- append(result.vector, result.i)
}

res.vec <-apply(data.frame(blocks[,14:17]), 1, which.max)
# compare the two results
identical(as.vector(res.vec), as.vector(result.vector))

# Loop
t1 <- Sys.time()
result.vector <- vector()
for (i in 1:nrow(blocks@data)){
  result.i <- which.max(data.frame(blocks[i,14:17]))
  result.vector <- append(result.vector, result.i)
}
Sys.time() - t1

# Apply
t1 <- Sys.time()
res.vec <-apply(data.frame(blocks[,14:17]), 1, which.max)
Sys.time() - t1

plot(bbox(georgia2)[1,], bbox(georgia2)[2,], asp = 1,
     type='n', xlab='', ylab='', xaxt='n', yaxt='n', bty='n')
for (i in 1:length(georgia.polys)){
  points(georgia.polys[[i]], type='l')
  # small delay so that you can see the plotting
  Sys.sleep(0.05)
}

plot(bbox(georgia2)[1,], bbox(georgia2)[2,], asp = 1,
     type='n',xlab='',ylab='',xaxt='n',yaxt='n',bty='n')
invisible(mapply(polygon,georgia.polys))

# create a distance matrix
georgia_sf <- st_as_sf(georgia2)
dMat <- as.matrix(dist(coordinates(georgia2)))
dim(dMat)
# create an empty vector
count.vec <- vector()
# create an empty list
names.list <- list()
# for each county...
for( i in 1:nrow(georgia_sf)) {
  # which counties are within 50km
  vec.i <- which(dMat[i,] <= 50000)
  # add to the vector
  count.vec <- append(count.vec, length(vec.i))
  # find their names
  names.i <- georgia_sf$Name[vec.i]
  # add to the list
  names.list[[i]] <- names.i
}
# have a look!
count.vec
names.list

lapply(names.list, length)

#Self-test question 7: Answer below
#count.vec <- apply(dMat,1.my.func1)
#names.list <- apply(dMat,1.my.func2)

#4.7 Manipulating data with dplyr
vignette('dplyr', package = 'dplyr')

install.packages("nycflights13")
library("nycflights13")
class(flights)
flights

data(package = "nycflights13")

vignette('two-table', package = 'dplyr')

flights2 <- flights %>% select(year:day, hour, origin, dest, tailnum, carrier)

flights2 <- select(flights, year:day, hour, origin, dest, tailnum, carrier)

library(nycflights13)
library(tidyverse)
# select the variables
flights2 <- flights %>% select(origin, dest)
# remove Alaska and Hawaii
flights2 <- flights2[-grep("ANC", flights2$dest), ]
flights2 <- flights2[-grep("HNL", flights2$dest), ]
# group by destination
flights2 <- group_by(flights2, dest)
flights2 <- summarize(flights2, count = n())
# assign Lat and Lon for Origin
flights2$OrLat <- 40.6925
flights2$OrLon <- -74.16867
# have a look!
flights2
# A tibble: 103 x 4

#Some mapping
library(maps)
library(geosphere)
# origin and destination examples
dest.eg <- matrix(c(77.1025, 28.7041), ncol = 2)
origin.eg <- matrix(c(-74.16867, 40.6925), ncol = 2)
# map the world from the maps package data
map("world", fill=TRUE, col="white", bg="lightblue")
# plot the points
points(dest.eg, col="red", pch=16, cex = 2)
points(origin.eg, col = "cyan", pch = 16, cex = 2)
# add the route
for (i in 1:nrow(dest.eg)) {
  lines(gcIntermediate(dest.eg[i,], origin.eg[i,], n=50,
                       breakAtDateLine=FALSE, addStartEnd=FALSE,
                       sp=FALSE, sepNA), lwd = 2, lty = 2)
}

#Other maps from the maps package
map("usa", fill=TRUE, col="white", bg="lightblue")

#4.8 Answers to self-test questions
#Q1
cube.root.2 <- function(x)  {
  if (is.numeric(x))  {
    result <- sign(x)*abs(x)^(1/3)
    return(result) 
  } else {
    cat("WARNING: Input must be numerical, not character\n")
    return(NA)
  }
}

#Q2
gcd <- function(a,b)  {
  divisor <- min(a,b) # line 1
  dividend <- max(a,b) # line 1
  repeat  { #line 5
    remainder <- dividend %% divisor #line 2
    dividend <- divisor # line 3
    divisor <- remainder # line 4
    if (remainder == 0) break #line 6
  }
  return(dividend)
}

#Q3 part i)
gcd.60 <- function(a)  {
  for(i in 1:a)
  { divisor <- min(i,60)
  dividend <- max(i,60)
  repeat  {
    remainder <- dividend %% divisor
    dividend <- divisor
    divisor <- remainder
    if (remainder == 0) break
  }
  cat(dividend, "\n")  
  }
}

gcd.60 <- function(a)  {
  for(i in 1:a) {
    dividend <- gcd(i,60)
    cat(i,":", dividend, "\n")   
  }
}

#Q3 part ii)
gcd.all <- function(x,y)  {
  for(n1 in 1:x) {
    for (n2 in 1:y) {
      dividend <- gcd(n1, n2)
      cat("when x is", n1, "& y is", n2, "dividend =", dividend,"\n")
    }
  }
}

#Q4
cube.root.table <- function(n)  {
  for (x in seq(0.5, n, by = 0.5))  {
    cat("The cube root of ",x," is",
      sign(x)*abs(x)^(1/3), "\n")  }
}

cube.root.table <- function(n)  {
  if (n < 0 ) by.val = 0.5
  if (n < 0 ) by.val =-0.5
  for (x in seq(0.5, n, by = by.val))  {
    cat("The cube root of ",x," is",
      sign(x)*abs(x)^(1/3),"\n") }
}

#Q5

# create an empty list for the results
adj.list <- list()
# convert georgia to sf
georgia_sf <- st_as_sf(georgia2)
for (i in 1:nrow(georgia_sf)) {
  # extract a single county
  county.i <- georgia_sf[i,]
  # determine the adjacent counties
  # the [-1] removes Appling form its own list
  adj.i <- unlist(st_intersects(county.i, georgia_sf))[-1]
  # extract their names
  adj.names.i <- georgia2$Name[adj.i]
  # add to the list
  adj.list[[i]] <- adj.i
  # name the list elements
  names(adj.list[[i]]) <- adj.names.i
}

#Q6
return.adj <- function(sf.data){
  # convert to sf regardless!
  sf.data <- st_as_sf(sf.data)
  adj.list <- list()
  for (i in 1:nrow(sf.data)) {
    # extract a single county
    poly.i <- sf.data[i,]
    # determine the adjacent counties
    adj.i <- unlist(st_intersects(poly.i, sf.data))[-1]
    # add to the list
    adj.list[[i]] <- adj.i
  }
  return(adj.list)

# test it!
return.adj(georgia_sf)
return.adj(blocks)

#Q7
# number of counties within 50km
my.func1 <- function(x){
  vec.i <- which(x <= 50000)[-i]
  return(length(vec.i))
}
# their names
my.func2 <- function(x){
  vec.i <- which(x <= 50000)
  names.i <- georgia_sf$Name[vec.i]
  return(names.i)
}
count.vec <- apply(dMat,1, my.func1)
names.list <- apply(dMat,1, my.func2)

#Q8
# Part 1: the join
flights2 <- flights2 %>% left_join(airports, c("dest" = "faa"))
flights2 <- flights2 %>% select(count, dest, OrLat, OrLon, DestLat=lat, DestLon=lon)
# get rid of any NAs
flights2 <- flights2[!is.na(flights2$DestLat),]
flights2

# Part 2: the plot
# Using standard plots
dest.eg <- matrix(c(flights2$DestLon, flights2$DestLat), ncol = 2)
origin.eg <- matrix(c(flights2$OrLon, flights2$OrLat), ncol = 2)
map("usa", fill=TRUE, col="white", bg="lightblue")
points(dest.eg, col="red", pch=16, cex = 1)
points(origin.eg, col = "cyan", pch = 16, cex = 1)
for (i in 1:nrow(dest.eg)) {
  lines(gcIntermediate(dest.eg[i,], origin.eg[i,], n=50,
                       breakAtDateLine=FALSE,
                       addStartEnd=FALSE, sp=FALSE, sepNA))
}

# using ggplot
all_states <- map_data("state")
dest.eg <- data.frame(DestLon = flights2$DestLon,
                      DestLat = flights2$DestLat)
origin.eg <- data.frame(OrLon = flights2$OrLon,
                        OrLat = flights2$OrLat)
library(GISTools)
# Figure 2 using ggplot
# create the main plot
mp <- ggplot() +
  geom_polygon( data=all_states,
                aes(x=long, y=lat, group = group),
                colour="white", fill="grey20") +
  coord_fixed() +
  geom_point(aes(x=dest.eg$DestLon, y=dest.eg$DestLat), color="#FB6A4A", size=2) +
  theme(axis.title.x=element_blank(),
        axis.text.x =element_blank(),
        axis.ticks.x=element_blank(),
        axis.title.y=element_blank(),
        axis.text.y =element_blank(),
        axis.ticks.y=element_blank())
# create some transparent shading
cols = add.alpha(colorRampPalette(brewer.pal(9,"Reds"))(nrow(flights2)), 0.7)
# loop through the destinations
for (i in 1:nrow(flights2)) {
  # line thickness related flights
  lwd.i = 1+ (flights2$count[i]/max(flights2$count))
  # a sequence of colours
  cols.i = cols[i]
  # create a dataset
  link <- as.data.frame(gcIntermediate(dest.eg[i,], origin.eg[i,], n=50,
                                       breakAtDateLine=FALSE, addStartEnd=FALSE, sp=FALSE, sepNA))
  names(link) <- c("lon", "lat")
  mp <- mp + geom_line(data=link, aes(x=lon, y=lat), color= cols.i, size = lwd.i)
}
mp # plot!

0 Comments:

Post a Comment

<< Home