jalan-jalan sambil minum teh-tarek

Tuesday, February 25, 2020

Formatting USB memory in Ubuntu 18.04

In a terminal type 'sudo lsblk' then note the NAME of the USB.
Then type 'sudo dd if=/dev/zero of=/dev/sda bs=4M iflag=nocache oflag=direct' to overwrite it with zeroes in blocks of 4M. Check with lsblk above again, and note the NAME of the USB (without numeric ending, eg sdb NOT sdb1).
Once done, type 'sudo parted /dev/sdb mklabel msdos' ie NAME of USB. It may complain and ask to reboot. Oblige. Once up, re-type.

Then call utility 'Disk' using [Window] key. It will bring up all the volumes (hard disk, USB) on the left pane. Select the right volume (USB). Then on the right pane tap [+] (near the two gears icon) to create partition with a chosen format (FAT32, preferably). Initially its Partition type is Linux.
It has a sliding option of 'Erase data or not' [while partitioning]. I choose 'Erase' which takes time [20' for 32 GB]. Eventually, its Partition type is FAT32.

To mount it for the first time, tap the triangle icon at the bottom of the volume on the right pane.

Thursday, February 20, 2020

Brunsdon and Comber ch 4: basic functions and maps

#BrunsdonComberCh4

#4.2 Introduction
tree.heights <- c(4.3,7.1,6.3,5.2,3.2)
tree.heights
if (tree.heights[1] < 6) { cat('Tree is small\n') } else
   { cat('Tree is large\n')}
#Tree is small

#for(variable in sequence) R expression
for (i in 1:5) {
  if (tree.heights[i] < 6) { cat('Tree',i,' is small\n') }
  else { cat('Tree',i,' is large\n')} }

assess.tree.height <- function(tree.list, thresh=5) #force named argument [with default]
{ for (i in 1:length(tree.list))
{ if(tree.list[i] < thresh) {cat('Tree',i, ' is small\n')}
  else { cat('Tree',i,' is large\n')}
}
}
assess.tree.height(tree.heights, thresh = 6)
assess.tree.height(tree.heights, thresh = 3)

tree.heights2 <- c(8,4.5,6.7,2,4)
assess.tree.height(tree.heights2, thresh = 4.5)

#4.3 Building blocks for Programs
#4.3.1 Conditional Statements
x <- -7
if (x < 0) cat("x is negative")
x <- 8
if (x < 0) cat("x is negative")

x <- -7
if (x < 0) cat("x is negative") else cat("x is positive")

x <- 8
if (x < 0) cat("x is negative") else cat("x is positive")

??is.

x <- c(1,3,6,8,9,5)
if (all(x > 0)) cat("All numbers are positive")
#All numbers are positive

x <- c(1,3,6,-8,9,5)
if (any(x > 0)) cat("Some numbers are positive")
#Some numbers are positive

any(x==0)
#[1] FALSE

#4.3.2 Code Blocks: { and }
x <- c(1,3,6,8,9,5)
if (all(x > 0)) {
  cat("All numbers are positive\n")
  total <- sum(x)
  cat("Their sum is ",total) }
#All numbers are positive
#Their sum is  32

#if condition { consequents }  else { alternatives }
x <- c(1,3,6,8,9,-5)
if (all(x > 0)) {
  cat("All numbers are positive\n")
  total <- sum(x)
  cat("Their sum is ",total)
  } else {
  cat("Not all numbers are positive\n")
  cat("This is probably an error as numbers are rainfall levels")
  }
#Not all numbers are positive
#This is probably an error as numbers are rainfall levels

#4.3.3 Functions

mean.rainfall <- function(rf) {
  if (all(rf> 0))  {
    mean.value <- mean(rf) 
    cat("The mean is ",mean.value)
  } else {
    cat("Warning: Not all values are positive\n") 
  }
}
mean.rainfall(c(8.5,9.3,6.5,9.3,9.4)) 
#The mean is  8.6

mean.rainfall2 <- function(rf) {
  if (all(rf> 0)) {
    return( mean(rf))} else {
      return(NA)}
}
mr <- mean.rainfall2(c(8.5,9.3,6.5,9.3,9.4)) 
mr
#[1] 8.6

rf <- "Tuesday"
mean.rainfall2(c(8.5,9.3,6.5,9.3,9.4))   ##a copy of rf is created and scoped within the function
#[1] 8.6

rf
#[1] "Tuesday"

#4.3.4 Loops and repetition
for (i in 1:5) {
  i.cubed <- i * i * i
  cat("The cube of",i,"is ",i.cubed,"\n")}

#seq(from, to, by = step value)
#seq(from, to, length = sequence length)
for (val in seq(0, 1, by=0.25)) {
  val.squared <- val * val
  cat("The square of",val,"is ",val.squared,"\n")}

i <- 1; n <- 654
repeat{
  i.squared <- i * i
  if (i.squared > n) break
  i <- i + 1}
cat("The first square number exceeding", n, "is ",i.squared,"\n")
#The first square number exceeding 654 is  676

first.bigger.square <- function(n) {
  i <- 1
  repeat{
    i.squared <- i * i
    if (i.squared > n) break
    i <- i + 1 }
  return(i.squared)}

first.bigger.square(76987)
#[1] 77284

#4.3.5 Debugging
cube.root <- function(x) {
  result <- x ^ (1/3)
  return(result)}
cube.root(27)
#[1] 3

#the next 3 lines can be put into functions.R
cube.root <- function(x) {
  result <- x ^ (1/3)
  return(result)}

source('functions.R')
cube.root(343)
cube.root(99)

circle.area <- function(r) {
  result <- pi * r ^ 2
  return(result)}

#source('functions.R')
cube.root(343)
circle.area(10)

#4.4.2 Data Checking
cube.root(-343)
#[1] NaN

cube.root <- function(x) {
  if (x >= 0) {
    result <- x ^ (1/3) }  else {
      result <- -(-x) ^ (1/3) }
  return(result)}

cube.root(3)
#[1] 1.44225

cube.root(-3)
#[1] -1.44225

debug(cube.root)

cube.root(-50)
x > 0

undebug(cube.root)

help(debug)

#4.4.3 More Data Checking
cube.root('Leeds')

is.numeric(77)
is.numeric("Lex")
is.numeric("77")
v <- "Two Sevens Clash"
is.numeric(v)

cube.root <- function(x) {
  if (is.numeric(x)) {
    if (x >= 0) { result <- x^(1/3) } 
    else { result <- -(-x)^(1/3) }
    return(result) } 
  else {
    cat("WARNING: Input must be numerical, not character\n")
    return(NA)}
}

#4.4.4 Loops Revisited
#4.4.4.1 Conditional Loops
gcd <- function(a,b)  {
  divisor <- min(a,b)
  dividend <- max(a,b)
  repeat  {
    remainder <- dividend %% divisor
    dividend <- divisor
    divisor <- remainder
    if (remainder == 0) break
  }
  return(dividend)
}

gcd(6,15)
gcd(25,75)
gcd(31,33)

#4.4.4.2 Deterministic Loops
cube.root.table <- function(n)  {
  for (x in 1:n)  {
    cat("The cube root of", x, "is", cube.root(x),"\n")
  }
}

#4.4.5 Further Activity
cat( sprintf("The cube root of %4.0f is %8.3f \n", x, cube.root(x)) )

library(GISTools)
library(sf)
data(georgia)
adj.list <- list()   # create an empty list for the results
georgia_sf <- st_as_sf(georgia2)  # convert georgia to sf
county.i <- georgia_sf[1,]        # extract a single county
# determine the adjacent counties
# the [-1] removes Appling form its own list
adj.i <- unlist(st_intersects(county.i, georgia_sf))[-1]
# extract their names
adj.names.i <- georgia2$Name[adj.i]
# add to the list
adj.list[[1]] <- adj.i
# name the list elements
names(adj.list[[1]]) <- adj.names.i

adj.list

#[[1]]
#Bacon Jeff Davis     Pierce   Tattnall     Toombs
#3         80        113        132        138

# in sequence
adj.list[[2]] <- sample(1:100, 3)
# or not!
i = 4
adj.list[[i]] <- c("Chris", "and", "Lex")
# have a look!
adj.list

#4.5 Spatial data structures
rm(list = ls())

library(GISTools)
data(georgia)
ls()  #or in RStudio look at Environment pane

class(georgia.polys)
str(georgia.polys)
#[1] "list"
georgia.polys
head(georgia.polys[[1]], 2)

#[,1]    [,2]
#[1,] 1292287 1075896
#[2,] 1292654 1075919

# plot Appling
plot(georgia.polys[[1]], asp=1, type='l', xlab = "Easting", ylab = "Northing")
# plot adjacent county outlines
points(georgia.polys[[3]], asp=1, type='l', col = "red")
points(georgia.polys[[151]], asp=1, type='l', col = "blue", lty = 2)
#Figure 4.1: A simple plot of Appling County and 2 adjacent counties.

head(georgia2@polygons[[1]]@Polygons[[1]]@coords)
head(georgia2@data[, 13:14])

g <- st_as_sf(georgia2)
head(g[,13:14])

#4.6 Apply functions
library(GISTools)
data(newhaven) #many spatial datasets: blocks, breach, places, tracts etc
class(blocks)
## the @data route
head(blocks@data[, 14:17])
## the data frame route
head(data.frame(blocks[, 14:17]))

apply(blocks@data[,14:17], 1, max)

apply(blocks@data[,14:17], 2, max)

# set up vector to hold result
result.vector <- vector()
for (i in 1:nrow(blocks@data))  {
  # for each row determine which column has the max value
  result.i <- which.max(data.frame(blocks[i,14:17]))
  # put into the result vector
  result.vector <- append(result.vector, result.i)
}

res.vec <-apply(data.frame(blocks[,14:17]), 1, which.max)
# compare the two results
identical(as.vector(res.vec), as.vector(result.vector))

# Loop
t1 <- Sys.time()
result.vector <- vector()
for (i in 1:nrow(blocks@data)){
  result.i <- which.max(data.frame(blocks[i,14:17]))
  result.vector <- append(result.vector, result.i)
}
Sys.time() - t1

# Apply
t1 <- Sys.time()
res.vec <-apply(data.frame(blocks[,14:17]), 1, which.max)
Sys.time() - t1

plot(bbox(georgia2)[1,], bbox(georgia2)[2,], asp = 1,
     type='n', xlab='', ylab='', xaxt='n', yaxt='n', bty='n')
for (i in 1:length(georgia.polys)){
  points(georgia.polys[[i]], type='l')
  # small delay so that you can see the plotting
  Sys.sleep(0.05)
}

plot(bbox(georgia2)[1,], bbox(georgia2)[2,], asp = 1,
     type='n',xlab='',ylab='',xaxt='n',yaxt='n',bty='n')
invisible(mapply(polygon,georgia.polys))

# create a distance matrix
georgia_sf <- st_as_sf(georgia2)
dMat <- as.matrix(dist(coordinates(georgia2)))
dim(dMat)
# create an empty vector
count.vec <- vector()
# create an empty list
names.list <- list()
# for each county...
for( i in 1:nrow(georgia_sf)) {
  # which counties are within 50km
  vec.i <- which(dMat[i,] <= 50000)
  # add to the vector
  count.vec <- append(count.vec, length(vec.i))
  # find their names
  names.i <- georgia_sf$Name[vec.i]
  # add to the list
  names.list[[i]] <- names.i
}
# have a look!
count.vec
names.list

lapply(names.list, length)

#Self-test question 7: Answer below
#count.vec <- apply(dMat,1.my.func1)
#names.list <- apply(dMat,1.my.func2)

#4.7 Manipulating data with dplyr
vignette('dplyr', package = 'dplyr')

install.packages("nycflights13")
library("nycflights13")
class(flights)
flights

data(package = "nycflights13")

vignette('two-table', package = 'dplyr')

flights2 <- flights %>% select(year:day, hour, origin, dest, tailnum, carrier)

flights2 <- select(flights, year:day, hour, origin, dest, tailnum, carrier)

library(nycflights13)
library(tidyverse)
# select the variables
flights2 <- flights %>% select(origin, dest)
# remove Alaska and Hawaii
flights2 <- flights2[-grep("ANC", flights2$dest), ]
flights2 <- flights2[-grep("HNL", flights2$dest), ]
# group by destination
flights2 <- group_by(flights2, dest)
flights2 <- summarize(flights2, count = n())
# assign Lat and Lon for Origin
flights2$OrLat <- 40.6925
flights2$OrLon <- -74.16867
# have a look!
flights2
# A tibble: 103 x 4

#Some mapping
library(maps)
library(geosphere)
# origin and destination examples
dest.eg <- matrix(c(77.1025, 28.7041), ncol = 2)
origin.eg <- matrix(c(-74.16867, 40.6925), ncol = 2)
# map the world from the maps package data
map("world", fill=TRUE, col="white", bg="lightblue")
# plot the points
points(dest.eg, col="red", pch=16, cex = 2)
points(origin.eg, col = "cyan", pch = 16, cex = 2)
# add the route
for (i in 1:nrow(dest.eg)) {
  lines(gcIntermediate(dest.eg[i,], origin.eg[i,], n=50,
                       breakAtDateLine=FALSE, addStartEnd=FALSE,
                       sp=FALSE, sepNA), lwd = 2, lty = 2)
}

#Other maps from the maps package
map("usa", fill=TRUE, col="white", bg="lightblue")

#4.8 Answers to self-test questions
#Q1
cube.root.2 <- function(x)  {
  if (is.numeric(x))  {
    result <- sign(x)*abs(x)^(1/3)
    return(result) 
  } else {
    cat("WARNING: Input must be numerical, not character\n")
    return(NA)
  }
}

#Q2
gcd <- function(a,b)  {
  divisor <- min(a,b) # line 1
  dividend <- max(a,b) # line 1
  repeat  { #line 5
    remainder <- dividend %% divisor #line 2
    dividend <- divisor # line 3
    divisor <- remainder # line 4
    if (remainder == 0) break #line 6
  }
  return(dividend)
}

#Q3 part i)
gcd.60 <- function(a)  {
  for(i in 1:a)
  { divisor <- min(i,60)
  dividend <- max(i,60)
  repeat  {
    remainder <- dividend %% divisor
    dividend <- divisor
    divisor <- remainder
    if (remainder == 0) break
  }
  cat(dividend, "\n")  
  }
}

gcd.60 <- function(a)  {
  for(i in 1:a) {
    dividend <- gcd(i,60)
    cat(i,":", dividend, "\n")   
  }
}

#Q3 part ii)
gcd.all <- function(x,y)  {
  for(n1 in 1:x) {
    for (n2 in 1:y) {
      dividend <- gcd(n1, n2)
      cat("when x is", n1, "& y is", n2, "dividend =", dividend,"\n")
    }
  }
}

#Q4
cube.root.table <- function(n)  {
  for (x in seq(0.5, n, by = 0.5))  {
    cat("The cube root of ",x," is",
      sign(x)*abs(x)^(1/3), "\n")  }
}

cube.root.table <- function(n)  {
  if (n < 0 ) by.val = 0.5
  if (n < 0 ) by.val =-0.5
  for (x in seq(0.5, n, by = by.val))  {
    cat("The cube root of ",x," is",
      sign(x)*abs(x)^(1/3),"\n") }
}

#Q5

# create an empty list for the results
adj.list <- list()
# convert georgia to sf
georgia_sf <- st_as_sf(georgia2)
for (i in 1:nrow(georgia_sf)) {
  # extract a single county
  county.i <- georgia_sf[i,]
  # determine the adjacent counties
  # the [-1] removes Appling form its own list
  adj.i <- unlist(st_intersects(county.i, georgia_sf))[-1]
  # extract their names
  adj.names.i <- georgia2$Name[adj.i]
  # add to the list
  adj.list[[i]] <- adj.i
  # name the list elements
  names(adj.list[[i]]) <- adj.names.i
}

#Q6
return.adj <- function(sf.data){
  # convert to sf regardless!
  sf.data <- st_as_sf(sf.data)
  adj.list <- list()
  for (i in 1:nrow(sf.data)) {
    # extract a single county
    poly.i <- sf.data[i,]
    # determine the adjacent counties
    adj.i <- unlist(st_intersects(poly.i, sf.data))[-1]
    # add to the list
    adj.list[[i]] <- adj.i
  }
  return(adj.list)

# test it!
return.adj(georgia_sf)
return.adj(blocks)

#Q7
# number of counties within 50km
my.func1 <- function(x){
  vec.i <- which(x <= 50000)[-i]
  return(length(vec.i))
}
# their names
my.func2 <- function(x){
  vec.i <- which(x <= 50000)
  names.i <- georgia_sf$Name[vec.i]
  return(names.i)
}
count.vec <- apply(dMat,1, my.func1)
names.list <- apply(dMat,1, my.func2)

#Q8
# Part 1: the join
flights2 <- flights2 %>% left_join(airports, c("dest" = "faa"))
flights2 <- flights2 %>% select(count, dest, OrLat, OrLon, DestLat=lat, DestLon=lon)
# get rid of any NAs
flights2 <- flights2[!is.na(flights2$DestLat),]
flights2

# Part 2: the plot
# Using standard plots
dest.eg <- matrix(c(flights2$DestLon, flights2$DestLat), ncol = 2)
origin.eg <- matrix(c(flights2$OrLon, flights2$OrLat), ncol = 2)
map("usa", fill=TRUE, col="white", bg="lightblue")
points(dest.eg, col="red", pch=16, cex = 1)
points(origin.eg, col = "cyan", pch = 16, cex = 1)
for (i in 1:nrow(dest.eg)) {
  lines(gcIntermediate(dest.eg[i,], origin.eg[i,], n=50,
                       breakAtDateLine=FALSE,
                       addStartEnd=FALSE, sp=FALSE, sepNA))
}

# using ggplot
all_states <- map_data("state")
dest.eg <- data.frame(DestLon = flights2$DestLon,
                      DestLat = flights2$DestLat)
origin.eg <- data.frame(OrLon = flights2$OrLon,
                        OrLat = flights2$OrLat)
library(GISTools)
# Figure 2 using ggplot
# create the main plot
mp <- ggplot() +
  geom_polygon( data=all_states,
                aes(x=long, y=lat, group = group),
                colour="white", fill="grey20") +
  coord_fixed() +
  geom_point(aes(x=dest.eg$DestLon, y=dest.eg$DestLat), color="#FB6A4A", size=2) +
  theme(axis.title.x=element_blank(),
        axis.text.x =element_blank(),
        axis.ticks.x=element_blank(),
        axis.title.y=element_blank(),
        axis.text.y =element_blank(),
        axis.ticks.y=element_blank())
# create some transparent shading
cols = add.alpha(colorRampPalette(brewer.pal(9,"Reds"))(nrow(flights2)), 0.7)
# loop through the destinations
for (i in 1:nrow(flights2)) {
  # line thickness related flights
  lwd.i = 1+ (flights2$count[i]/max(flights2$count))
  # a sequence of colours
  cols.i = cols[i]
  # create a dataset
  link <- as.data.frame(gcIntermediate(dest.eg[i,], origin.eg[i,], n=50,
                                       breakAtDateLine=FALSE, addStartEnd=FALSE, sp=FALSE, sepNA))
  names(link) <- c("lon", "lat")
  mp <- mp + geom_line(data=link, aes(x=lon, y=lat), color= cols.i, size = lwd.i)
}
mp # plot!

Wednesday, February 19, 2020

R cartogram and WDI

#cartogramEx.R
#wdi and cartogram
#https://trucvietle.me/r/tutorial/2016/12/18/cartogram-plotting-using-r.html

setwd('~/Downloads')  #shapefile is here, fetched from thematic mapping org
library(Rcartogram)
library(getcartr)
library(ggplot2)
require(WDI)  #PREV: install.packages(c('RJSONIO', 'wdi'))
WDIsearch('gdp.*capita.*constant')
dat = WDI(indicator='NY.GDP.PCAP.KD', start=2012, end=2012, extra=TRUE)  #include aggregates ie continents
datsmall <- data.frame(iso3 = dat$iso3c, persoinc = dat$NY.GDP.PCAP.KD)
datsmall <- na.omit(datsmall)

world <- readShapePoly('TM_WORLD_BORDERS-0.3.shp')

## Join the two datasets using their common field
matchIds <- match(world@data[, "ISO3"], datsmall[, "iso3"])
world@data <- data.frame(world@data, datsmall[matchIds, ])
worldCarto <- quick.carto(world, world@data$persoinc, blur = 0.5)

## Convert the object into data frame
world.f <- fortify(worldCarto, region = "iso3")
## Merge the cartogram transformation with the world map shapefile
world.f <- merge(world.f, world@data, by.x = "id", by.y = "iso3")
my_map <- ggplot(world.f, aes(long, lat, group = group, fill = world.f$persoinc)) +
  geom_polygon() + theme_void() + scale_fill_continuous(name="Average\nIncome") ##ODD WAY to name legend

(my_map <- my_map + ggtitle("Cartogram of average income (2012)"))

Tuesday, February 18, 2020

sandi Skype mulai dgn huruf yg sama

sandi Skype mulai dgn huruf yg sama

SDG Atlas in R, requiring worldborder/TM_WORLD_BORDERS-0.3

#sdgatlas.R
#require getwdi.do in Stata [below], 

# and WDI document to know handle or var name of theme
# this may require filling in missing values
#to draw map of SDG atlas for countries around the world 

# based on World Bank's World Development Indicators
setwd('C:/data/GDI/GlobalInqSocDev2017-2018')
require(foreign)
require(rgdal)
require(tmap)
world <- readOGR(dsn = './worldborder', layer= "TM_WORLD_BORDERS-0.3")
#based on WDI :32
wtheme <- read.dta('worldtheme.dta')
wtheme$countrycode <- as.factor(wtheme$countrycode)
world@data = data.frame(world@data, wtheme[match(world@data[, "ISO3"], wtheme[, "countrycode"]), ])
#tm_shape(world) + tm_polygons("si_pov_nahc", style="quantile")
#dev.copy(png, 'SDGpov.png')
tm_shape(world) + tm_polygons("si_pov_gini", style="quantile")
dev.copy(png, 'mapgini.png')
dev.off()

#tm_shape(world) + tm_polygons("si_pov_nahc", style="quantile")
#dev.copy(png, 'mappoverty.png')
#dev.off()

*getwdi.do: a simple way to get World Indicators
cd ~/GDI2017  //then consult WDI to get var name of theme: si.pov.nahc
tempfile tmp
wbopendata, language(en - English) indicator(si.pov.nahc) long clear latest
sort countrycode
save `tmp', replace
sysuse world-d, clear
merge countrycode using `tmp'
*may want to fill in missing values
*spmap  si_pov_nahc using "world-c.dta", id(_ID)
saveold worldtheme, version(12)

Monday, February 17, 2020

Install rcartogram package in Windows 10

Preparing R for rcartogram package in Windows 10
requires uninstalling rtools 3.3, then installing rtools 3.5 (ensure PATH is updated accordingly).
also tries to direct R to install packages under HOME/library, by editing HOME/etc/Rprofile.site
  (editing with administrator rights because HOME is under C:/Program Files).
Also, the HOME folder needs its property set (right click, security or permission to write).
1. in R, use installr package to updater()
2. update RStudio IDE; check both RStudio and R to be the latest versions
3. check Rtools 3.5 are put in the common folders C:\Rtools\bin etc
3b. check the R tools paths are in the System PATH variable (Control panel .
  Advanced System...) Ensure the order of the paths follow Rtools README.txt
  or Rtools.txt guidance
4. install.packages("devtools"), or better follow this https://cran.r-project.org/web/packages/devtools/readme/README.html
5. install fftw3 package
5b. look at HOME/library while installing fftw3 to see it's put there.
6. Ensure fftw3 bin is in the PATH variable too (previously it's under C:/msys, now elsewhere)
7. then follow this by Geoff Lee: https://raw.githubusercontent.com/Geoff99/Rcartogram/WindowsInstall/vignettes/README.WindowsInstall.Tutorial.Rmd
8. Test!

spatial multilevel model HSAR example, Beijing

require(spdep)
require(HSAR)
require(rgeos)
require(classInt)
require(RColorBrewer)

data(Beijingdistricts)  #spdf 111 elements
plot(Beijingdistricts,border="light grey")
# extract the area of each district
Beijingdistricts$geo.area <- gArea(Beijingdistricts,byid=TRUE) / 1000000
x <- Beijingdistricts$geo.area
breaks <- classIntervals(x,4,"fisher")$brks
groups <- cut(x,breaks,include.lowest=TRUE,labels=FALSE)
palette <- brewer.pal(4, "Blues")
plot(Beijingdistricts,col=palette[groups],border="grey")

# extract the district level spatial weights matrix
nb.list <- poly2nb(Beijingdistricts,queen=FALSE)  #edge list for 111 districts
mat.list <- nb2mat(nb.list,style="W")  #row-normal adjacency matrix
M <- as(mat.list,"dgCMatrix")

# Running the hsar() function using the Beijing land price data
data(landprice)  #it has district.id variable
# load shapefiles of Beijing districts and land parcels
data(Beijingdistricts)
data(landSPDF)   #perhaps this was constructed from Beijingdistrics spdf and landprice dataframe
                #connected with district.id ?
plot(Beijingdistricts,border="green")
plot(landSPDF,add=TRUE,col="red",pch=16,cex=0.8)
# Define the random effect matrix
model.data <- landprice[order(landprice$district.id),]
head(model.data,20)

# the number of individuals within each neighbourhood
MM <- as.data.frame(table(model.data$district.id))
# the total number of neighbourhood, 100
Utotal <- dim(MM)[1]
Unum <- MM[,2]
Uid <- rep(c(1:Utotal),Unum)
n <- nrow(model.data)
Delta <- matrix(0,nrow=n,ncol=Utotal)  #large matrix 123987 elements
for(i in 1:Utotal) {
  Delta[Uid==i,i] <- 1
}
rm(i)
Delta[1:50,1:10]  #see


Delta <- as(Delta,"dgCMatrix")
# extract the district level spatial weights matrix using the queen's rule
nb.list <- poly2nb(Beijingdistricts)
mat.list <- nb2mat(nb.list,style="W")  #111 by 111 row-normal adjacency matrix
M <- as(mat.list,"dgCMatrix")
# extract the land parcel level spatial weights matrix
nb.25 <- dnearneigh(landSPDF,0,2500)
# to a weights matrix
dist.25 <- nbdists(nb.25,landSPDF)
dist.25 <- lapply(dist.25,function(x) exp(-0.5 * (x / 2500)^2))
mat.25 <- nb2mat(nb.25,glist=dist.25,style="W")  #V large matrix 1247689 els
W <- as(mat.25,"dgCMatrix")

## run the hsar() function
res.formula <- lnprice ~ lnarea + lndcbd + dsubway + dpark + dele +
  popden + crimerate + as.factor(year)
betas= coef(lm(formula=res.formula,data=landprice))
pars=list( rho = 0.5,lambda = 0.5, sigma2e = 2.0, sigma2u = 2.0, betas = betas )
## Not run:
res <- hsar(res.formula,data=model.data,W=W,M=M,Delta=Delta,
            burnin=5000, Nsim=10000, thinning = 1, parameters.start=pars)
summary(res)
res_err <- hsar(res.formula,data=model.data,W=NULL,M=M,Delta=Delta,
            burnin=5000, Nsim=10000, thinning = 1, parameters.start=pars)
summary(res_err)
# visualise the district level random effect
library(classInt)
library(RColorBrewer)
x <- as.numeric(res$Mus)
breaks <- classIntervals(x,4,"fisher")$brks
groups <- cut(x,breaks,include.lowest=TRUE,labels=FALSE)
palette <- brewer.pal(4, "Blues")
plot(Beijingdistricts,col=palette[groups],border="grey")

UNCOMTRADE from R

# uncomtrade.R : UN comtrade
require(comtradr)
require(dplyr)
require(foreign)
setwd('C:/_codeGDI')
aseanex <- ct_search(reporters = c("China", "Japan", 

                       "Rep. of Korea"),
                     partners = c("Indonesia", "Malaysia", "Singapore", "Philippines", "Thailand"),
                     trade_direction = "imports", type = "goods",
                     start_date = 2000, end_date = 2004)
asean2000 <- aseanex[c("year", "reporter_iso", "partner_iso",

                       "trade_value_usd")]

aseanex2 <- ct_search(reporters = c("China", "Japan",

                       "Rep. of Korea"),
                     partners = c("Indonesia", "Malaysia", "Singapore", "Philippines", "Thailand"),
                     trade_direction = "imports", type = "goods",
                     start_date = 2005, end_date = 2009)
asean2005 <- aseanex2[c("year", "reporter_iso", "partner_iso", "trade_value_usd")]
aseanex3 <- ct_search(reporters = c("China", "Japan", "Rep. of Korea"),
                      partners = c("Indonesia", "Malaysia", "Singapore", "Philippines", "Thailand"),
                      trade_direction = "imports", type = "goods",
                      start_date = 2010, end_date = 2014)
asean2010 <- aseanex3[c("year", "reporter_iso", "partner_iso", "trade_value_usd")]
asean <- dplyr::bind_rows(asean2000, asean2005, asean2010)
summary(asean)
rm(asean2000, asean2005, asean2010, aseanex, aseanex2, aseanex3)
write.dta(asean, 'C:/_codeGDI/aseantrade20002010.dta')

Cartogram for disability and internet access in Indonesia

#choro-carto-prov-kab.R
library(tmap)
library(maptools)
library(ggplot2)
library(rgeos)
library(foreign)
library(Rcartogram)
library(getcartr)

setwd('~/_codeIDNgeo')
idnprov <- readRDS('IDN_adm1.rds')  #idnkab: IDN_adm2.rds  #plot(idnprov)

disab2015 <- read.dta('~/_negeriku/tmpdisabbyidgeo.dta')
  #prepared by ana-dddisab.do ...

net2015   <- read.dta('~/_negeriku/tmpnetbyidgeo.dta')
  # ...which generated idgeo

idnprov@data = data.frame(idnprov@data,
  disab2015[match(idnprov@data[, "ID_1"], disab2015[, "idgeo"]), ])

idnprov@data = data.frame(idnprov@data, 
  net2015[match(idnprov@data[, "ID_1"], net2015[, "idgeo"]), ])

summary(idnprov$disab)
summary(idnprov$net)

disab.carto <-quick.carto(idnprov,idnprov$disab, blur=1)
spplot(disab.carto, "disab", col.regions=brewer.pal(3, "Greens"),
  main = "Cartogram of disability")

net.carto   <-quick.carto(idnprov,idnprov$net, blur=1)
spplot(idnprov,   "netcat", col.regions=brewer.pal(3, "Greens"),
  main = "Original projection")
spplot(net.carto, "netcat", col.regions=brewer.pal(3, "Greens"),
  main = "Cartogram of internet access")

pdf('~/_negeriku/net2015.pdf',width=6,height=4,paper='special') 
  par(mfrow=c(2,1),mar=c(0.5,0.5,3,0.5))
  plot(idnprov, main = "Original Projection")
  plot(net.carto, main = "Cartogram of internet access ")
dev.off()

####################################PROVINSI LAGI
idn <- rgdal::readOGR('IDN_adm1.shp')
#plot(idn)  #periksa: bagus.

idnY <- read.dta('~/_negeriku/kerjafml2014.dta')
  #theme: ID_1, formal job, prov
idnY$ID_1 <- as.factor(idnY$ID_1)   #prov 34 values

idnm <- append_data(idn, idnY, key.shp = 'ID_1', key.data = 'ID_1',
                    ignore.na = TRUE, ignore.duplicates = TRUE)
#map data, ID_1 34 values!

choromap <- qtm(idnm, fill='Formal', 
  legend.position = c("left", "bottom"))
choromap

#sekarang buat kartogram
idn.carto <- quick.carto(idnm, idnm@data$Formal, blur=0)
  #bbrp menit kemudian...
#quick.carto doesn't like exactly zero values; naturally!
#  But it can cope with missing!

idn.buffer <- gBuffer(idn.carto, width=0, byid=TRUE)
idn.f <- fortify(idn.buffer, region="ID_1") 
  #ID_1 -> id; convert to a regular data frame: KABU or ID2014_2
idn.f$id <- as.factor(idn.f$id)  
  #my peace of mind that 'id' and 'ID_1" same-typed
idn.f <- merge(idn.f, idnm@data, by.x="id", by.y="ID_1") 
  #merge cartogram with map

carto.fjob <- ggplot(idn.f, aes(long, lat, group=group,
    fill=idn.f$Formal)) + 
  labs(fill="Formal jobs") + geom_polygon() + 
  ggtitle("Cartogram formal jobs 2014")
carto.fjob

#KABUPATEN-KOTA
idn <- rgdal::readOGR('petabps2014.shp')   
#plot(idn)  #periksa: bagus.

idnY  <- read.dta('~/_negeriku/ssn14rawat.dta') #theme var
idnY$KABU <- as.factor(idnY$KABU)   #KABU 497 values
summary(idnY)
  #catch zeroes because quick.carto abhors them.
  # Better set it to missing.
idnY2 <- read.table('~/_negeriku/cataskabu50.tsv', 
   sep='\t', header=TRUE)  #another theme var
idnY2$cdkabkota <- as.factor(idnY2$cdkabkota)   # 497 values
summary(idnY2)  
  #catch zeroes because quick.carto abhors 'em.
  # Drop 'em or give 'em an average value
idnY2 <- idnY2[idnY2$catas>0,]

idnY <- merge(idnY, idnY2, by.x='KABU', by.y='cdkabkota')

idnm <- tmaptools::append_data(idn, idnY, 
           key.shp = 'ID2014_2', key.data = 'KABU',
           ignore.na = TRUE, ignore.duplicates = TRUE) 
#map data, ID2014_2 514 values!
#Please check whether the variables are renamed with .x or .y

choromap <- qtm(idnm, fill='rj')  #peta tema
choromap #takes time!

chorocatas <- qtm(idnm, fill='catas')  #gambar peta tema (with renamed var? CHECK THIS NAME!)
chorocatas #takes time!

#sekarang buat kartogram
idn.carto <- quick.carto(idnm, idnm@data$rj, blur=0)  
  #bbrp menit kemudian...
  #quick.carto doesn't like exactly zero values; naturally!
  # But it can cope with missing!

idn.buffer <- gBuffer(idn.carto, width=0, byid=TRUE)  
  ##47 warnings for missing values!
idn.f <- fortify(idn.buffer, region="ID2014_2")  
  #convert to a regular data frame: KABU or ID2014_2
idn.f <- merge(idn.f, idnm@data, by.x="id", by.y="ID2014_2") 
  #merge cartogram with map

cartorj <- ggplot(idn.f, aes(long, lat, group=group, 
     fill=idn.f$rj)) + 
  labs(fill="Rawat jalan") + geom_polygon() + 
  ggtitle("Cartogram rawat jalan 2014")
cartorj #neat!

### buat kartogram catastrophic health expenditure
idn.carto2 <- quick.carto(idnm, idnm@data$catas, blur=0)  
  #bbrp menit kemudian...
idn.buffer2 <- gBuffer(idn.carto2, width=0, byid=TRUE)  
  ##47 warnings for missing values
idn.f2 <- fortify(idn.buffer2, region="ID2014_2")  
  #convert to a regular data frame: KABU or ID2014_2
idn.f2 <- merge(idn.f2, idnm@data, by.x="id", by.y="ID2014_2") #merge cartogram with map
cartocatas <- ggplot(idn.f2, aes(long, lat, group=group,
     fill=idn.f2$catas)) + 
  labs(fill="Catastrophic health exp") + geom_polygon() +
  ggtitle("Catastrophic health exp")
cartocatas #neat!

Cartogram for multimorbidity in England

#map-mM.R
#prevalence preserving map of multimorbidity in England
#GT 12 Jan 2016, 28 July 2017, 13 Dec 2019

#install.packages('Rcartogram', repos='http://www.omegahat.org/R', type='source')
#actually: stackoverflow.com/questions/31613119/installing-rcartogram-packages-error-message
#Use Geoff Lee's fork of Rcartogram github

# note carefully: PATH, fftw3-3, 
# downloading from github and unzipping the bundle, 
# setting working directory etc

#Do not use R package fftw!

#install.packages('devtools')
#  bits and pieces may need to be installed in piecemeal
#library(devtools)
#install_github('chrisbrunsdon/getcartr',subdir='getcartr')
library(Rcartogram)
library(getcartr)
library(rgdal)
library(ggplot2)
library(ggmap)
library(mapproj)
library(Cairo)
library(foreign)
library(scales)
library(dplyr)
library(plyr)

setwd('~/_projects/MALAM')
dir()
dir('England_lad_shp_2011')

#choro
engY <- read.dta('w7ticsLA.dta') #OSLAUA2 is chr
engY$OSLAUA2 <- as.factor(engY$OSLAUA2)
engY <- rename(engY, c("tics27demen"="Dementia"))
names(engY)
summary(engY$Dementia)  #min 0, max 19%
summary(engY$multimor)  #min 0, max 29%

engshp <- readOGR('England_lad_shp_2011/england_lad_2011_gen_clipped.shp')
names(engshp) #altname name code: all factors 
engshp <- fortify(engshp, region="code")
  # code -> id
eng2 <- merge(engshp, engY, by.x="id", by.y="OSLAUA2")

#Choro dementia
chorodem <- ggplot() + 
  geom_polygon(data = eng2, aes(long, lat, group=group,
    fill=Dementia)) +
  scale_fill_distiller(palette = "Reds", trans = "reverse", 
    labels = percent, breaks = pretty_breaks(n=5)) +
  guides(fill = guide_legend(reverse = TRUE)) +  
  theme_nothing(legend = TRUE)
chorodem
ggsave(chorodem, file = "dementiachoro.png", width = 5.5,
  height = 7, type = "cairo-png")

#Choro multimorbidity
choromm <- ggplot() + 
  geom_polygon(data = eng2, aes(long, lat, group=group,
    fill=multimor)) +
  scale_fill_distiller(palette = "Greens", trans = "reverse",
    labels = percent, breaks = pretty_breaks(n=5)) +
  guides(fill = guide_legend(reverse = TRUE)) +  
  theme_nothing(legend = TRUE)
choromm
ggsave(choromm, file = "multimorchoro.png", width = 5.5,
  height = 7, type = "cairo-png")

#carto dementia
engY <- read.dta('w7ticsLA.dta') #OSLAUA2 is chr
engY$OSLAUA2 <- as.factor(engY$OSLAUA2)
engY <- rename(engY, c("tics27demen"="Dementia"))
names(engY)

engshp <- readOGR('England_lad_shp_2011/england_lad_2011_gen_clipped.shp') #reread
match.indices <- match(engshp@data[, "code"], engY[, "OSLAUA2"])
engshp@data <- data.frame(engshp@data, engY[match.indices, ])

engshp.carto <- quick.carto(engshp, engshp@data$Dementia, blur=0)
  # minutes later...
engshp.buffer <- gBuffer(engshp.carto, width=0, byid=TRUE)
engshp.f <- fortify(engshp.buffer, region="OSLAUA2")
  #ONS_LABEL -> id
engshp.f <- merge(engshp.f, engshp@data, by.x="id", by.y="OSLAUA2")    #merge carto with map

#england.f$qimd <- england.f$qimd/100
cartodem <- ggplot() + 
  geom_polygon(data = engshp.f, aes(long, lat, group=group,
    fill=Dementia)) +
  scale_fill_distiller(palette = "Reds", trans = "reverse",
    labels = percent, breaks = pretty_breaks(n=5)) +
  guides(fill = guide_legend(reverse = TRUE)) +
  theme_nothing(legend = TRUE)  
cartodem
ggsave(cartodem, file = "dementiacarto.png", width = 5.5,
  height = 7, type = "cairo-png")

#carto multimor
engshp.cartomm <- quick.carto(engshp, engshp@data$multimor, blur=0)    # minutes later...
engshp.buffermm <- gBuffer(engshp.cartomm, width=0, byid=TRUE)
engshp.fmm <- fortify(engshp.buffermm, region="OSLAUA2")
  #ONS_LABEL -> id
engshp.fmm <- merge(engshp.fmm, engshp@data, by.x="id", by.y="OSLAUA2") #merge carto with map

cartomm <- ggplot() + 
  geom_polygon(data = engshp.f, aes(long, lat, group=group,
    fill=multimor)) +
  scale_fill_distiller(palette = "Greens", trans = "reverse",
    labels = percent, breaks = pretty_breaks(n=5)) +
  guides(fill = guide_legend(reverse = TRUE)) +
  theme_nothing(legend = TRUE)  
cartomm
ggsave(cartomm, file = "multimorcarto.png", width = 5.5,
  height = 7, type = "cairo-png")

Friday, February 14, 2020

R and internet data (Brunsdon & Comber 2019 ch 9)

#Brunsdon & Comber ch 9: R and internet data
# See also https://www.r-spatial.org/r/2018/10/25/ggplot2-sf.html
# Spatial data science
# Or Using UNCOMTRADE data with R
#https://comtrade.un.org/Data/Doc/api/ex/r

#################################################################
#In Stata: jsonio rv, file("https://data.police.uk/api/crimes-street/all-crime?lat=52.629729&lng=-1.131592&date=2017-01")

#local url     "https://data.police.uk/api/crimes-street/all-crime?"
#local url_lat "lat=52.629729&"
#local url_lng "lng=-1.131592&"
#local url_dt  "date=2017-01"
#jsonio kv, file("`url'`url_lat'`url_lng'`url_dt'")
#################################################################

require(devtools)
require(eurostat)
require(ggplot2)
require(dplyr)

#9.1 Introduction
#9.2 Direct Access to Data
fpe <- read.table("http://data.princeton.edu/wws509/datasets/effort.dat")
head(fpe, 2)
##           setting effort change
## Bolivia        46      0      1
## Brazil         74      0     10
## Chile          89     16     29
pairs(fpe, panel=panel.smooth)

#Graph viz
require(Rgraphviz)
require(datasets)

# Load the state.x77 data
data(state)
# Which ones are 'connected' - ie abs correlation above 0.5
connected <- abs(cor(state.x77)) > 0.5 
# Create the graph - node names are the column names
conn <- graphNEL(colnames(state.x77))
# Populate with edges - join variables that are TRUE in 'connected'
for (i in colnames(connected)) {
  for (j in colnames(connected)) {
    if (i < j) {
      if (connected[i,j]) {
        conn <- addEdge(i,j,conn,1)}}}}
# Create a layout for the graph
conn <- layoutGraph(conn)
# Specify some drawing parameters
attrs <- list(node=list(shape="ellipse", 
                        fixedsize=FALSE, fontsize=12))

plot(conn,attrs=attrs)
#Figure 9.2: Illustration of Rgraphviz

#9.3 Using RCurl
library(RCurl) # Load RCurl

# Get the content of the URL and store it into 'temp'
stem <- 'https://www.gov.uk/government/uploads/system/uploads'
file1 <- '/attachment_data/file/15240/1871702.csv'
temp <- getURL(paste0(stem,file1))

# Use textConnection to read the content of temp
# as though it were a CSV file
imd <- read.csv(textConnection(temp))
# Check - this gives the first 10 column names of the data frame
head(colnames(imd), 10)
## [1] "X.html..body.You.are.being..a.href.https...assets.publishing.service.gov.uk.government.uploads.system.uploads.attachment_data.file.15240.1871702.csv.redirected..a....body...html."
read.csv.https <- function(url) {
  temp <- getURL(url)
  return(read.csv(textConnection(temp)))
}
# Download the csv dataand put it into 'imd2'
imd <- read.csv(paste0(stem,file1))
# Modify the margins around the plot, to fit the GOR
# names into the left-hand margin
par(mar=c(5,12,4,2) + 0.1)
# Create the boxplot. The 'las' parameter specificies y-axis 
# labelling is horizontal, x-axis is vertical
boxplot(IMD.SCORE~GOR.NAME,data=imd,horizontal=TRUE,las=2)
#Boxplot of IMDs by Goverment Office Regions
#Figure 9.3: Boxplot of IMDs by Goverment Office Regions

#9.4 Working with APIs
require(rjson)
crimes.buf <- getForm("http://data.police.uk/api/crimes-street/all-crime",  #may need https protocol
  lat="52.629729",
  lng="-1.131592",
  date="2017-01")

crimes <- fromJSON(crimes.buf)
crimes[[1]]
## 
## $month
## [1] "2016-04"

getLonLat <- function(x) as.numeric(c(x$location$longitude,
                                      x$location$latitude))
crimes.loc <- t(sapply(crimes,getLonLat))
head(crimes.loc)
##           [,1]     [,2]
## [1,] -2.979198 53.40392
## [2,] -2.987014 53.40565

getAttr <- function(x) c(
  x$category,
  x$location$street$name,
  x$location_type)
crimes.attr <- as.data.frame(t(sapply(crimes,getAttr)))
colnames(crimes.attr) <- c("category","street","location_type")
head(crimes.attr)
##                category                      street location_type
## 1 anti-social-behaviour On or near Back Bold Street         Force
## 2 anti-social-behaviour    On or near Shopping Area         Force

library(sp)
crimes.pts <- SpatialPointsDataFrame(crimes.loc,crimes.attr)
# Specify the projection - in this case just geographical coordinates
proj4string(crimes.pts) <- CRS("+proj=longlat")
# Note that 'head' doesn't work on SpatialPointsDataFrames
crimes.pts[1:2,] 
##             coordinates              category                      street
## 1 (-2.979198, 53.40392) anti-social-behaviour On or near Back Bold Street
## 2 (-2.987014, 53.40565) anti-social-behaviour    On or near Shopping Area
##   location_type
## 1         Force
## 2         Force

asb.pts <- crimes.pts[crimes.pts$category=="anti-social-behaviour",]
cda.pts <- crimes.pts[crimes.pts$category=="criminal-damage-arson",]
require(tmap)
## Loading required package: tmap
require(tmaptools)
## Loading required package: tmaptools
asb_cda.pts <- rbind(asb.pts,cda.pts)
asb_cda.pts$category <- as.character(asb_cda.pts$category)
tm_shape(asb_cda.pts) + tm_dots(col='category',title='Crime Type',
                                labels=c("Antisocial Behaviour","Criminal Damage"),
                                palette=c('indianred','dodgerblue'),size=0.1) 
## Linking to GEOS 3.6.1, GDAL 2.1.3, PROJ 4.9.3
#Locations of anti-social behaviour and criminal damage/arson incidents
#Figure 9.4: Locations of anti-social behaviour and criminal damage/arson incidents

tmap_mode('view')
## tmap mode set to interactive viewing
tm_shape(asb_cda.pts) + tm_dots(col='category',title='Crime Type',
                                labels=c("Antisocial Behaviour","Criminal Damage"),
                                palette=c('indianred','dodgerblue'),size=0.02) 

#9.5 Creating a Statistical ‘Mashup’
terr3bed.buf <- getForm("https://api.nestoria.co.uk/api",
                        action='search_listings',
                        place_name='liverpool',
                        encoding='json',
                        listing_type='buy',
                        number_of_results=50,
                        bedroom_min=3,bedroom_max=3,
                        keywords='terrace')
## Ignore warning.

terr3bed <- fromJSON(terr3bed.buf)
getHouseAttr <- function(x) {
  as.numeric(c(x$price/1000,x$longitude,x$latitude))
}

terr3bed.attr <- as.data.frame(t(sapply(terr3bed$response$listings,
                                        getHouseAttr)))

colnames(terr3bed.attr) <- c("price","longitude","latitude")
head(terr3bed.attr)
##    price longitude latitude
## 1 120.00 -2.940470 53.41720
## 2  95.00 -2.924914 53.40125

# Create an extra column to contain burglary rates
terr3bed.attr <- transform(terr3bed.attr,burgs=0)

# For each house in the data frame
for (i in 1:50) {
  # Firstly obtain crimes in a 1-mile radius of the houses
  # latitude and longitude and decode it from JSON form
  crimes.near <- getForm(
    "http://data.police.uk/api/crimes-street/all-crime",
    lat=terr3bed.attr$latitude[i],
    lng=terr3bed.attr$longitude[i],
    date="2017-01")
  crimes.near <- fromJSON(crimes.near)
  crimes.near <- as.data.frame(t(sapply(crimes.near,getAttr)))
  # Then from the 'category' column count the number of burlaries
  # and assign it to the burgs column
  terr3bed.attr$burgs[i] <- sum(crimes.near[,1] == 'burglary')
  # Pause before running next API request - to avoid overloading
  # the server
  Sys.sleep(0.7)
  # Note this stage may cause the code to take a minute or two to run
}

ggplot(terr3bed.attr,aes(x=burgs,y=price)) + geom_point() +
  geom_smooth(span=1) + 
  labs(x='Burglaries in a 1-Mile Radius',
       y='House Price (1000s Pounds)')
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
#Scatter plot of Burglary Rate vs. House Price
#Figure 9.5: Scatter plot of Burglary Rate vs. House Price

#9.6 Using Specific Packages: eurostat
tgs00026 <- get_eurostat("tgs00026", time_format = "raw")
## Table tgs00026 cached at /var/folders/t4/3d2rv9_10wx7_xckhgslrmv00000gn/T//RtmpNT6Mtr/eurostat/tgs00026_raw_code_TF.rds
head(tgs00026)
## # A tibble: 6 x 5
##   unit     na_item geo   time  values
##   <fct>    <fct>   <fct> <chr>  <dbl>
## 1 PPCS_HAB B6N     AT11  2005   18100
## 2 PPCS_HAB B6N     AT12  2005   18900

tgs00026_06       <- tgs00026[tgs00026$time=='2006',]
tgs00026_10       <- tgs00026[tgs00026$time=='2010',]
tgs00026_10$delta <- 100*(tgs00026_10$values - tgs00026_06$values)/tgs00026_06$values
tgs00026_10$country <- substr(as.character(tgs00026_10$geo),1,2)
ggplot(tgs00026_10,aes(x=country,y=delta)) + geom_boxplot() +
  labs(x="Country",y="% Change in Household\n Income 2005-2010")
#Box plots of disposable income change 2005-10 by country (Eurostat data).
#Figure 9.6: Box plots of disposable income change 2005-10 by country (Eurostat data).

library(forcats)
tgs00026_10$country <- 
  fct_reorder(tgs00026_10$country,tgs00026_10$delta,median)
ggplot(tgs00026_10,aes(x=country,y=delta)) + geom_boxplot() + 
  labs(x="Country",y="% Change in Household\n Income 2005-2010")
#Median ordered box plots of disposable income change 2005-10 by country.
#Figure 9.7: Median ordered box plots of disposable income change 2005-10 by country.

tmap_mode('plot')
## tmap mode set to plotting
tgs_es <- tgs00026_10[tgs00026_10$country=='ES',]
tgs_map <- get_eurostat_geospatial(output_class = "sf", resolution = "10", nuts_level = "2") %>%
  right_join(tgs_es)
head(tgs_map)

ggplot() + geom_sf(data = tgs_map, aes(fill=delta))

tgs_mlmap <- tgs_map[!(tgs_map$NUTS_ID %in% c("ES70","ES53")),]
ggplot() + geom_sf(data = tgs_mlmap, aes(fill=delta))

#Household Income Change (\%, 2005-10) In Mainland  Spain
#Figure 9.9: Household Income Change (%, 2005-10) In Mainland Spain


##### From eurostat vignette
require(ggplot2)
require(dplyr)
ct <- c("AT", "BE", "BG", "DE", "ES", "FR", "IT", "SE", "UK")
dat <- get_eurostat(id ="tps00199", filters = list(geo=ct))
head(dat)
# A tibble: 6 x 4
#indic_de geo   time       values
#<fct>    <fct> <date>      <dbl>
#  1 TOTFERRT AT    2007-01-01   1.38
#  2 TOTFERRT AT    2008-01-01   1.42
ggplot(data = dat, mapping = aes(x=time, y=values, color=geo)) +
  geom_line()

# bar chart
dat_2015 <- dat %>% filter(time == "2015-01-01")
ggplot(dat_2015, aes(x = reorder(geo, values), y = values)) +
  geom_col(color = "white", fill = "grey80") +
  labs(title = "Total fertility rate, 2015", y = "%", x = NULL)

mapdata <- get_eurostat_geospatial(nuts_level = 0) %>%
  right_join(dat_2015) %>%
  mutate(categ = cut_to_classes(values, n=4, decimals=1))
head(select(mapdata, geo, values, categ), 2)
ggplot(mapdata, aes(fill = categ)) +
  geom_sf(color = alpha("white", .3), alpha = .6) +
  xlim(c(-12,44)) + ylim(c(35,70)) +
  labs(title = "Total fertility rate, 2015", subtitle = "Avg no life birth per women", fill = "%")


#9.7 Web Scraping
grepl('Chris[0-9]*',c('Chris','Lex','Chris1999','Chris Brunsdon'))
## [1]  TRUE FALSE  TRUE  TRUE
grep('Chris[0-9]*',c('Chris','Lex','Chris1999','Chris Brunsdon'))
## [1] 1 3 4
grep('Chris[0-9]*',c('Chris','Lex','Chris1999','Chris Brunsdon'),
     value=TRUE)
## [1] "Chris"          "Chris1999"      "Chris Brunsdon"

#9.7.1 Scraping Train Times
web.buf <- readLines(
  "https://traintimes.org.uk/durham/leicester/00:00/monday")
times <- grep("strong.*[0-2][0-9]:[0-5][0-9].*[0-2][0-9]:[0-5][0-9]",
              web.buf,value=TRUE)
locs <- gregexpr("[0-2][0-9]:[0-5][0-9]",times)
# Show the match information for times[1]
locs[[1]]
## [1] 46 60
## attr(,"match.length")
## [1] 5 5
## attr(,"index.type")
## [1] "chars"
## attr(,"useBytes")
## [1] TRUE
timedata <- matrix(0,length(locs),4)
ptr <- 1
for (loc in locs) {
  timedata[ptr,1] <- as.numeric(substr(times[ptr],loc[1],loc[1]+1))
  timedata[ptr,2] <- as.numeric(substr(times[ptr],loc[1]+3,loc[1]+4))
  timedata[ptr,3] <- as.numeric(substr(times[ptr],loc[2],loc[2]+1))
  timedata[ptr,4] <- as.numeric(substr(times[ptr],loc[2]+3,loc[2]+4))
  ptr <- ptr + 1
}
colnames(timedata) <- c('h1','m1','h2','m2')
timedata <- transform(timedata,duration = h2 + m2/60 - h1 - m1/60 )
timedata
##   h1 m1 h2 m2 duration
## 1  4 59  8 24 3.416667
## 2  4 59  8 31 3.533333
## 3  6 38  9 18 2.666667
## 4  6 44  9 46 3.033333
## 5  6 57 10  0 3.050000