Lesson
R R Exploratory Data Analysis EDA R 12-1 Data Frame nrow() ncol() dim() > nrow(iris) # iris [1] 150 > ncol(iris) # iris [1] 5 > dim(iris) # iris [1] 150 5 nrow() number of rows ncol() number of columns dim() dimensions head() tail() names() 12-2
12 > head(iris) # iris Sepal.Length Sepal.Width Petal.Length Petal.Width Species 1 5.1 3.5 1.4 0.2 setosa 2 4.9 3.0 1.4 0.2 setosa 3 4.7 3.2 1.3 0.2 setosa 4 4.6 3.1 1.5 0.2 setosa 5 5.0 3.6 1.4 0.2 setosa 6 5.4 3.9 1.7 0.4 setosa > tail(iris) # iris Sepal.Length Sepal.Width Petal.Length Petal.Width Species 145 6.7 3.3 5.7 2.5 virginica 146 6.7 3.0 5.2 2.3 virginica 147 6.3 2.5 5.0 1.9 virginica 148 6.5 3.0 5.2 2.0 virginica 149 6.2 3.4 5.4 2.3 virginica 150 5.9 3.0 5.1 1.8 virginica > names(iris) # iris [1] "Sepal.Length" "Sepal.Width" "Petal.Length" "Petal.Width" "Species" summary() > summary(iris) # iris 12-1 summary(iris) 12-3
R str() str() str() structure > str(iris) 12-2 str(iris) 12-2 Base Plotting System R hist() hist(rnorm(1000)) boxplot() boxplot(Sepal.Length ~ Species, data = iris) plot(..., type = "l") plot(AirPassengers, type = "l") plot() plot(cars) barplot() barplot(table(mtcars$cyl)) curve() curve(sin, from = 0, to = pi * 2) 12-4
12 hist() hist() histogram > hist(rnorm(1000)) 12-3 hist(rnorm(1000)) rnorm() 0 1 1000 boxplot() iris Species Sepal.Length > boxplot(Sepal.Length ~ Species, data = iris) 12-5
R 12-4 boxplot(Sepal.Length ~ Species, data = iris) plot(..., type = "l") > x <- seq(from = as.Date("2017-01-01"), to = as.Date("2017-01-31"), by = 1) > set.seed(123) # y > y <- sample(1:100, size = 31, replace = TRUE) > plot(x, y, type = "l") 12-5 plot(x, y, type = "l") 12-6
12 x 2017 1 31 1 100 31 y type = "l" line set.seed() sample() y 123 sample() set.seed(123) y R AirPassengers LakeHuron ts time series plot() type = "l" > class(AirPassengers) [1] "ts" > class(LakeHuron) [1] "ts" > plot(AirPassengers) > plot(LakeHuron) 12-6 plot(AirPassengers) 12-7
R 12-7 plot(LakeHuron) plot() > plot(cars$speed, cars$dist) 12-8 plot(cars$speed, cars$dist) 12-8
12 plot() scatter matrix > plot(iris) 12-9 plot(cars$speed, cars$dist) barplot() ice_cream_flavor 100 程式碼 12-1 > ice_cream_flavor <- rep(NA, times = 100) > for (i in 1:100){ + ice_cream_flavor[i] <- sample(c("vanilla", "chocolate", "matcha", "other"), size = 1) + } > ice_cream_flavor 12-9
輕鬆學習 R 語言 圖 12-10 ice_cream_flavor 的輸出 在這個眼花撩亂的 ice_cream_flavor 向量中 該如何很快地得知這 100 個人最愛口味的分佈呢 可以使用 table() 函數 它的作用就像是樞紐分析 可以幫我們把凌亂的資料統整起來 程式碼 12-2 > ice_cream_flavor <- rep(NA, times = 100) > for (i in 1:100){ + ice_cream_flavor[i] <- sample(c("vanilla", "chocolate", "matcha", "other"), size = 1) + } > table(ice_cream_flavor) ice_cream_flavor chocolate matcha other vanilla 32 20 25 23 我們可以一目瞭然 有 32 個人最喜歡的口味是巧克力 20 個人最喜歡抹茶 25 個人喜歡其他的口味 而有 23 個人最喜歡香草 這時就可以使用 barplot() 函數將這個清晰的結果繪畫成清楚的長條圖 12-10
12 程式碼 12-3 > ice_cream_flavor <- rep(NA, times = 100) > for (i in 1:100){ + ice_cream_flavor[i] <- sample(c("vanilla", "chocolate", "matcha", "other"), size = 1) + } > barplot(table(ice_cream_flavor)) 12-11 barplot(table(ice_cream_flavor)) curve() from to sin() > curve(sin, from = -pi, to = pi) 12-11
R 12-12 curve(sin, from = -pi, to = pi) my_sqr() $y = f(x) = x^2$ x -3 3 > my_sqr <- function(x){ + return(x^2) + } > curve(my_sqr, from = -3, to = 3) 12-13 curve(my_sqr, from = -3, to = 3) 12-12
12 自訂標題 X 軸標籤與 Y 軸標籤 main xlab ylab > plot(cars, main = "Car speed vs. braking distance", xlab = "Car speed (mph)", ylab = "Braking distance(ft)") 12-14 加入格線 grid() X Y > plot(cars, main = "Car speed vs. braking distance", xlab = "Car speed (mph)", ylab = "Braking distance(ft)") > grid() 12-13
R 12-15 調整圖形為水平方向 horiz = TRUE > ice_cream_flavor <- rep(NA, times = 100) > for (i in 1:100){ + ice_cream_flavor[i] <- sample(c("vanilla", "chocolate", "matcha", "other"), size = 1) + } > barplot(table(ice_cream_flavor), horiz = TRUE) 12-16 12-14
12 調整刻度顯示方向 las = 1 > ice_cream_flavor <- rep(NA, times = 100) > for (i in 1:100){ + ice_cream_flavor[i] <- sample(c("vanilla", "chocolate", "matcha", "other"), size = 1) + } > barplot(table(ice_cream_flavor), horiz = TRUE, las = 1) 12-17 調整刻度文字大小 chocolate cex.name Y > barplot(table(ice_cream_flavor), horiz = TRUE, las = 1, cex.name = 0.8) 12-18 cex.name 12-15
R X cex.axis > barplot(table(ice_cream_flavor), horiz = TRUE, las = 1, cex.name = 0.8, cex.axis = 1.2) 12-19 cex.axis cex character expansion factor 1 在直方圖上加上密度曲線 density() lines() freq = FALSE > norm_dist <- rnorm(1000) > hist(norm_dist, freq = FALSE) > lines(density(norm_dist)) 12-16
12 12-20 調整資料點的形狀與顏色 pch col pch plotting character col color > plot(cars, pch = 2, col = "red") # 12-21 12-17
R > iris_pch <- c(1, 2, 3)[as.numeric(iris$Species)] > plot(iris$Sepal.Length, iris$Sepal.Width, col = iris$Species, pch = iris_pch) 12-22 2 pch col Graphical Parameters 繪畫多個圖形 par(mfrow = c(m, n)) mxn mfrow matrix of figures entered row-wise 程式碼 12-4 > par(mfrow = c(2, 2)) > boxplot(iris$Sepal.Length ~ iris$Species, main = "Sepal length by species") > boxplot(iris$Sepal.Width ~ iris$Species, main = "Sepal width by species") > boxplot(iris$Petal.Length ~ iris$Species, main = "Petal length by species") > boxplot(iris$Petal.Width ~ iris$Species, main = "Petal width by species") 12-18
12 12-23 RStudio Plots Export 12-24 12-19
R 12-25 2 Base Plotting System Google StackOverflow 12-20
R Base Plotting System 2X2 Base Plotting System R Graphics Cookbook http://shop.oreilly.com/product/0636920023135.do Graphical Parameters http://www.statmethods.net/advgraphs/parameters.html 12-21