Class 5: Basic R data types and structures
Rui Fu
2022-07-06
Source:vignettes/class-5.Rmd
class-5.Rmd
Classes to come:
basic programming concepts (functions, conditional statements, for loops, etc)
RNAseq
gene lists, GO term enrichment
other tips and tricks
atomic (single value) data types
character
integer
numeric
logical
look at classes
class(0L)
#> [1] "integer"
class(0.0)
#> [1] "numeric"
class("0")
#> [1] "character"
is.numeric(0L) # test for certain type/class
#> [1] TRUE
is.integer(0L)
#> [1] TRUE
coerce to other classes
class(as.character(0))
#> [1] "character"
class(as.numeric("0"))
#> [1] "numeric"
integer vs numeric
1:5
#> [1] 1 2 3 4 5
class(1:5)
#> [1] "integer"
object.size(1:1000)
#> 4048 bytes
object.size(as.numeric(1:1000)) # integer saves space
#> 8048 bytes
be careful with integer coercion
as.integer(1.8) # note that as.integer truncates toward zero, it does not round
#> [1] 1
round(1.8)
#> [1] 2
as.integer(-1.8) # be very careful: negative values are also truncated toward zero
#> [1] -1
round(-1.8)
#> [1] -2
numbers to logical
as.logical(0) # 0 == FALSE
#> [1] FALSE
as.logical(0.1) # anything not 0 is TRUE
#> [1] TRUE
as.logical("0.1") # can't coerce
#> [1] NA
data structures
1. atomic vector, a combination of values
indexing
v1 <- c("geneA", "geneB", "geneC")
length(v1)
#> [1] 3
v1[2] # access element by index
#> [1] "geneB"
v1[c(3, 1)] # use index to change order
#> [1] "geneC" "geneA"
v1[c(TRUE, TRUE, FALSE)] # use logical vector as index
#> [1] "geneA" "geneB"
v2 <- c(5, 10, 0) # pretend that v2 contains expression values for v1, they can be filtered like this:
v2 >= 2 # result is logical vector
#> [1] TRUE TRUE FALSE
v1[v2 >= 2]
#> [1] "geneA" "geneB"
combining and coercion
c(v2, 4) # combine values
#> [1] 5 10 0 4
c(4, v2) # note order
#> [1] 4 5 10 0
c(v1, v2) # a vector only contains one type of data, so the numbers are coerced to character
#> [1] "geneA" "geneB" "geneC" "5" "10" "0"
class(v2)
#> [1] "numeric"
as.character(v2) # coercion over entire vector
#> [1] "5" "10" "0"
as.character(v2) %>% class()
#> [1] "character"
useful functions
unique(c(1, 2, 3, 2))
#> [1] 1 2 3
sort(c(2, 4, 3))
#> [1] 2 3 4
sort(c("geneB", "geneA", "geneC"))
#> [1] "geneA" "geneB" "geneC"
sort(c("geneB", "geneA", "geneC"), decreasing = TRUE)
#> [1] "geneC" "geneB" "geneA"
intersect(c(1, 2, 3), c(2, 3, 4))
#> [1] 2 3
setdiff(c(1, 2, 3), c(2, 3, 4))
#> [1] 1
setdiff(c(2, 3, 4), c(1, 2, 3)) # note the difference that argument order makes
#> [1] 4
2. data.frame, combination of multiple vectors
df vs tbl
mtcars
#> mpg cyl disp hp drat wt qsec vs am gear carb
#> Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4
#> Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4
#> Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
#> Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
#> Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2
#> Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1
#> Duster 360 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4
#> Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
#> Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
#> Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4
#> Merc 280C 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4
#> Merc 450SE 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3
#> Merc 450SL 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3
#> Merc 450SLC 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3
#> Cadillac Fleetwood 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4
#> Lincoln Continental 10.4 8 460.0 215 3.00 5.424 17.82 0 0 3 4
#> Chrysler Imperial 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4
#> Fiat 128 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1
#> Honda Civic 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2
#> Toyota Corolla 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1
#> Toyota Corona 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1
#> Dodge Challenger 15.5 8 318.0 150 2.76 3.520 16.87 0 0 3 2
#> AMC Javelin 15.2 8 304.0 150 3.15 3.435 17.30 0 0 3 2
#> Camaro Z28 13.3 8 350.0 245 3.73 3.840 15.41 0 0 3 4
#> Pontiac Firebird 19.2 8 400.0 175 3.08 3.845 17.05 0 0 3 2
#> Fiat X1-9 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1
#> Porsche 914-2 26.0 4 120.3 91 4.43 2.140 16.70 0 1 5 2
#> Lotus Europa 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2
#> Ford Pantera L 15.8 8 351.0 264 4.22 3.170 14.50 0 1 5 4
#> Ferrari Dino 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6
#> Maserati Bora 15.0 8 301.0 335 3.54 3.570 14.60 0 1 5 8
#> Volvo 142E 21.4 4 121.0 109 4.11 2.780 18.60 1 1 4 2
class(mtcars)
#> [1] "data.frame"
mtcars_tbl <- as_tibble(mtcars, rownames = "name") # tibble usually drops rownames; `rownames =` keeps them as a column
mtcars_tbl # only prints first 10 rows by default
#> # A tibble: 32 × 12
#> name mpg cyl disp hp drat wt qsec vs am gear carb
#> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 Mazd… 21 6 160 110 3.9 2.62 16.5 0 1 4 4
#> 2 Mazd… 21 6 160 110 3.9 2.88 17.0 0 1 4 4
#> 3 Dats… 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1
#> 4 Horn… 21.4 6 258 110 3.08 3.22 19.4 1 0 3 1
#> 5 Horn… 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2
#> 6 Vali… 18.1 6 225 105 2.76 3.46 20.2 1 0 3 1
#> 7 Dust… 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4
#> 8 Merc… 24.4 4 147. 62 3.69 3.19 20 1 0 4 2
#> 9 Merc… 22.8 4 141. 95 3.92 3.15 22.9 1 0 4 2
#> 10 Merc… 19.2 6 168. 123 3.92 3.44 18.3 1 0 4 4
#> # … with 22 more rows
class(mtcars_tbl) # still a data.frame, but with additional tibble classes
#> [1] "tbl_df" "tbl" "data.frame"
class(mtcars_tbl) == "data.frame" # class() can return more than one value, so code that assumes a single result is dangerous; prefer is.data.frame() or inherits()
#> [1] FALSE FALSE TRUE
is.data.frame(mtcars_tbl)
#> [1] TRUE
exploring data.frame
mtcars$mpg # a vector
#> [1] 21.0 21.0 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 17.8 16.4 17.3 15.2
#> [15] 10.4 10.4 14.7 32.4 30.4 33.9 21.5 15.5 15.2 13.3 19.2 27.3 26.0 30.4
#> [29] 15.8 19.7 15.0 21.4
mtcars[["mpg"]] # also a vector
#> [1] 21.0 21.0 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 17.8 16.4 17.3 15.2
#> [15] 10.4 10.4 14.7 32.4 30.4 33.9 21.5 15.5 15.2 13.3 19.2 27.3 26.0 30.4
#> [29] 15.8 19.7 15.0 21.4
dim(mtcars)
#> [1] 32 11
ncol(mtcars)
#> [1] 11
nrow(mtcars)
#> [1] 32
colnames(mtcars)
#> [1] "mpg" "cyl" "disp" "hp" "drat" "wt" "qsec" "vs" "am" "gear"
#> [11] "carb"
rownames(mtcars)
#> [1] "Mazda RX4" "Mazda RX4 Wag" "Datsun 710"
#> [4] "Hornet 4 Drive" "Hornet Sportabout" "Valiant"
#> [7] "Duster 360" "Merc 240D" "Merc 230"
#> [10] "Merc 280" "Merc 280C" "Merc 450SE"
#> [13] "Merc 450SL" "Merc 450SLC" "Cadillac Fleetwood"
#> [16] "Lincoln Continental" "Chrysler Imperial" "Fiat 128"
#> [19] "Honda Civic" "Toyota Corolla" "Toyota Corona"
#> [22] "Dodge Challenger" "AMC Javelin" "Camaro Z28"
#> [25] "Pontiac Firebird" "Fiat X1-9" "Porsche 914-2"
#> [28] "Lotus Europa" "Ford Pantera L" "Ferrari Dino"
#> [31] "Maserati Bora" "Volvo 142E"
mtcars_tbl_hp <- mtcars_tbl %>% dplyr::select(name, hp)
colnames(mtcars_tbl_hp)
#> [1] "name" "hp"
colnames(mtcars_tbl_hp) <- c("car", "horsepower") # assign new column names
mtcars_tbl_hp
#> # A tibble: 32 × 2
#> car horsepower
#> <chr> <dbl>
#> 1 Mazda RX4 110
#> 2 Mazda RX4 Wag 110
#> 3 Datsun 710 93
#> 4 Hornet 4 Drive 110
#> 5 Hornet Sportabout 175
#> 6 Valiant 105
#> 7 Duster 360 245
#> 8 Merc 240D 62
#> 9 Merc 230 95
#> 10 Merc 280 123
#> # … with 22 more rows
colnames(mtcars_tbl_hp)[1] <- "carname" # assign new column name by index
mtcars_tbl_hp
#> # A tibble: 32 × 2
#> carname horsepower
#> <chr> <dbl>
#> 1 Mazda RX4 110
#> 2 Mazda RX4 Wag 110
#> 3 Datsun 710 93
#> 4 Hornet 4 Drive 110
#> 5 Hornet Sportabout 175
#> 6 Valiant 105
#> 7 Duster 360 245
#> 8 Merc 240D 62
#> 9 Merc 230 95
#> 10 Merc 280 123
#> # … with 22 more rows
data.frame indexing
mtcars[1, 1] # value of one cell, order is row then column
#> [1] 21
mtcars[1, ] # row to new data.frame
#> mpg cyl disp hp drat wt qsec vs am gear carb
#> Mazda RX4 21 6 160 110 3.9 2.62 16.46 0 1 4 4
mtcars[, 1] # column to vector
#> [1] 21.0 21.0 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 17.8 16.4 17.3 15.2
#> [15] 10.4 10.4 14.7 32.4 30.4 33.9 21.5 15.5 15.2 13.3 19.2 27.3 26.0 30.4
#> [29] 15.8 19.7 15.0 21.4
mtcars[, -c(1:5)] # negative selection
#> wt qsec vs am gear carb
#> Mazda RX4 2.620 16.46 0 1 4 4
#> Mazda RX4 Wag 2.875 17.02 0 1 4 4
#> Datsun 710 2.320 18.61 1 1 4 1
#> Hornet 4 Drive 3.215 19.44 1 0 3 1
#> Hornet Sportabout 3.440 17.02 0 0 3 2
#> Valiant 3.460 20.22 1 0 3 1
#> Duster 360 3.570 15.84 0 0 3 4
#> Merc 240D 3.190 20.00 1 0 4 2
#> Merc 230 3.150 22.90 1 0 4 2
#> Merc 280 3.440 18.30 1 0 4 4
#> Merc 280C 3.440 18.90 1 0 4 4
#> Merc 450SE 4.070 17.40 0 0 3 3
#> Merc 450SL 3.730 17.60 0 0 3 3
#> Merc 450SLC 3.780 18.00 0 0 3 3
#> Cadillac Fleetwood 5.250 17.98 0 0 3 4
#> Lincoln Continental 5.424 17.82 0 0 3 4
#> Chrysler Imperial 5.345 17.42 0 0 3 4
#> Fiat 128 2.200 19.47 1 1 4 1
#> Honda Civic 1.615 18.52 1 1 4 2
#> Toyota Corolla 1.835 19.90 1 1 4 1
#> Toyota Corona 2.465 20.01 1 0 3 1
#> Dodge Challenger 3.520 16.87 0 0 3 2
#> AMC Javelin 3.435 17.30 0 0 3 2
#> Camaro Z28 3.840 15.41 0 0 3 4
#> Pontiac Firebird 3.845 17.05 0 0 3 2
#> Fiat X1-9 1.935 18.90 1 1 4 1
#> Porsche 914-2 2.140 16.70 0 1 5 2
#> Lotus Europa 1.513 16.90 1 1 5 2
#> Ford Pantera L 3.170 14.50 0 1 5 4
#> Ferrari Dino 2.770 15.50 0 1 5 6
#> Maserati Bora 3.570 14.60 0 1 5 8
#> Volvo 142E 2.780 18.60 1 1 4 2
mtcars[c(1:2), "hp"] # combination of number index and names
#> [1] 110 110
3. matrix, combination of multiple vectors of the same type
matrix vs df
mtcars_mat <- as.matrix(mtcars)
mtcars_mat
#> mpg cyl disp hp drat wt qsec vs am gear carb
#> Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4
#> Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4
#> Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
#> Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
#> Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2
#> Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1
#> Duster 360 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4
#> Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
#> Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
#> Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4
#> Merc 280C 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4
#> Merc 450SE 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3
#> Merc 450SL 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3
#> Merc 450SLC 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3
#> Cadillac Fleetwood 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4
#> Lincoln Continental 10.4 8 460.0 215 3.00 5.424 17.82 0 0 3 4
#> Chrysler Imperial 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4
#> Fiat 128 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1
#> Honda Civic 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2
#> Toyota Corolla 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1
#> Toyota Corona 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1
#> Dodge Challenger 15.5 8 318.0 150 2.76 3.520 16.87 0 0 3 2
#> AMC Javelin 15.2 8 304.0 150 3.15 3.435 17.30 0 0 3 2
#> Camaro Z28 13.3 8 350.0 245 3.73 3.840 15.41 0 0 3 4
#> Pontiac Firebird 19.2 8 400.0 175 3.08 3.845 17.05 0 0 3 2
#> Fiat X1-9 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1
#> Porsche 914-2 26.0 4 120.3 91 4.43 2.140 16.70 0 1 5 2
#> Lotus Europa 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2
#> Ford Pantera L 15.8 8 351.0 264 4.22 3.170 14.50 0 1 5 4
#> Ferrari Dino 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6
#> Maserati Bora 15.0 8 301.0 335 3.54 3.570 14.60 0 1 5 8
#> Volvo 142E 21.4 4 121.0 109 4.11 2.780 18.60 1 1 4 2
mtcars_tbl_mat <- as.matrix(mtcars_tbl)
mtcars_tbl_mat # all coerced to character
#> name mpg cyl disp hp drat wt
#> [1,] "Mazda RX4" "21.0" "6" "160.0" "110" "3.90" "2.620"
#> [2,] "Mazda RX4 Wag" "21.0" "6" "160.0" "110" "3.90" "2.875"
#> [3,] "Datsun 710" "22.8" "4" "108.0" " 93" "3.85" "2.320"
#> [4,] "Hornet 4 Drive" "21.4" "6" "258.0" "110" "3.08" "3.215"
#> [5,] "Hornet Sportabout" "18.7" "8" "360.0" "175" "3.15" "3.440"
#> [6,] "Valiant" "18.1" "6" "225.0" "105" "2.76" "3.460"
#> [7,] "Duster 360" "14.3" "8" "360.0" "245" "3.21" "3.570"
#> [8,] "Merc 240D" "24.4" "4" "146.7" " 62" "3.69" "3.190"
#> [9,] "Merc 230" "22.8" "4" "140.8" " 95" "3.92" "3.150"
#> [10,] "Merc 280" "19.2" "6" "167.6" "123" "3.92" "3.440"
#> [11,] "Merc 280C" "17.8" "6" "167.6" "123" "3.92" "3.440"
#> [12,] "Merc 450SE" "16.4" "8" "275.8" "180" "3.07" "4.070"
#> [13,] "Merc 450SL" "17.3" "8" "275.8" "180" "3.07" "3.730"
#> [14,] "Merc 450SLC" "15.2" "8" "275.8" "180" "3.07" "3.780"
#> [15,] "Cadillac Fleetwood" "10.4" "8" "472.0" "205" "2.93" "5.250"
#> [16,] "Lincoln Continental" "10.4" "8" "460.0" "215" "3.00" "5.424"
#> [17,] "Chrysler Imperial" "14.7" "8" "440.0" "230" "3.23" "5.345"
#> [18,] "Fiat 128" "32.4" "4" " 78.7" " 66" "4.08" "2.200"
#> [19,] "Honda Civic" "30.4" "4" " 75.7" " 52" "4.93" "1.615"
#> [20,] "Toyota Corolla" "33.9" "4" " 71.1" " 65" "4.22" "1.835"
#> [21,] "Toyota Corona" "21.5" "4" "120.1" " 97" "3.70" "2.465"
#> [22,] "Dodge Challenger" "15.5" "8" "318.0" "150" "2.76" "3.520"
#> [23,] "AMC Javelin" "15.2" "8" "304.0" "150" "3.15" "3.435"
#> [24,] "Camaro Z28" "13.3" "8" "350.0" "245" "3.73" "3.840"
#> [25,] "Pontiac Firebird" "19.2" "8" "400.0" "175" "3.08" "3.845"
#> [26,] "Fiat X1-9" "27.3" "4" " 79.0" " 66" "4.08" "1.935"
#> [27,] "Porsche 914-2" "26.0" "4" "120.3" " 91" "4.43" "2.140"
#> [28,] "Lotus Europa" "30.4" "4" " 95.1" "113" "3.77" "1.513"
#> [29,] "Ford Pantera L" "15.8" "8" "351.0" "264" "4.22" "3.170"
#> [30,] "Ferrari Dino" "19.7" "6" "145.0" "175" "3.62" "2.770"
#> [31,] "Maserati Bora" "15.0" "8" "301.0" "335" "3.54" "3.570"
#> [32,] "Volvo 142E" "21.4" "4" "121.0" "109" "4.11" "2.780"
#> qsec vs am gear carb
#> [1,] "16.46" "0" "1" "4" "4"
#> [2,] "17.02" "0" "1" "4" "4"
#> [3,] "18.61" "1" "1" "4" "1"
#> [4,] "19.44" "1" "0" "3" "1"
#> [5,] "17.02" "0" "0" "3" "2"
#> [6,] "20.22" "1" "0" "3" "1"
#> [7,] "15.84" "0" "0" "3" "4"
#> [8,] "20.00" "1" "0" "4" "2"
#> [9,] "22.90" "1" "0" "4" "2"
#> [10,] "18.30" "1" "0" "4" "4"
#> [11,] "18.90" "1" "0" "4" "4"
#> [12,] "17.40" "0" "0" "3" "3"
#> [13,] "17.60" "0" "0" "3" "3"
#> [14,] "18.00" "0" "0" "3" "3"
#> [15,] "17.98" "0" "0" "3" "4"
#> [16,] "17.82" "0" "0" "3" "4"
#> [17,] "17.42" "0" "0" "3" "4"
#> [18,] "19.47" "1" "1" "4" "1"
#> [19,] "18.52" "1" "1" "4" "2"
#> [20,] "19.90" "1" "1" "4" "1"
#> [21,] "20.01" "1" "0" "3" "1"
#> [22,] "16.87" "0" "0" "3" "2"
#> [23,] "17.30" "0" "0" "3" "2"
#> [24,] "15.41" "0" "0" "3" "4"
#> [25,] "17.05" "0" "0" "3" "2"
#> [26,] "18.90" "1" "1" "4" "1"
#> [27,] "16.70" "0" "1" "5" "2"
#> [28,] "16.90" "1" "1" "5" "2"
#> [29,] "14.50" "0" "1" "5" "4"
#> [30,] "15.50" "0" "1" "5" "6"
#> [31,] "14.60" "0" "1" "5" "8"
#> [32,] "18.60" "1" "1" "4" "2"
object.size(mtcars)
#> 7208 bytes
object.size(mtcars_mat) # smaller and faster with certain calculations
#> 6440 bytes
very similar function calls to df
# mtcars_mat$mpg # can't do this for matrix
# mtcars_mat[["mpg"]] # can't do this for matrix
dim(mtcars_mat)
#> [1] 32 11
ncol(mtcars_mat)
#> [1] 11
nrow(mtcars_mat)
#> [1] 32
colnames(mtcars_mat)
#> [1] "mpg" "cyl" "disp" "hp" "drat" "wt" "qsec" "vs" "am" "gear"
#> [11] "carb"
rownames(mtcars_mat)
#> [1] "Mazda RX4" "Mazda RX4 Wag" "Datsun 710"
#> [4] "Hornet 4 Drive" "Hornet Sportabout" "Valiant"
#> [7] "Duster 360" "Merc 240D" "Merc 230"
#> [10] "Merc 280" "Merc 280C" "Merc 450SE"
#> [13] "Merc 450SL" "Merc 450SLC" "Cadillac Fleetwood"
#> [16] "Lincoln Continental" "Chrysler Imperial" "Fiat 128"
#> [19] "Honda Civic" "Toyota Corolla" "Toyota Corona"
#> [22] "Dodge Challenger" "AMC Javelin" "Camaro Z28"
#> [25] "Pontiac Firebird" "Fiat X1-9" "Porsche 914-2"
#> [28] "Lotus Europa" "Ford Pantera L" "Ferrari Dino"
#> [31] "Maserati Bora" "Volvo 142E"
length(mtcars) # number of cols
#> [1] 11
length(mtcars_mat) # number of cells; length() means different things for a data.frame and a matrix, so probably want to avoid using it on them
#> [1] 352
colnames(mtcars_mat)[1] <- "milespergallon" # assign new column name by index
mtcars_mat
#> milespergallon cyl disp hp drat wt qsec vs am
#> Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1
#> Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1
#> Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1
#> Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0
#> Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0
#> Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0
#> Duster 360 14.3 8 360.0 245 3.21 3.570 15.84 0 0
#> Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0
#> Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0
#> Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0
#> Merc 280C 17.8 6 167.6 123 3.92 3.440 18.90 1 0
#> Merc 450SE 16.4 8 275.8 180 3.07 4.070 17.40 0 0
#> Merc 450SL 17.3 8 275.8 180 3.07 3.730 17.60 0 0
#> Merc 450SLC 15.2 8 275.8 180 3.07 3.780 18.00 0 0
#> Cadillac Fleetwood 10.4 8 472.0 205 2.93 5.250 17.98 0 0
#> Lincoln Continental 10.4 8 460.0 215 3.00 5.424 17.82 0 0
#> Chrysler Imperial 14.7 8 440.0 230 3.23 5.345 17.42 0 0
#> Fiat 128 32.4 4 78.7 66 4.08 2.200 19.47 1 1
#> Honda Civic 30.4 4 75.7 52 4.93 1.615 18.52 1 1
#> Toyota Corolla 33.9 4 71.1 65 4.22 1.835 19.90 1 1
#> Toyota Corona 21.5 4 120.1 97 3.70 2.465 20.01 1 0
#> Dodge Challenger 15.5 8 318.0 150 2.76 3.520 16.87 0 0
#> AMC Javelin 15.2 8 304.0 150 3.15 3.435 17.30 0 0
#> Camaro Z28 13.3 8 350.0 245 3.73 3.840 15.41 0 0
#> Pontiac Firebird 19.2 8 400.0 175 3.08 3.845 17.05 0 0
#> Fiat X1-9 27.3 4 79.0 66 4.08 1.935 18.90 1 1
#> Porsche 914-2 26.0 4 120.3 91 4.43 2.140 16.70 0 1
#> Lotus Europa 30.4 4 95.1 113 3.77 1.513 16.90 1 1
#> Ford Pantera L 15.8 8 351.0 264 4.22 3.170 14.50 0 1
#> Ferrari Dino 19.7 6 145.0 175 3.62 2.770 15.50 0 1
#> Maserati Bora 15.0 8 301.0 335 3.54 3.570 14.60 0 1
#> Volvo 142E 21.4 4 121.0 109 4.11 2.780 18.60 1 1
#> gear carb
#> Mazda RX4 4 4
#> Mazda RX4 Wag 4 4
#> Datsun 710 4 1
#> Hornet 4 Drive 3 1
#> Hornet Sportabout 3 2
#> Valiant 3 1
#> Duster 360 3 4
#> Merc 240D 4 2
#> Merc 230 4 2
#> Merc 280 4 4
#> Merc 280C 4 4
#> Merc 450SE 3 3
#> Merc 450SL 3 3
#> Merc 450SLC 3 3
#> Cadillac Fleetwood 3 4
#> Lincoln Continental 3 4
#> Chrysler Imperial 3 4
#> Fiat 128 4 1
#> Honda Civic 4 2
#> Toyota Corolla 4 1
#> Toyota Corona 3 1
#> Dodge Challenger 3 2
#> AMC Javelin 3 2
#> Camaro Z28 3 4
#> Pontiac Firebird 3 2
#> Fiat X1-9 4 1
#> Porsche 914-2 5 2
#> Lotus Europa 5 2
#> Ford Pantera L 5 4
#> Ferrari Dino 5 6
#> Maserati Bora 5 8
#> Volvo 142E 4 2
matrix indexing
mtcars_mat[1, ] # row to vector, named vector
#> milespergallon cyl disp hp
#> 21.00 6.00 160.00 110.00
#> drat wt qsec vs
#> 3.90 2.62 16.46 0.00
#> am gear carb
#> 1.00 4.00 4.00
mtcars_mat[, 1] # column to vector
#> Mazda RX4 Mazda RX4 Wag Datsun 710
#> 21.0 21.0 22.8
#> Hornet 4 Drive Hornet Sportabout Valiant
#> 21.4 18.7 18.1
#> Duster 360 Merc 240D Merc 230
#> 14.3 24.4 22.8
#> Merc 280 Merc 280C Merc 450SE
#> 19.2 17.8 16.4
#> Merc 450SL Merc 450SLC Cadillac Fleetwood
#> 17.3 15.2 10.4
#> Lincoln Continental Chrysler Imperial Fiat 128
#> 10.4 14.7 32.4
#> Honda Civic Toyota Corolla Toyota Corona
#> 30.4 33.9 21.5
#> Dodge Challenger AMC Javelin Camaro Z28
#> 15.5 15.2 13.3
#> Pontiac Firebird Fiat X1-9 Porsche 914-2
#> 19.2 27.3 26.0
#> Lotus Europa Ford Pantera L Ferrari Dino
#> 30.4 15.8 19.7
#> Maserati Bora Volvo 142E
#> 15.0 21.4
mtcars_mat[1, 1] # value of one cell
#> [1] 21
mtcars_mat[, -c(1:5)] # negative selection
#> wt qsec vs am gear carb
#> Mazda RX4 2.620 16.46 0 1 4 4
#> Mazda RX4 Wag 2.875 17.02 0 1 4 4
#> Datsun 710 2.320 18.61 1 1 4 1
#> Hornet 4 Drive 3.215 19.44 1 0 3 1
#> Hornet Sportabout 3.440 17.02 0 0 3 2
#> Valiant 3.460 20.22 1 0 3 1
#> Duster 360 3.570 15.84 0 0 3 4
#> Merc 240D 3.190 20.00 1 0 4 2
#> Merc 230 3.150 22.90 1 0 4 2
#> Merc 280 3.440 18.30 1 0 4 4
#> Merc 280C 3.440 18.90 1 0 4 4
#> Merc 450SE 4.070 17.40 0 0 3 3
#> Merc 450SL 3.730 17.60 0 0 3 3
#> Merc 450SLC 3.780 18.00 0 0 3 3
#> Cadillac Fleetwood 5.250 17.98 0 0 3 4
#> Lincoln Continental 5.424 17.82 0 0 3 4
#> Chrysler Imperial 5.345 17.42 0 0 3 4
#> Fiat 128 2.200 19.47 1 1 4 1
#> Honda Civic 1.615 18.52 1 1 4 2
#> Toyota Corolla 1.835 19.90 1 1 4 1
#> Toyota Corona 2.465 20.01 1 0 3 1
#> Dodge Challenger 3.520 16.87 0 0 3 2
#> AMC Javelin 3.435 17.30 0 0 3 2
#> Camaro Z28 3.840 15.41 0 0 3 4
#> Pontiac Firebird 3.845 17.05 0 0 3 2
#> Fiat X1-9 1.935 18.90 1 1 4 1
#> Porsche 914-2 2.140 16.70 0 1 5 2
#> Lotus Europa 1.513 16.90 1 1 5 2
#> Ford Pantera L 3.170 14.50 0 1 5 4
#> Ferrari Dino 2.770 15.50 0 1 5 6
#> Maserati Bora 3.570 14.60 0 1 5 8
#> Volvo 142E 2.780 18.60 1 1 4 2
mtcars_mat[c(1:2), "hp"] # combination of number index and names
#> Mazda RX4 Mazda RX4 Wag
#> 110 110
t(mtcars_mat)
#> Mazda RX4 Mazda RX4 Wag Datsun 710 Hornet 4 Drive
#> milespergallon 21.00 21.000 22.80 21.400
#> cyl 6.00 6.000 4.00 6.000
#> disp 160.00 160.000 108.00 258.000
#> hp 110.00 110.000 93.00 110.000
#> drat 3.90 3.900 3.85 3.080
#> wt 2.62 2.875 2.32 3.215
#> qsec 16.46 17.020 18.61 19.440
#> vs 0.00 0.000 1.00 1.000
#> am 1.00 1.000 1.00 0.000
#> gear 4.00 4.000 4.00 3.000
#> carb 4.00 4.000 1.00 1.000
#> Hornet Sportabout Valiant Duster 360 Merc 240D Merc 230
#> milespergallon 18.70 18.10 14.30 24.40 22.80
#> cyl 8.00 6.00 8.00 4.00 4.00
#> disp 360.00 225.00 360.00 146.70 140.80
#> hp 175.00 105.00 245.00 62.00 95.00
#> drat 3.15 2.76 3.21 3.69 3.92
#> wt 3.44 3.46 3.57 3.19 3.15
#> qsec 17.02 20.22 15.84 20.00 22.90
#> vs 0.00 1.00 0.00 1.00 1.00
#> am 0.00 0.00 0.00 0.00 0.00
#> gear 3.00 3.00 3.00 4.00 4.00
#> carb 2.00 1.00 4.00 2.00 2.00
#> Merc 280 Merc 280C Merc 450SE Merc 450SL Merc 450SLC
#> milespergallon 19.20 17.80 16.40 17.30 15.20
#> cyl 6.00 6.00 8.00 8.00 8.00
#> disp 167.60 167.60 275.80 275.80 275.80
#> hp 123.00 123.00 180.00 180.00 180.00
#> drat 3.92 3.92 3.07 3.07 3.07
#> wt 3.44 3.44 4.07 3.73 3.78
#> qsec 18.30 18.90 17.40 17.60 18.00
#> vs 1.00 1.00 0.00 0.00 0.00
#> am 0.00 0.00 0.00 0.00 0.00
#> gear 4.00 4.00 3.00 3.00 3.00
#> carb 4.00 4.00 3.00 3.00 3.00
#> Cadillac Fleetwood Lincoln Continental Chrysler Imperial
#> milespergallon 10.40 10.400 14.700
#> cyl 8.00 8.000 8.000
#> disp 472.00 460.000 440.000
#> hp 205.00 215.000 230.000
#> drat 2.93 3.000 3.230
#> wt 5.25 5.424 5.345
#> qsec 17.98 17.820 17.420
#> vs 0.00 0.000 0.000
#> am 0.00 0.000 0.000
#> gear 3.00 3.000 3.000
#> carb 4.00 4.000 4.000
#> Fiat 128 Honda Civic Toyota Corolla Toyota Corona
#> milespergallon 32.40 30.400 33.900 21.500
#> cyl 4.00 4.000 4.000 4.000
#> disp 78.70 75.700 71.100 120.100
#> hp 66.00 52.000 65.000 97.000
#> drat 4.08 4.930 4.220 3.700
#> wt 2.20 1.615 1.835 2.465
#> qsec 19.47 18.520 19.900 20.010
#> vs 1.00 1.000 1.000 1.000
#> am 1.00 1.000 1.000 0.000
#> gear 4.00 4.000 4.000 3.000
#> carb 1.00 2.000 1.000 1.000
#> Dodge Challenger AMC Javelin Camaro Z28 Pontiac Firebird
#> milespergallon 15.50 15.200 13.30 19.200
#> cyl 8.00 8.000 8.00 8.000
#> disp 318.00 304.000 350.00 400.000
#> hp 150.00 150.000 245.00 175.000
#> drat 2.76 3.150 3.73 3.080
#> wt 3.52 3.435 3.84 3.845
#> qsec 16.87 17.300 15.41 17.050
#> vs 0.00 0.000 0.00 0.000
#> am 0.00 0.000 0.00 0.000
#> gear 3.00 3.000 3.00 3.000
#> carb 2.00 2.000 4.00 2.000
#> Fiat X1-9 Porsche 914-2 Lotus Europa Ford Pantera L
#> milespergallon 27.300 26.00 30.400 15.80
#> cyl 4.000 4.00 4.000 8.00
#> disp 79.000 120.30 95.100 351.00
#> hp 66.000 91.00 113.000 264.00
#> drat 4.080 4.43 3.770 4.22
#> wt 1.935 2.14 1.513 3.17
#> qsec 18.900 16.70 16.900 14.50
#> vs 1.000 0.00 1.000 0.00
#> am 1.000 1.00 1.000 1.00
#> gear 4.000 5.00 5.000 5.00
#> carb 1.000 2.00 2.000 4.00
#> Ferrari Dino Maserati Bora Volvo 142E
#> milespergallon 19.70 15.00 21.40
#> cyl 6.00 8.00 4.00
#> disp 145.00 301.00 121.00
#> hp 175.00 335.00 109.00
#> drat 3.62 3.54 4.11
#> wt 2.77 3.57 2.78
#> qsec 15.50 14.60 18.60
#> vs 0.00 0.00 1.00
#> am 1.00 1.00 1.00
#> gear 5.00 5.00 4.00
#> carb 6.00 8.00 2.00
mathematical operations
log2(mtcars_mat)
#> milespergallon cyl disp hp drat
#> Mazda RX4 4.392317 2.584963 7.321928 6.781360 1.963474
#> Mazda RX4 Wag 4.392317 2.584963 7.321928 6.781360 1.963474
#> Datsun 710 4.510962 2.000000 6.754888 6.539159 1.944858
#> Hornet 4 Drive 4.419539 2.584963 8.011227 6.781360 1.622930
#> Hornet Sportabout 4.224966 3.000000 8.491853 7.451211 1.655352
#> Valiant 4.177918 2.584963 7.813781 6.714246 1.464668
#> Duster 360 3.837943 3.000000 8.491853 7.936638 1.682573
#> Merc 240D 4.608809 2.000000 7.196725 5.954196 1.883621
#> Merc 230 4.510962 2.000000 7.137504 6.569856 1.970854
#> Merc 280 4.263034 2.584963 7.388878 6.942515 1.970854
#> Merc 280C 4.153805 2.584963 7.388878 6.942515 1.970854
#> Merc 450SE 4.035624 3.000000 8.107479 7.491853 1.618239
#> Merc 450SL 4.112700 3.000000 8.107479 7.491853 1.618239
#> Merc 450SLC 3.925999 3.000000 8.107479 7.491853 1.618239
#> Cadillac Fleetwood 3.378512 3.000000 8.882643 7.679480 1.550901
#> Lincoln Continental 3.378512 3.000000 8.845490 7.748193 1.584963
#> Chrysler Imperial 3.877744 3.000000 8.781360 7.845490 1.691534
#> Fiat 128 5.017922 2.000000 6.298292 6.044394 2.028569
#> Honda Civic 4.925999 2.000000 6.242221 5.700440 2.301588
#> Toyota Corolla 5.083213 2.000000 6.151778 6.022368 2.077243
#> Toyota Corona 4.426265 2.000000 6.908092 6.599913 1.887525
#> Dodge Challenger 3.954196 3.000000 8.312883 7.228819 1.464668
#> AMC Javelin 3.925999 3.000000 8.247928 7.228819 1.655352
#> Camaro Z28 3.733354 3.000000 8.451211 7.936638 1.899176
#> Pontiac Firebird 4.263034 3.000000 8.643856 7.451211 1.622930
#> Fiat X1-9 4.770829 2.000000 6.303781 6.044394 2.028569
#> Porsche 914-2 4.700440 2.000000 6.910493 6.507795 2.147307
#> Lotus Europa 4.925999 2.000000 6.571373 6.820179 1.914565
#> Ford Pantera L 3.981853 3.000000 8.455327 8.044394 2.077243
#> Ferrari Dino 4.300124 2.584963 7.179909 7.451211 1.855990
#> Maserati Bora 3.906891 3.000000 8.233620 8.388017 1.823749
#> Volvo 142E 4.419539 2.000000 6.918863 6.768184 2.039138
#> wt qsec vs am gear carb
#> Mazda RX4 1.3895668 4.040892 -Inf 0 2.000000 2.000000
#> Mazda RX4 Wag 1.5235620 4.089159 -Inf 0 2.000000 2.000000
#> Datsun 710 1.2141248 4.218006 0 0 2.000000 0.000000
#> Hornet 4 Drive 1.6848187 4.280956 0 -Inf 1.584963 0.000000
#> Hornet Sportabout 1.7824086 4.089159 -Inf -Inf 1.584963 1.000000
#> Valiant 1.7907720 4.337711 0 -Inf 1.584963 0.000000
#> Duster 360 1.8359241 3.985500 -Inf -Inf 1.584963 2.000000
#> Merc 240D 1.6735564 4.321928 0 -Inf 2.000000 1.000000
#> Merc 230 1.6553518 4.517276 0 -Inf 2.000000 1.000000
#> Merc 280 1.7824086 4.193772 0 -Inf 2.000000 2.000000
#> Merc 280C 1.7824086 4.240314 0 -Inf 2.000000 2.000000
#> Merc 450SE 2.0250288 4.121015 -Inf -Inf 1.584963 1.584963
#> Merc 450SL 1.8991756 4.137504 -Inf -Inf 1.584963 1.584963
#> Merc 450SLC 1.9183862 4.169925 -Inf -Inf 1.584963 1.584963
#> Cadillac Fleetwood 2.3923174 4.168321 -Inf -Inf 1.584963 2.000000
#> Lincoln Continental 2.4393572 4.155425 -Inf -Inf 1.584963 2.000000
#> Chrysler Imperial 2.4181899 4.122673 -Inf -Inf 1.584963 2.000000
#> Fiat 128 1.1375035 4.283181 0 0 2.000000 0.000000
#> Honda Civic 0.6915342 4.211012 0 0 2.000000 1.000000
#> Toyota Corolla 0.8757801 4.314697 0 0 2.000000 0.000000
#> Toyota Corona 1.3015876 4.322649 0 -Inf 1.584963 0.000000
#> Dodge Challenger 1.8155754 4.076388 -Inf -Inf 1.584963 1.000000
#> AMC Javelin 1.7803101 4.112700 -Inf -Inf 1.584963 1.000000
#> Camaro Z28 1.9411063 3.945795 -Inf -Inf 1.584963 2.000000
#> Pontiac Firebird 1.9429836 4.091700 -Inf -Inf 1.584963 1.000000
#> Fiat X1-9 0.9523336 4.240314 0 0 2.000000 0.000000
#> Porsche 914-2 1.0976108 4.061776 -Inf 0 2.321928 1.000000
#> Lotus Europa 0.5974120 4.078951 0 0 2.321928 1.000000
#> Ford Pantera L 1.6644828 3.857981 -Inf 0 2.321928 2.000000
#> Ferrari Dino 1.4698860 3.954196 -Inf 0 2.321928 2.584963
#> Maserati Bora 1.8359241 3.867896 -Inf 0 2.321928 3.000000
#> Volvo 142E 1.4750849 4.217231 0 0 2.000000 1.000000
rowSums(mtcars_mat)
#> Mazda RX4 Mazda RX4 Wag Datsun 710
#> 328.980 329.795 259.580
#> Hornet 4 Drive Hornet Sportabout Valiant
#> 426.135 590.310 385.540
#> Duster 360 Merc 240D Merc 230
#> 656.920 270.980 299.570
#> Merc 280 Merc 280C Merc 450SE
#> 350.460 349.660 510.740
#> Merc 450SL Merc 450SLC Cadillac Fleetwood
#> 511.500 509.850 728.560
#> Lincoln Continental Chrysler Imperial Fiat 128
#> 726.644 725.695 213.850
#> Honda Civic Toyota Corolla Toyota Corona
#> 195.165 206.955 273.775
#> Dodge Challenger AMC Javelin Camaro Z28
#> 519.650 506.085 646.280
#> Pontiac Firebird Fiat X1-9 Porsche 914-2
#> 631.175 208.215 272.570
#> Lotus Europa Ford Pantera L Ferrari Dino
#> 273.683 670.690 379.590
#> Maserati Bora Volvo 142E
#> 694.710 288.890
rowMeans(mtcars_mat)
#> Mazda RX4 Mazda RX4 Wag Datsun 710
#> 29.90727 29.98136 23.59818
#> Hornet 4 Drive Hornet Sportabout Valiant
#> 38.73955 53.66455 35.04909
#> Duster 360 Merc 240D Merc 230
#> 59.72000 24.63455 27.23364
#> Merc 280 Merc 280C Merc 450SE
#> 31.86000 31.78727 46.43091
#> Merc 450SL Merc 450SLC Cadillac Fleetwood
#> 46.50000 46.35000 66.23273
#> Lincoln Continental Chrysler Imperial Fiat 128
#> 66.05855 65.97227 19.44091
#> Honda Civic Toyota Corolla Toyota Corona
#> 17.74227 18.81409 24.88864
#> Dodge Challenger AMC Javelin Camaro Z28
#> 47.24091 46.00773 58.75273
#> Pontiac Firebird Fiat X1-9 Porsche 914-2
#> 57.37955 18.92864 24.77909
#> Lotus Europa Ford Pantera L Ferrari Dino
#> 24.88027 60.97182 34.50818
#> Maserati Bora Volvo 142E
#> 63.15545 26.26273
colSums(mtcars_mat)
#> milespergallon cyl disp hp
#> 642.900 198.000 7383.100 4694.000
#> drat wt qsec vs
#> 115.090 102.952 571.160 14.000
#> am gear carb
#> 13.000 118.000 90.000
colMeans(mtcars_mat)
#> milespergallon cyl disp hp
#> 20.090625 6.187500 230.721875 146.687500
#> drat wt qsec vs
#> 3.596563 3.217250 17.848750 0.437500
#> am gear carb
#> 0.406250 3.687500 2.812500
mtcars_mat - 5 # subtracts 5 from every value
#> milespergallon cyl disp hp drat wt qsec vs am
#> Mazda RX4 16.0 1 155.0 105 -1.10 -2.380 11.46 -5 -4
#> Mazda RX4 Wag 16.0 1 155.0 105 -1.10 -2.125 12.02 -5 -4
#> Datsun 710 17.8 -1 103.0 88 -1.15 -2.680 13.61 -4 -4
#> Hornet 4 Drive 16.4 1 253.0 105 -1.92 -1.785 14.44 -4 -5
#> Hornet Sportabout 13.7 3 355.0 170 -1.85 -1.560 12.02 -5 -5
#> Valiant 13.1 1 220.0 100 -2.24 -1.540 15.22 -4 -5
#> Duster 360 9.3 3 355.0 240 -1.79 -1.430 10.84 -5 -5
#> Merc 240D 19.4 -1 141.7 57 -1.31 -1.810 15.00 -4 -5
#> Merc 230 17.8 -1 135.8 90 -1.08 -1.850 17.90 -4 -5
#> Merc 280 14.2 1 162.6 118 -1.08 -1.560 13.30 -4 -5
#> Merc 280C 12.8 1 162.6 118 -1.08 -1.560 13.90 -4 -5
#> Merc 450SE 11.4 3 270.8 175 -1.93 -0.930 12.40 -5 -5
#> Merc 450SL 12.3 3 270.8 175 -1.93 -1.270 12.60 -5 -5
#> Merc 450SLC 10.2 3 270.8 175 -1.93 -1.220 13.00 -5 -5
#> Cadillac Fleetwood 5.4 3 467.0 200 -2.07 0.250 12.98 -5 -5
#> Lincoln Continental 5.4 3 455.0 210 -2.00 0.424 12.82 -5 -5
#> Chrysler Imperial 9.7 3 435.0 225 -1.77 0.345 12.42 -5 -5
#> Fiat 128 27.4 -1 73.7 61 -0.92 -2.800 14.47 -4 -4
#> Honda Civic 25.4 -1 70.7 47 -0.07 -3.385 13.52 -4 -4
#> Toyota Corolla 28.9 -1 66.1 60 -0.78 -3.165 14.90 -4 -4
#> Toyota Corona 16.5 -1 115.1 92 -1.30 -2.535 15.01 -4 -5
#> Dodge Challenger 10.5 3 313.0 145 -2.24 -1.480 11.87 -5 -5
#> AMC Javelin 10.2 3 299.0 145 -1.85 -1.565 12.30 -5 -5
#> Camaro Z28 8.3 3 345.0 240 -1.27 -1.160 10.41 -5 -5
#> Pontiac Firebird 14.2 3 395.0 170 -1.92 -1.155 12.05 -5 -5
#> Fiat X1-9 22.3 -1 74.0 61 -0.92 -3.065 13.90 -4 -4
#> Porsche 914-2 21.0 -1 115.3 86 -0.57 -2.860 11.70 -5 -4
#> Lotus Europa 25.4 -1 90.1 108 -1.23 -3.487 11.90 -4 -4
#> Ford Pantera L 10.8 3 346.0 259 -0.78 -1.830 9.50 -5 -4
#> Ferrari Dino 14.7 1 140.0 170 -1.38 -2.230 10.50 -5 -4
#> Maserati Bora 10.0 3 296.0 330 -1.46 -1.430 9.60 -5 -4
#> Volvo 142E 16.4 -1 116.0 104 -0.89 -2.220 13.60 -4 -4
#> gear carb
#> Mazda RX4 -1 -1
#> Mazda RX4 Wag -1 -1
#> Datsun 710 -1 -4
#> Hornet 4 Drive -2 -4
#> Hornet Sportabout -2 -3
#> Valiant -2 -4
#> Duster 360 -2 -1
#> Merc 240D -1 -3
#> Merc 230 -1 -3
#> Merc 280 -1 -1
#> Merc 280C -1 -1
#> Merc 450SE -2 -2
#> Merc 450SL -2 -2
#> Merc 450SLC -2 -2
#> Cadillac Fleetwood -2 -1
#> Lincoln Continental -2 -1
#> Chrysler Imperial -2 -1
#> Fiat 128 -1 -4
#> Honda Civic -1 -3
#> Toyota Corolla -1 -4
#> Toyota Corona -2 -4
#> Dodge Challenger -2 -3
#> AMC Javelin -2 -3
#> Camaro Z28 -2 -1
#> Pontiac Firebird -2 -3
#> Fiat X1-9 -1 -4
#> Porsche 914-2 0 -3
#> Lotus Europa 0 -3
#> Ford Pantera L 0 -1
#> Ferrari Dino 0 1
#> Maserati Bora 0 3
#> Volvo 142E -1 -3
mtcars_mat * c(1, 0, -1) # vector recycling: 1, 0, -1 repeats down each column and carries over into the next column, note sequence
#> milespergallon cyl disp hp drat wt qsec vs
#> Mazda RX4 21.0 -6 0.0 110 -3.90 0.000 16.46 0
#> Mazda RX4 Wag 0.0 6 -160.0 0 3.90 -2.875 0.00 0
#> Datsun 710 -22.8 0 108.0 -93 0.00 2.320 -18.61 0
#> Hornet 4 Drive 21.4 -6 0.0 110 -3.08 0.000 19.44 -1
#> Hornet Sportabout 0.0 8 -360.0 0 3.15 -3.440 0.00 0
#> Valiant -18.1 0 225.0 -105 0.00 3.460 -20.22 0
#> Duster 360 14.3 -8 0.0 245 -3.21 0.000 15.84 0
#> Merc 240D 0.0 4 -146.7 0 3.69 -3.190 0.00 1
#> Merc 230 -22.8 0 140.8 -95 0.00 3.150 -22.90 0
#> Merc 280 19.2 -6 0.0 123 -3.92 0.000 18.30 -1
#> Merc 280C 0.0 6 -167.6 0 3.92 -3.440 0.00 1
#> Merc 450SE -16.4 0 275.8 -180 0.00 4.070 -17.40 0
#> Merc 450SL 17.3 -8 0.0 180 -3.07 0.000 17.60 0
#> Merc 450SLC 0.0 8 -275.8 0 3.07 -3.780 0.00 0
#> Cadillac Fleetwood -10.4 0 472.0 -205 0.00 5.250 -17.98 0
#> Lincoln Continental 10.4 -8 0.0 215 -3.00 0.000 17.82 0
#> Chrysler Imperial 0.0 8 -440.0 0 3.23 -5.345 0.00 0
#> Fiat 128 -32.4 0 78.7 -66 0.00 2.200 -19.47 0
#> Honda Civic 30.4 -4 0.0 52 -4.93 0.000 18.52 -1
#> Toyota Corolla 0.0 4 -71.1 0 4.22 -1.835 0.00 1
#> Toyota Corona -21.5 0 120.1 -97 0.00 2.465 -20.01 0
#> Dodge Challenger 15.5 -8 0.0 150 -2.76 0.000 16.87 0
#> AMC Javelin 0.0 8 -304.0 0 3.15 -3.435 0.00 0
#> Camaro Z28 -13.3 0 350.0 -245 0.00 3.840 -15.41 0
#> Pontiac Firebird 19.2 -8 0.0 175 -3.08 0.000 17.05 0
#> Fiat X1-9 0.0 4 -79.0 0 4.08 -1.935 0.00 1
#> Porsche 914-2 -26.0 0 120.3 -91 0.00 2.140 -16.70 0
#> Lotus Europa 30.4 -4 0.0 113 -3.77 0.000 16.90 -1
#> Ford Pantera L 0.0 8 -351.0 0 4.22 -3.170 0.00 0
#> Ferrari Dino -19.7 0 145.0 -175 0.00 2.770 -15.50 0
#> Maserati Bora 15.0 -8 0.0 335 -3.54 0.000 14.60 0
#> Volvo 142E 0.0 4 -121.0 0 4.11 -2.780 0.00 1
#> am gear carb
#> Mazda RX4 0 4 -4
#> Mazda RX4 Wag -1 0 4
#> Datsun 710 1 -4 0
#> Hornet 4 Drive 0 3 -1
#> Hornet Sportabout 0 0 2
#> Valiant 0 -3 0
#> Duster 360 0 3 -4
#> Merc 240D 0 0 2
#> Merc 230 0 -4 0
#> Merc 280 0 4 -4
#> Merc 280C 0 0 4
#> Merc 450SE 0 -3 0
#> Merc 450SL 0 3 -3
#> Merc 450SLC 0 0 3
#> Cadillac Fleetwood 0 -3 0
#> Lincoln Continental 0 3 -4
#> Chrysler Imperial 0 0 4
#> Fiat 128 1 -4 0
#> Honda Civic 0 4 -2
#> Toyota Corolla -1 0 1
#> Toyota Corona 0 -3 0
#> Dodge Challenger 0 3 -2
#> AMC Javelin 0 0 2
#> Camaro Z28 0 -3 0
#> Pontiac Firebird 0 3 -2
#> Fiat X1-9 -1 0 1
#> Porsche 914-2 1 -5 0
#> Lotus Europa 0 5 -2
#> Ford Pantera L -1 0 4
#> Ferrari Dino 1 -5 0
#> Maserati Bora 0 5 -8
#> Volvo 142E -1 0 2
mtcars_mat - mtcars_mat[, 1] # the vector has one value per row and is recycled down the columns, so each row has its own 1st-column value subtracted, i.e. normalized by the 1st column
#> milespergallon cyl disp hp drat wt
#> Mazda RX4 0 -15.0 139.0 89.0 -17.10 -18.380
#> Mazda RX4 Wag 0 -15.0 139.0 89.0 -17.10 -18.125
#> Datsun 710 0 -18.8 85.2 70.2 -18.95 -20.480
#> Hornet 4 Drive 0 -15.4 236.6 88.6 -18.32 -18.185
#> Hornet Sportabout 0 -10.7 341.3 156.3 -15.55 -15.260
#> Valiant 0 -12.1 206.9 86.9 -15.34 -14.640
#> Duster 360 0 -6.3 345.7 230.7 -11.09 -10.730
#> Merc 240D 0 -20.4 122.3 37.6 -20.71 -21.210
#> Merc 230 0 -18.8 118.0 72.2 -18.88 -19.650
#> Merc 280 0 -13.2 148.4 103.8 -15.28 -15.760
#> Merc 280C 0 -11.8 149.8 105.2 -13.88 -14.360
#> Merc 450SE 0 -8.4 259.4 163.6 -13.33 -12.330
#> Merc 450SL 0 -9.3 258.5 162.7 -14.23 -13.570
#> Merc 450SLC 0 -7.2 260.6 164.8 -12.13 -11.420
#> Cadillac Fleetwood 0 -2.4 461.6 194.6 -7.47 -5.150
#> Lincoln Continental 0 -2.4 449.6 204.6 -7.40 -4.976
#> Chrysler Imperial 0 -6.7 425.3 215.3 -11.47 -9.355
#> Fiat 128 0 -28.4 46.3 33.6 -28.32 -30.200
#> Honda Civic 0 -26.4 45.3 21.6 -25.47 -28.785
#> Toyota Corolla 0 -29.9 37.2 31.1 -29.68 -32.065
#> Toyota Corona 0 -17.5 98.6 75.5 -17.80 -19.035
#> Dodge Challenger 0 -7.5 302.5 134.5 -12.74 -11.980
#> AMC Javelin 0 -7.2 288.8 134.8 -12.05 -11.765
#> Camaro Z28 0 -5.3 336.7 231.7 -9.57 -9.460
#> Pontiac Firebird 0 -11.2 380.8 155.8 -16.12 -15.355
#> Fiat X1-9 0 -23.3 51.7 38.7 -23.22 -25.365
#> Porsche 914-2 0 -22.0 94.3 65.0 -21.57 -23.860
#> Lotus Europa 0 -26.4 64.7 82.6 -26.63 -28.887
#> Ford Pantera L 0 -7.8 335.2 248.2 -11.58 -12.630
#> Ferrari Dino 0 -13.7 125.3 155.3 -16.08 -16.930
#> Maserati Bora 0 -7.0 286.0 320.0 -11.46 -11.430
#> Volvo 142E 0 -17.4 99.6 87.6 -17.29 -18.620
#> qsec vs am gear carb
#> Mazda RX4 -4.54 -21.0 -20.0 -17.0 -17.0
#> Mazda RX4 Wag -3.98 -21.0 -20.0 -17.0 -17.0
#> Datsun 710 -4.19 -21.8 -21.8 -18.8 -21.8
#> Hornet 4 Drive -1.96 -20.4 -21.4 -18.4 -20.4
#> Hornet Sportabout -1.68 -18.7 -18.7 -15.7 -16.7
#> Valiant 2.12 -17.1 -18.1 -15.1 -17.1
#> Duster 360 1.54 -14.3 -14.3 -11.3 -10.3
#> Merc 240D -4.40 -23.4 -24.4 -20.4 -22.4
#> Merc 230 0.10 -21.8 -22.8 -18.8 -20.8
#> Merc 280 -0.90 -18.2 -19.2 -15.2 -15.2
#> Merc 280C 1.10 -16.8 -17.8 -13.8 -13.8
#> Merc 450SE 1.00 -16.4 -16.4 -13.4 -13.4
#> Merc 450SL 0.30 -17.3 -17.3 -14.3 -14.3
#> Merc 450SLC 2.80 -15.2 -15.2 -12.2 -12.2
#> Cadillac Fleetwood 7.58 -10.4 -10.4 -7.4 -6.4
#> Lincoln Continental 7.42 -10.4 -10.4 -7.4 -6.4
#> Chrysler Imperial 2.72 -14.7 -14.7 -11.7 -10.7
#> Fiat 128 -12.93 -31.4 -31.4 -28.4 -31.4
#> Honda Civic -11.88 -29.4 -29.4 -26.4 -28.4
#> Toyota Corolla -14.00 -32.9 -32.9 -29.9 -32.9
#> Toyota Corona -1.49 -20.5 -21.5 -18.5 -20.5
#> Dodge Challenger 1.37 -15.5 -15.5 -12.5 -13.5
#> AMC Javelin 2.10 -15.2 -15.2 -12.2 -13.2
#> Camaro Z28 2.11 -13.3 -13.3 -10.3 -9.3
#> Pontiac Firebird -2.15 -19.2 -19.2 -16.2 -17.2
#> Fiat X1-9 -8.40 -26.3 -26.3 -23.3 -26.3
#> Porsche 914-2 -9.30 -26.0 -25.0 -21.0 -24.0
#> Lotus Europa -13.50 -29.4 -29.4 -25.4 -28.4
#> Ford Pantera L -1.30 -15.8 -14.8 -10.8 -11.8
#> Ferrari Dino -4.20 -19.7 -18.7 -14.7 -13.7
#> Maserati Bora -0.40 -15.0 -14.0 -10.0 -7.0
#> Volvo 142E -2.80 -20.4 -20.4 -17.4 -19.4
matrix practice question -
How would you center the data (subtract the mean for each variable/column)?
# hint: a vector is recycled down the columns, so start by transposing
mtcars_mat_t <- t(mtcars_mat)
4. list, a collection of objects (vectors, matrices, data.frames, etc.)
lists store all types of data
# one unnamed list holding a number, a character vector and a whole data.frame
l1 <- list(
  1,
  c("what", "ever"),
  mtcars
)
l1
#> [[1]]
#> [1] 1
#>
#> [[2]]
#> [1] "what" "ever"
#>
#> [[3]]
#> mpg cyl disp hp drat wt qsec vs am gear carb
#> Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4
#> Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4
#> Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
#> Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
#> Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2
#> Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1
#> Duster 360 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4
#> Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
#> Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
#> Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4
#> Merc 280C 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4
#> Merc 450SE 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3
#> Merc 450SL 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3
#> Merc 450SLC 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3
#> Cadillac Fleetwood 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4
#> Lincoln Continental 10.4 8 460.0 215 3.00 5.424 17.82 0 0 3 4
#> Chrysler Imperial 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4
#> Fiat 128 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1
#> Honda Civic 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2
#> Toyota Corolla 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1
#> Toyota Corona 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1
#> Dodge Challenger 15.5 8 318.0 150 2.76 3.520 16.87 0 0 3 2
#> AMC Javelin 15.2 8 304.0 150 3.15 3.435 17.30 0 0 3 2
#> Camaro Z28 13.3 8 350.0 245 3.73 3.840 15.41 0 0 3 4
#> Pontiac Firebird 19.2 8 400.0 175 3.08 3.845 17.05 0 0 3 2
#> Fiat X1-9 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1
#> Porsche 914-2 26.0 4 120.3 91 4.43 2.140 16.70 0 1 5 2
#> Lotus Europa 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2
#> Ford Pantera L 15.8 8 351.0 264 4.22 3.170 14.50 0 1 5 4
#> Ferrari Dino 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6
#> Maserati Bora 15.0 8 301.0 335 3.54 3.570 14.60 0 1 5 8
#> Volvo 142E 21.4 4 121.0 109 4.11 2.780 18.60 1 1 4 2
# elements can carry names, and a list can itself be stored inside a list
l2 <- list(
  n = 1,
  c = c("what", "ever"),
  df = mtcars,
  l = l1
)
l2
#> $n
#> [1] 1
#>
#> $c
#> [1] "what" "ever"
#>
#> $df
#> mpg cyl disp hp drat wt qsec vs am gear carb
#> Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4
#> Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4
#> Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
#> Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
#> Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2
#> Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1
#> Duster 360 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4
#> Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
#> Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
#> Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4
#> Merc 280C 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4
#> Merc 450SE 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3
#> Merc 450SL 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3
#> Merc 450SLC 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3
#> Cadillac Fleetwood 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4
#> Lincoln Continental 10.4 8 460.0 215 3.00 5.424 17.82 0 0 3 4
#> Chrysler Imperial 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4
#> Fiat 128 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1
#> Honda Civic 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2
#> Toyota Corolla 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1
#> Toyota Corona 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1
#> Dodge Challenger 15.5 8 318.0 150 2.76 3.520 16.87 0 0 3 2
#> AMC Javelin 15.2 8 304.0 150 3.15 3.435 17.30 0 0 3 2
#> Camaro Z28 13.3 8 350.0 245 3.73 3.840 15.41 0 0 3 4
#> Pontiac Firebird 19.2 8 400.0 175 3.08 3.845 17.05 0 0 3 2
#> Fiat X1-9 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1
#> Porsche 914-2 26.0 4 120.3 91 4.43 2.140 16.70 0 1 5 2
#> Lotus Europa 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2
#> Ford Pantera L 15.8 8 351.0 264 4.22 3.170 14.50 0 1 5 4
#> Ferrari Dino 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6
#> Maserati Bora 15.0 8 301.0 335 3.54 3.570 14.60 0 1 5 8
#> Volvo 142E 21.4 4 121.0 109 4.11 2.780 18.60 1 1 4 2
#>
#> $l
#> $l[[1]]
#> [1] 1
#>
#> $l[[2]]
#> [1] "what" "ever"
#>
#> $l[[3]]
#> mpg cyl disp hp drat wt qsec vs am gear carb
#> Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4
#> Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4
#> Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
#> Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
#> Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2
#> Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1
#> Duster 360 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4
#> Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
#> Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
#> Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4
#> Merc 280C 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4
#> Merc 450SE 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3
#> Merc 450SL 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3
#> Merc 450SLC 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3
#> Cadillac Fleetwood 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4
#> Lincoln Continental 10.4 8 460.0 215 3.00 5.424 17.82 0 0 3 4
#> Chrysler Imperial 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4
#> Fiat 128 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1
#> Honda Civic 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2
#> Toyota Corolla 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1
#> Toyota Corona 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1
#> Dodge Challenger 15.5 8 318.0 150 2.76 3.520 16.87 0 0 3 2
#> AMC Javelin 15.2 8 304.0 150 3.15 3.435 17.30 0 0 3 2
#> Camaro Z28 13.3 8 350.0 245 3.73 3.840 15.41 0 0 3 4
#> Pontiac Firebird 19.2 8 400.0 175 3.08 3.845 17.05 0 0 3 2
#> Fiat X1-9 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1
#> Porsche 914-2 26.0 4 120.3 91 4.43 2.140 16.70 0 1 5 2
#> Lotus Europa 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2
#> Ford Pantera L 15.8 8 351.0 264 4.22 3.170 14.50 0 1 5 4
#> Ferrari Dino 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6
#> Maserati Bora 15.0 8 301.0 335 3.54 3.570 14.60 0 1 5 8
#> Volvo 142E 21.4 4 121.0 109 4.11 2.780 18.60 1 1 4 2
pbda::cc.genes # example of list: two named character vectors, s.genes and g2m.genes
#> $s.genes
#> [1] "MCM5" "PCNA" "TYMS" "FEN1" "MCM2" "MCM4"
#> [7] "RRM1" "UNG" "GINS2" "MCM6" "CDCA7" "DTL"
#> [13] "PRIM1" "UHRF1" "MLF1IP" "HELLS" "RFC2" "RPA2"
#> [19] "NASP" "RAD51AP1" "GMNN" "WDR76" "SLBP" "CCNE2"
#> [25] "UBR7" "POLD3" "MSH2" "ATAD2" "RAD51" "RRM2"
#> [31] "CDC45" "CDC6" "EXO1" "TIPIN" "DSCC1" "BLM"
#> [37] "CASP8AP2" "USP1" "CLSPN" "POLA1" "CHAF1B" "BRIP1"
#> [43] "E2F8"
#>
#> $g2m.genes
#> [1] "HMGB2" "CDK1" "NUSAP1" "UBE2C" "BIRC5" "TPX2" "TOP2A"
#> [8] "NDC80" "CKS2" "NUF2" "CKS1B" "MKI67" "TMPO" "CENPF"
#> [15] "TACC3" "FAM64A" "SMC4" "CCNB2" "CKAP2L" "CKAP2" "AURKB"
#> [22] "BUB1" "KIF11" "ANP32E" "TUBB4B" "GTSE1" "KIF20B" "HJURP"
#> [29] "CDCA3" "HN1" "CDC20" "TTK" "CDC25C" "KIF2C" "RANGAP1"
#> [36] "NCAPD2" "DLGAP5" "CDCA2" "CDCA8" "ECT2" "KIF23" "HMMR"
#> [43] "AURKA" "PSRC1" "ANLN" "LBR" "CKAP5" "CENPE" "CTCF"
#> [50] "NEK2" "G2E3" "GAS2L3" "CBX5" "CENPA"
exploring list
cc.genes[[1]] # [[ ]] extracts a single element by position, giving the character vector itself
#> [1] "MCM5" "PCNA" "TYMS" "FEN1" "MCM2" "MCM4"
#> [7] "RRM1" "UNG" "GINS2" "MCM6" "CDCA7" "DTL"
#> [13] "PRIM1" "UHRF1" "MLF1IP" "HELLS" "RFC2" "RPA2"
#> [19] "NASP" "RAD51AP1" "GMNN" "WDR76" "SLBP" "CCNE2"
#> [25] "UBR7" "POLD3" "MSH2" "ATAD2" "RAD51" "RRM2"
#> [31] "CDC45" "CDC6" "EXO1" "TIPIN" "DSCC1" "BLM"
#> [37] "CASP8AP2" "USP1" "CLSPN" "POLA1" "CHAF1B" "BRIP1"
#> [43] "E2F8"
cc.genes$s.genes # the same element, extracted by name
#> [1] "MCM5" "PCNA" "TYMS" "FEN1" "MCM2" "MCM4"
#> [7] "RRM1" "UNG" "GINS2" "MCM6" "CDCA7" "DTL"
#> [13] "PRIM1" "UHRF1" "MLF1IP" "HELLS" "RFC2" "RPA2"
#> [19] "NASP" "RAD51AP1" "GMNN" "WDR76" "SLBP" "CCNE2"
#> [25] "UBR7" "POLD3" "MSH2" "ATAD2" "RAD51" "RRM2"
#> [31] "CDC45" "CDC6" "EXO1" "TIPIN" "DSCC1" "BLM"
#> [37] "CASP8AP2" "USP1" "CLSPN" "POLA1" "CHAF1B" "BRIP1"
#> [43] "E2F8"
length(cc.genes) # number of elements in the list
#> [1] 2
length(cc.genes[[1]]) # number of values inside the 1st element
#> [1] 43
names(cc.genes) # element names, usable with $ or [[ ]]
#> [1] "s.genes" "g2m.genes"
unlist(cc.genes) # flatten into one named vector; each name is the element name plus a running index
#> s.genes1 s.genes2 s.genes3 s.genes4 s.genes5 s.genes6
#> "MCM5" "PCNA" "TYMS" "FEN1" "MCM2" "MCM4"
#> s.genes7 s.genes8 s.genes9 s.genes10 s.genes11 s.genes12
#> "RRM1" "UNG" "GINS2" "MCM6" "CDCA7" "DTL"
#> s.genes13 s.genes14 s.genes15 s.genes16 s.genes17 s.genes18
#> "PRIM1" "UHRF1" "MLF1IP" "HELLS" "RFC2" "RPA2"
#> s.genes19 s.genes20 s.genes21 s.genes22 s.genes23 s.genes24
#> "NASP" "RAD51AP1" "GMNN" "WDR76" "SLBP" "CCNE2"
#> s.genes25 s.genes26 s.genes27 s.genes28 s.genes29 s.genes30
#> "UBR7" "POLD3" "MSH2" "ATAD2" "RAD51" "RRM2"
#> s.genes31 s.genes32 s.genes33 s.genes34 s.genes35 s.genes36
#> "CDC45" "CDC6" "EXO1" "TIPIN" "DSCC1" "BLM"
#> s.genes37 s.genes38 s.genes39 s.genes40 s.genes41 s.genes42
#> "CASP8AP2" "USP1" "CLSPN" "POLA1" "CHAF1B" "BRIP1"
#> s.genes43 g2m.genes1 g2m.genes2 g2m.genes3 g2m.genes4 g2m.genes5
#> "E2F8" "HMGB2" "CDK1" "NUSAP1" "UBE2C" "BIRC5"
#> g2m.genes6 g2m.genes7 g2m.genes8 g2m.genes9 g2m.genes10 g2m.genes11
#> "TPX2" "TOP2A" "NDC80" "CKS2" "NUF2" "CKS1B"
#> g2m.genes12 g2m.genes13 g2m.genes14 g2m.genes15 g2m.genes16 g2m.genes17
#> "MKI67" "TMPO" "CENPF" "TACC3" "FAM64A" "SMC4"
#> g2m.genes18 g2m.genes19 g2m.genes20 g2m.genes21 g2m.genes22 g2m.genes23
#> "CCNB2" "CKAP2L" "CKAP2" "AURKB" "BUB1" "KIF11"
#> g2m.genes24 g2m.genes25 g2m.genes26 g2m.genes27 g2m.genes28 g2m.genes29
#> "ANP32E" "TUBB4B" "GTSE1" "KIF20B" "HJURP" "CDCA3"
#> g2m.genes30 g2m.genes31 g2m.genes32 g2m.genes33 g2m.genes34 g2m.genes35
#> "HN1" "CDC20" "TTK" "CDC25C" "KIF2C" "RANGAP1"
#> g2m.genes36 g2m.genes37 g2m.genes38 g2m.genes39 g2m.genes40 g2m.genes41
#> "NCAPD2" "DLGAP5" "CDCA2" "CDCA8" "ECT2" "KIF23"
#> g2m.genes42 g2m.genes43 g2m.genes44 g2m.genes45 g2m.genes46 g2m.genes47
#> "HMMR" "AURKA" "PSRC1" "ANLN" "LBR" "CKAP5"
#> g2m.genes48 g2m.genes49 g2m.genes50 g2m.genes51 g2m.genes52 g2m.genes53
#> "CENPE" "CTCF" "NEK2" "G2E3" "GAS2L3" "CBX5"
#> g2m.genes54
#> "CENPA"
unlist(cc.genes, use.names = FALSE) # same flattening, but without the generated names
#> [1] "MCM5" "PCNA" "TYMS" "FEN1" "MCM2" "MCM4"
#> [7] "RRM1" "UNG" "GINS2" "MCM6" "CDCA7" "DTL"
#> [13] "PRIM1" "UHRF1" "MLF1IP" "HELLS" "RFC2" "RPA2"
#> [19] "NASP" "RAD51AP1" "GMNN" "WDR76" "SLBP" "CCNE2"
#> [25] "UBR7" "POLD3" "MSH2" "ATAD2" "RAD51" "RRM2"
#> [31] "CDC45" "CDC6" "EXO1" "TIPIN" "DSCC1" "BLM"
#> [37] "CASP8AP2" "USP1" "CLSPN" "POLA1" "CHAF1B" "BRIP1"
#> [43] "E2F8" "HMGB2" "CDK1" "NUSAP1" "UBE2C" "BIRC5"
#> [49] "TPX2" "TOP2A" "NDC80" "CKS2" "NUF2" "CKS1B"
#> [55] "MKI67" "TMPO" "CENPF" "TACC3" "FAM64A" "SMC4"
#> [61] "CCNB2" "CKAP2L" "CKAP2" "AURKB" "BUB1" "KIF11"
#> [67] "ANP32E" "TUBB4B" "GTSE1" "KIF20B" "HJURP" "CDCA3"
#> [73] "HN1" "CDC20" "TTK" "CDC25C" "KIF2C" "RANGAP1"
#> [79] "NCAPD2" "DLGAP5" "CDCA2" "CDCA8" "ECT2" "KIF23"
#> [85] "HMMR" "AURKA" "PSRC1" "ANLN" "LBR" "CKAP5"
#> [91] "CENPE" "CTCF" "NEK2" "G2E3" "GAS2L3" "CBX5"
#> [97] "CENPA"
c(cc.genes, "geneA") # combine into list: "geneA" becomes a new, unnamed 3rd element
#> $s.genes
#> [1] "MCM5" "PCNA" "TYMS" "FEN1" "MCM2" "MCM4"
#> [7] "RRM1" "UNG" "GINS2" "MCM6" "CDCA7" "DTL"
#> [13] "PRIM1" "UHRF1" "MLF1IP" "HELLS" "RFC2" "RPA2"
#> [19] "NASP" "RAD51AP1" "GMNN" "WDR76" "SLBP" "CCNE2"
#> [25] "UBR7" "POLD3" "MSH2" "ATAD2" "RAD51" "RRM2"
#> [31] "CDC45" "CDC6" "EXO1" "TIPIN" "DSCC1" "BLM"
#> [37] "CASP8AP2" "USP1" "CLSPN" "POLA1" "CHAF1B" "BRIP1"
#> [43] "E2F8"
#>
#> $g2m.genes
#> [1] "HMGB2" "CDK1" "NUSAP1" "UBE2C" "BIRC5" "TPX2" "TOP2A"
#> [8] "NDC80" "CKS2" "NUF2" "CKS1B" "MKI67" "TMPO" "CENPF"
#> [15] "TACC3" "FAM64A" "SMC4" "CCNB2" "CKAP2L" "CKAP2" "AURKB"
#> [22] "BUB1" "KIF11" "ANP32E" "TUBB4B" "GTSE1" "KIF20B" "HJURP"
#> [29] "CDCA3" "HN1" "CDC20" "TTK" "CDC25C" "KIF2C" "RANGAP1"
#> [36] "NCAPD2" "DLGAP5" "CDCA2" "CDCA8" "ECT2" "KIF23" "HMMR"
#> [43] "AURKA" "PSRC1" "ANLN" "LBR" "CKAP5" "CENPE" "CTCF"
#> [50] "NEK2" "G2E3" "GAS2L3" "CBX5" "CENPA"
#>
#> [[3]]
#> [1] "geneA"
cc.genes[[1]] <- c(cc.genes[[1]], "geneA") # combine into first list element; note that this changes cc.genes for all code that follows
6. factors, grouping variables for the data
plotting use case
library(ggplot2)
months_tbl <- data.frame(
  month = c("Jan", "Feb", "Mar"),
  labmeetings = c(0, 3, 9)
)
# month is a plain character column here, so the x axis is sorted alphabetically
ggplot(months_tbl, aes(x = month, y = labmeetings)) +
  geom_col() +
  cowplot::theme_cowplot() # <- ordered alphabetical, not ideal

# a factor stores an explicit level order, and the axis follows the levels;
# work on a copy so months_tbl itself stays unchanged
months_ordered <- months_tbl
months_ordered$month <- factor(
  months_ordered$month,
  levels = c("Jan", "Feb", "Mar")
)
ggplot(months_ordered, aes(x = month, y = labmeetings)) +
  geom_col() +
  cowplot::theme_cowplot() # <- calendar order, taken from the factor levels
for other uses of factors, please visit https://forcats.tidyverse.org/
I/O, reading and writing files
readr package
faster and more flexible than base R functions for large files
library(readr)
# locate a csv file that is included in the pbda package
path <- system.file("extdata", "gene_tibble.csv", package = "pbda")
# to peek at the raw file, call `less` from the terminal and paste in the path
gene_tbl <- read_csv(path)
write_csv(gene_tbl, "gene_tbl.csv")
write_csv(gene_tbl, "gene_tbl.csv.gz") # a .gz file name makes readr compress the output
getwd() # files written without a full path are saved here
#> [1] "/home/runner/work/practical-data-analysis/practical-data-analysis/vignettes"
# gzipped input is decompressed on the fly when reading
path2 <- system.file("extdata", "hg19_genes.bed.gz", package = "pbda") # will auto unzip
bed_tbl <- read_tsv(path2) # use col_names = FALSE or give vector of names
# use the terminal and zless to look at it briefly
david_tbl <- read_tsv("https://raw.githubusercontent.com/IDPT7810/practical-data-analysis/master/inst/extdata/david.txt") # link directly
# the working copy of cc.genes had a made-up "geneA" appended to its first
# element earlier on, so write the untouched gene list from the package instead
write_lines(pbda::cc.genes$s.genes, "Sgenes.txt") # write vector into file, each element on a line
?read_delim # worth looking through the options
we recommend using the readr functions (the ones with “_” in their names, e.g. read_csv), but beware that they drop row names
# the car names are stored as row names, not as a column of mtcars
write_csv(mtcars, "mtcars.txt")
read_csv("mtcars.txt") # row names are gone!
#> # A tibble: 32 × 11
#> mpg cyl disp hp drat wt qsec vs am gear carb
#> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 21 6 160 110 3.9 2.62 16.5 0 1 4 4
#> 2 21 6 160 110 3.9 2.88 17.0 0 1 4 4
#> 3 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1
#> 4 21.4 6 258 110 3.08 3.22 19.4 1 0 3 1
#> 5 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2
#> 6 18.1 6 225 105 2.76 3.46 20.2 1 0 3 1
#> 7 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4
#> 8 24.4 4 147. 62 3.69 3.19 20 1 0 4 2
#> 9 22.8 4 141. 95 3.92 3.15 22.9 1 0 4 2
#> 10 19.2 6 168. 123 3.92 3.44 18.3 1 0 4 4
#> # … with 22 more rows
mtcars %>% as_tibble(rownames = "rowname") # before saving, use one of these two options; option 1 returns a tibble with the row names as a "rowname" column
#> # A tibble: 32 × 12
#> rowname mpg cyl disp hp drat wt qsec vs am gear
#> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 Mazda RX4 21 6 160 110 3.9 2.62 16.5 0 1 4
#> 2 Mazda RX4 … 21 6 160 110 3.9 2.88 17.0 0 1 4
#> 3 Datsun 710 22.8 4 108 93 3.85 2.32 18.6 1 1 4
#> 4 Hornet 4 D… 21.4 6 258 110 3.08 3.22 19.4 1 0 3
#> 5 Hornet Spo… 18.7 8 360 175 3.15 3.44 17.0 0 0 3
#> 6 Valiant 18.1 6 225 105 2.76 3.46 20.2 1 0 3
#> 7 Duster 360 14.3 8 360 245 3.21 3.57 15.8 0 0 3
#> 8 Merc 240D 24.4 4 147. 62 3.69 3.19 20 1 0 4
#> 9 Merc 230 22.8 4 141. 95 3.92 3.15 22.9 1 0 4
#> 10 Merc 280 19.2 6 168. 123 3.92 3.44 18.3 1 0 4
#> # … with 22 more rows, and 1 more variable: carb <dbl>
mtcars %>% tibble::rownames_to_column("rowname") # option 2: stays a data.frame, row names become the first column
#> rowname mpg cyl disp hp drat wt qsec vs am gear
#> 1 Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4
#> 2 Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4
#> 3 Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4
#> 4 Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3
#> 5 Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3
#> 6 Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3
#> 7 Duster 360 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3
#> 8 Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4
#> 9 Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4
#> 10 Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4
#> 11 Merc 280C 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4
#> 12 Merc 450SE 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3
#> 13 Merc 450SL 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3
#> 14 Merc 450SLC 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3
#> 15 Cadillac Fleetwood 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3
#> 16 Lincoln Continental 10.4 8 460.0 215 3.00 5.424 17.82 0 0 3
#> 17 Chrysler Imperial 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3
#> 18 Fiat 128 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4
#> 19 Honda Civic 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4
#> 20 Toyota Corolla 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4
#> 21 Toyota Corona 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3
#> 22 Dodge Challenger 15.5 8 318.0 150 2.76 3.520 16.87 0 0 3
#> 23 AMC Javelin 15.2 8 304.0 150 3.15 3.435 17.30 0 0 3
#> 24 Camaro Z28 13.3 8 350.0 245 3.73 3.840 15.41 0 0 3
#> 25 Pontiac Firebird 19.2 8 400.0 175 3.08 3.845 17.05 0 0 3
#> 26 Fiat X1-9 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4
#> 27 Porsche 914-2 26.0 4 120.3 91 4.43 2.140 16.70 0 1 5
#> 28 Lotus Europa 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5
#> 29 Ford Pantera L 15.8 8 351.0 264 4.22 3.170 14.50 0 1 5
#> 30 Ferrari Dino 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5
#> 31 Maserati Bora 15.0 8 301.0 335 3.54 3.570 14.60 0 1 5
#> 32 Volvo 142E 21.4 4 121.0 109 4.11 2.780 18.60 1 1 4
#> carb
#> 1 4
#> 2 4
#> 3 1
#> 4 1
#> 5 2
#> 6 1
#> 7 4
#> 8 2
#> 9 2
#> 10 4
#> 11 4
#> 12 3
#> 13 3
#> 14 3
#> 15 4
#> 16 4
#> 17 4
#> 18 1
#> 19 2
#> 20 1
#> 21 1
#> 22 2
#> 23 2
#> 24 4
#> 25 2
#> 26 1
#> 27 2
#> 28 2
#> 29 4
#> 30 6
#> 31 8
#> 32 2
# base write.csv() writes the row names out as an extra, unnamed first column
write.csv(mtcars, "mtcars2.txt")
r1 <- read_csv("mtcars2.txt")
r2 <- read.csv("mtcars2.txt") # note the different default column name assignment (compare names(r1) and names(r2))
r3 <- read.csv("mtcars2.txt", row.names = 1) # gets back row names by using the 1st column for them
readr practice question -
Look at this variant call format (VCF) file and read it in with readr. Rename the columns to c(“chromosome”, “position”, “variantID”, “ref_allele”, “alt_allele”, “quality”, “filter”, “info”), then count the number of variants at each reported position and sort the counts in descending order.
# download.file("https://raw.githubusercontent.com/IDPT7810/practical-data-analysis/master/inst/extdata/clinvar_2000.vcf",
# "clinvar_2000.vcf")
readxl for xlsx files
library(readxl)
read_excel(readxl_example("datasets.xlsx")) # readxl_example() gives the path to a demo workbook that ships with readxl
#> # A tibble: 150 × 5
#> Sepal.Length Sepal.Width Petal.Length Petal.Width Species
#> <dbl> <dbl> <dbl> <dbl> <chr>
#> 1 5.1 3.5 1.4 0.2 setosa
#> 2 4.9 3 1.4 0.2 setosa
#> 3 4.7 3.2 1.3 0.2 setosa
#> 4 4.6 3.1 1.5 0.2 setosa
#> 5 5 3.6 1.4 0.2 setosa
#> 6 5.4 3.9 1.7 0.4 setosa
#> 7 4.6 3.4 1.4 0.3 setosa
#> 8 5 3.4 1.5 0.2 setosa
#> 9 4.4 2.9 1.4 0.2 setosa
#> 10 4.9 3.1 1.5 0.1 setosa
#> # … with 140 more rows
?read_excel # help page; worth checking the sheet and range arguments