Comment

# I am a comment

Output

• print(“Hello World”) • cat(“Hello”, “World”)

Information about R Commands

# To find information about a specific command, e.g. ‘which’ • apropos(“which”) • help(which) • ?which • RSiteSearch(“which”)

Command History & Exit

# Show previously entered commands history()

# Exit RStudio q()

Working Directory

# Show my R working directory getwd()

# Change Working Directory

# Set a new working directory

setwd(‘PFAD’)

Saving R Commands/Script

# Save an R object (e.g. a table) to a local file save (myTable, file=“C:/Users/axxKreis/Desktop/myTable.RData”)

Loading R Commands/Script

# Load persisted results load (file=“C:/Users/axxKreis/Desktop/myTable.RData”)

Loading a CSV File

# Load a table (CSV file) myCsvTable <- read.table(file=“C:/[PFAD]/myCsvTable.csv”, header=TRUE, sep=";") or myCsvTable <- read.csv(file=“C:/[PFAD]/myCsvTable.csv”, header=TRUE, sep=";")

Libraries

Information about available libraries

# Indicates where libraries are installed .libPaths()

# Show installed base packages getOption(“defaultPackages”)

# List all packages currently loaded (.packages())

# All packages that have been installed and are available (.packages(all.available=TRUE)) or library() # For detailed information or installed.packages()

# Location of installed packages or libs Sys.getenv(“R_LIBS_USER”)

Loading a Library

# Load existing library, e.g. rpart library(rpart)

Installing a Library

install.packages(c(“tree”,“maptree”)) install.packages(“Library name”, dependencies=TRUE)

Public List of R Libraries

https://cran.r-project.org/web/packages/available_packages_by_date.html

Updating Installed Packages

update.packages()

Loading Libraries at RStudio Startup

# Edit the following file with a text editor in admin mode: C:\Program Files\R\R-3.2.4\etc\Rprofile.site # Add the desired libraries within the file library(DBI) library(FSelector) library(ggplot2) library(lattice) library(RODBC) library(RMySQL)

Installing and Loading Database Libraries

RODBC corresponds to ODBC; DBI is R's generic database interface (comparable to JDBC); RMySQL corresponds to MySQL

# Install DB packages install.packages(“RODBC”) install.packages(“DBI”) install.packages(“RMySQL”) # MySQL

# Load installed package library(RODBC) # An error message will appear if there is an issue. Otherwise no output. library(DBI) library(RMySQL)

Library for Data Processing

install.packages(“FSelector”)

Determining the Entropy for ‘target value’

entropy (entropyTargetValue)

Information Gain

information.gain(data)

# Determine ‘Information Gain’ for attribute ‘xxx’ with respect to ‘target value’ information.gain (head(data[c(“TARGET.VALUE”,“xxx”)],length(data$TARGET.VALUE)))

Library for Visualization

# Visualization packages • lattice # Must be installed and loaded • ggplot2

Data Types

# Most commonly used data types

  • Vectors
    • Logical
    • Numeric
    • Integer
    • Complex (1+2i)
    • Character
    • Raw (Hexadecimal)
  • Lists
  • Matrices
  • Arrays
  • Factors

Variables (Symbols)

Declaration

a <- 1 or a = 1

# Functional notation ‘<-’(a,1)

Show All Variables

ls()

# Show all variables matching a pattern ls(pattern = “var”)

# Show all variables including hidden ones ls (all.names = TRUE)

Delete Variable(s) ‘a’

rm(a)

# Delete all variables rm(list = ls())

Compare Variables

a = 1 b = 2 a==b # FALSE

Operators

Arithmetic Operators

- + * / %% %/% ^

Relational Operators

> < == <= >= !=

Logical Operators

& | ! && ||

Assignment Operators

<- (local) = <<- (global) -> ->>

Other Operators

: %in% %*%

Examples

# Modulus (%%)

a = c(4, 8, 12) b = c(12, 8, 4) a%%b

Result: 4 0 0

# How many times values from vector ‘a’ fit into vector ‘b’

a = c(4, 8, 12) b = c(12, 8, 4) a%/%b

Result: 0 1 3 # Exponentiate vector ‘a’ by the numbers in vector ‘b’, e.g. 2*2*2=8; 3*3=9; 4*4=16 a = c(2, 3, 4) b = c(3, 2, 2) a^b

Result: 8 9 16

# Check if values in vector ‘a’ are greater than in vector ‘b’ a = c(4, 8, 12) b = c(12, 8, 4) a>b

Result: FALSE FALSE TRUE # Returns true if both values in vectors ‘a’ and ‘b’ are TRUE a = c(2, TRUE, 4) b = c(3, FALSE, 2) a&b

Result: TRUE FALSE TRUE

# Returns TRUE if any value of a vector is TRUE, e.g. 0 and 0 = FALSE or FALSE and FALSE = FALSE a = c(3,0,TRUE,FALSE, 0) b = c(4,0,FALSE,FALSE, 0.1) a|b

Result: TRUE FALSE TRUE FALSE TRUE

# Check if vector contains a specific value a = c(3,0,TRUE,FALSE, 0) 3%in%a

Result: TRUE

String

# Single quotes are internally converted to double quotes ‘hello’ “Hello”

Get length of a string

nchar(a)

Substring

a=“abcdefg” substr(a,3,5)

Result: cde

String Concatenation

a <- “Hello” b <- ‘How’ c <- “are you?”

paste(a,b,c) “Hello How are you?”

paste(a,b,c, sep = “@”) “Hello@How@are you?”

paste(a,b,c, sep = “”, collapse = “”) “HelloHoware you?”

Generate 5-character String

format(18, width = 5) [1] “   18”

Output Alignment

format(“Hello”, width = 8, justify = “centre”) #left, right, centre, none " Hello "

Convert String to Upper and Lower Case

x = “Hello” toupper(x) tolower(x)

Factor

v = c(“thomas”, “thomas”, “thomas”, “linda”)

levels(v) NULL

typeof(v) [1] “character”

class(v) [1] “character”

# Convert to factor & check

v = factor(v)

is.factor(v) TRUE

levels(v) “linda” “thomas”

typeof(v) “integer”

class(v) “factor”

# Within a ‘data frame’ strings are converted to factors (default in R < 4.0; since R 4.0 this requires stringsAsFactors = TRUE) a <- c(“Thomas”, “Linda”, “Felix”, “Sam”) b <- c(34,19,18,56) myDataFrame <- data.frame(a,b) is.factor(a) is.factor(myDataFrame$a)

Result: FALSE TRUE

# Determine factor order a <- factor( c(“Thomas”, “Linda”, “Felix”, “Sam”, “Linda”) )

Result: Levels: Felix Linda Sam Thomas

# Change factor order a <- factor( a, levels = c(“Linda”, “Sam”, “Thomas”, “Felix”) )

Result: Levels: Linda Sam Thomas Felix # Generate factor - 4 indicates how many times a factor should occur and 2 indicates how many factors should occur - In this example “Linda” and “Sam” should each occur 4 times. a <- gl(2, 4, labels = c(“Linda”, “Sam”, “Thomas”, “Felix”))

Result: Linda Linda Linda Linda Sam Sam Sam Sam Levels: Linda Sam Thomas Felix

Vector

# Create vector myVector <- c(1,2,c(3,4),5)

Result: 1 2 3 4 5 # If a vector contains a string, all non-string values are converted to strings c(‘hello’,0,TRUE)

Result: “hello” “0” “TRUE”

# Get vector indices days <- c(“mo”, “di”, “mi”, “do”, “fr”, “sa”, “so”) days[c(1,3)]

Result: “mo” “mi”

# Every other value days <- c(“mo”, “di”, “mi”, “do”, “fr”, “sa”, “so”) days[c(TRUE,FALSE)]

Result: “mo” “mi” “fr” “so”

# Remove specific indices days <- c(“mo”, “di”, “mi”, “do”, “fr”, “sa”, “so”) days[c(-2,-3)]

Result: “mo” “do” “fr” “sa” “so”

# Custom index order days <- c(“mo”, “di”, “mi”, “do”, “fr”, “sa”, “so”) days[c(1,7,6,5,4,3,2)]

Result: “mo” “so” “sa” “fr” “do” “mi” “di”

# Operators on two vectors +, -, *, /

# Unequal lengths are computed as: 1*1, 2*2, 3*3, 4*1, 5*2, 6*3 a = c(1, 2, 3) b = c(1, 2, 3, 4, 5, 6) a*b

Result: 1 4 9 4 10 18

# Sort vector (numbers and strings) a = c(1, 4, 9, 4, 10, 18) sort(a)

# Reverse sort order sort(a, decreasing = TRUE)

# Get length length (myVector)

# Sort sort(myVector)

# Identify duplicates duplicated (myVector)

# Vector of integers from 1 to 50 1:50

# Third element of a vector a[3]

# First to third elements a[1:3]

# Combine vectors or build table first.name <- c(“Hannes”, “Julia”) last.name <- c(“Müller”, “Schmidt”) myTable <- data.frame(first.name, last.name)

or

x <- c(“a”, “b”) y <- c(“A”, “B”) paste(x,y) # Result: “a A” “b B”

or

rbind (x, y)

or

cbind (x,y)

List

# Create a list named ‘meineListe’ meineListe <- list(1, 2, c(3,4))

Result: [[1]] [1] 1

[[2]] [1] 2

[[3]] [1] 3 4

# Name list elements names(meineListe) <- c(“Element1”, “Element2”) # name first two list elements

Result: $Element1 [1] 1

$Element2 [1] 2

$<NA> [1] 3 4

# Display list names names(meineListe)

# Access list elements meineListe[1] or meineListe$Element1

# Overwrite entire list meineListe <- list(c(10, 20, 30), “Hello Liste”)

# Overwrite list element meineListe[2] <- “Hello World!”

# Concatenate lists a = list(1,2,3) b = list(4,5,6) meineNeueListe = c(a,b)

# Convert list to vector unlist(meineNeueListe)

Result: [1] 1 2 3 4 5 6

Matrix

# Create matrix (always two-dimensional)

matrix( c(1,2,3,4,5,6), ncol = 3, nrow = 2, byrow = FALSE)

Result: [,1] [,2] [,3] [1,] 1 3 5 [2,] 2 4 6

or

matrix( c(1,2,3,4,5,6), ncol = 3, nrow = 2, byrow = TRUE) Result: [,1] [,2] [,3] [1,] 1 2 3 [2,] 4 5 6

# Matrix with specified row and column names rownames = c(“row1”, “row2”, “row3”) colnames = c(“col1”, “col2”, “col3”, “col4”) myMatrix <- matrix(c(1:12), ncol = 4, nrow = 3, byrow = TRUE, dimnames = list(rownames, colnames))

Result: col1 col2 col3 col4 row1 1 2 3 4 row2 5 6 7 8 row3 9 10 11 12

# Read entire first row myMatrix[1,]

Result: col1 col2 col3 col4 1 2 3 4

# Read entire first column myMatrix[,1]

Result: row1 row2 row3 1 5 9

# Read value from first row and third column myMatrix[1,3]

# Matrix addition (other math ops work similarly) myMatrix1 [,1] [,2] [,3] [1,] 1 2 3 [2,] 4 5 6

myMatrix2 [,1] [,2] [,3] [1,] 1 2 3 [2,] 4 5 6

myMatrix1 + myMatrix2

Result: [,1] [,2] [,3] [1,] 2 4 6 [2,] 8 10 12

Array

# Array (n-dimensional) array(“a”, dim = c(3,4,2) )

Result: , , 1

[,1] [,2] [,3] [,4] [1,] “a” “a” “a” “a” [2,] “a” “a” “a” “a” [3,] “a” “a” “a” “a”

, , 2

[,1] [,2] [,3] [,4] [1,] “a” “a” “a” “a” [2,] “a” “a” “a” “a” [3,] “a” “a” “a” “a”

# Multidimensional vector a <- array(c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12), dim=c(3, 4))

or

a <- c(1,2,3,4) b <- c(5,6,7,8,9,10,11,12) rownames = c(“row1”, “row2”, “row3”) colnames = c(“col1”, “col2”, “col3”, “col4”) myArray <- array( c(a,b), dim = c(3,4,2), dimnames = list(rownames, colnames) )

Result: , , 1

col1 col2 col3 col4 row1 1 4 7 10 row2 2 5 8 11 row3 3 6 9 12

, , 2

col1 col2 col3 col4 row1 1 4 7 10 row2 2 5 8 11 row3 3 6 9 12

# Output: row 1, column 3 and dimension 2 myArray[1,3,2]

Result: 7

# Output entire second dimension: myArray[,,2]

# Overwrite second dimension array a <- c(13,14,15,16) b <- c(17,18,19,20,21,22,23,24) rownames = c(“row1”, “row2”, “row3”) colnames = c(“col1”, “col2”, “col3”, “col4”) myArray[,,2] <- array( c(a,b), dim = c(3,4), dimnames = list(rownames, colnames) )

# Perform matrix calculations or apply a function, e.g., sum rows across all dimensions: apply(myArray, c(1), sum) # row = 1; column = 2; row & column = c(1,2)

Result: row1 row2 row3 92 100 108

Data Frame

# Data Frame - Values in a column must be of same type and length myDataFrame <- data.frame ( name = c(“Thomas”, “Johan”, “Linda”), alter = c(34, 21, 49), geschlecht = c(“M”, “M”, “W”), stringsAsFactors = FALSE )

Result: name alter geschlecht 1 Thomas 34 M 2 Johan 21 M 3 Linda 49 W

# Show structure of data.frame str(myDataFrame)

Result: ‘data.frame’: 3 obs. of 3 variables: $ name : chr “Thomas” “Johan” “Linda” $ alter : num 34 21 49 $ geschlecht: chr “M” “M” “W”

# Summarize column values summary(myDataFrame)

Result: name alter geschlecht Length:3 Min. :21.00 Length:3 Class :character 1st Qu.:27.50 Class :character Mode :character Median :34.00 Mode :character Mean :34.67 3rd Qu.:41.50 Max. :49.00

# Read column data.frame(myDataFrame$name)

Result: myDataFrame.name 1 Thomas 2 Johan 3 Linda

# Show the first row - R indices start at 1; a row index of 0 is silently ignored, so 0:1 selects only row 1 myDataFrame[0:1,]

Result: name alter geschlecht 1 Thomas 34 M

# Select columns 1 & 3 - index starts at 1 myDataFrame[c(1,3)]

Result: name geschlecht 1 Thomas M 2 Johan M 3 Linda W

# Add new column myDataFrame$SpaltenBezeichnung <- c(“Wert1”,“Wert2”,“Wert3”)

Result: name alter geschlecht SpaltenBezeichnung 1 Thomas 34 M Wert1 2 Johan 21 M Wert2 3 Linda 49 W Wert3

# Add new rows newDataFrameRows <- data.frame ( name = c(“Igor”, “Andreas”, “Sascha”), alter = c(19, 25, 34), geschlecht = c(“M”, “M”, “W”), SpaltenBezeichnung = c(“new1”,“new2”,“new3”), stringsAsFactors = FALSE )

rbind(myDataFrame,newDataFrameRows)

Result: name alter geschlecht SpaltenBezeichnung 1 Thomas 34 M Wert1 2 Johan 21 M Wert2 3 Linda 49 W Wert3 4 Igor 19 M new1 5 Andreas 25 M new2 6 Sascha 34 W new3

Table

# Open GUI to edit the previously created table myTable <- edit (myTable) or fix (myTable) # Changes are added to the original symbol (myTable)

# Display number of rows and columns of a table dim(tableName)

# List column names colnames(tableName)

# Reduce table to desired columns Input: train[ ,c(‘age’,‘name’)]

Condition (if/else)

# if / else x <- 1 if (x > 2) “yes” else “no”

# Write expressions with semicolon (;) “hello”; 1+1; 1*3

Loop

i = 5 repeat {if (i > 25) break else {print(i); i <- i + 5;}} or while (i <= 25) {print(i); i <- i + 5} or for (i in seq(from=0, to=100, by=5)) print(i)

or

seq(0, 100, by = 5)

Result: 0 5 10 15 20 25 30 35 40 45 50 55 60 65 70 75 80 85 90 95 100

Database Connection Setup and Query Execution

Functions

# Different notations and function parameters log2(64) log(x=64,base=2) log(64,2)

# Create function with function arguments x <- function(i) i^2 x(2) # Result: 4

# Write your own function nameDerFunktion <- function(Übergabeparameter) { print(Übergabeparameter) }

# Call function nameDerFunktion(Übergabeparameter)

# Function with multiple parameters sayHelloWorld <- function(s,h) { cat(s, “”, h) } sayHelloWorld(s=“Hello”, h=“World”) or sayHelloWorld(“Hello”, “World”)

# Function with default values sayHelloWorld <- function(s=“H”,h=“W”) { cat(s, “”, h) } sayHelloWorld() # To output default values sayHelloWorld(“Hello”, “World”) # To override default values

Mathematical Functions

# Set the number of significant digits used when printing (valid values: 1 to 22, default: 7). Values above about 15 show digits beyond double precision. options(digits=17) # Round e.g. square root of 99 round( sqrt(99) ) or round(x=sqrt(99), digits=2) # Specify decimal places or round(sqrt(99), 2)

# Calculate mean x = c(1,2,3) # (1+2+3)/3 mean(x) # 2

# Calculate median x = c(1, 2, 4, 99) median(x) # 3

# Min and max min(x) max(x) or range(x) # 1 99

# Max minus min diff( range(x) ) # Generate random data sample(1:1000, 10, replace = FALSE, prob = NULL)

# Generate ten zeros a <- rep(0, 10)

# Format output format(20.0, nsmall = 10) # Fill decimal places to 10

Result: “20.0000000000”

# Reduce decimal places format(20.123456789, digits = 4)

Result: 20.12

# Scientific notation format(20.0000000000, scientific = TRUE)

Result: 2e+01

# Format input as string format(100)

Result: “100”