Open Source Your Knowledge, Become a Contributor
Technology knowledge has to be shared and made accessible for free. Join the movement.
The Split-Apply-Combine Framework
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
# Example 1 - the ddply function
library(plyr)
head(baseball)
# Count number of players recorded for each year
bbPerYear = ddply(baseball, "year", "nrow")
head(bbPerYear)
qplot(x = year,
y = nrow,
data = bbPerYear,
geom = "line",
ylab = "number of player seasons")
# Example 2 - another example of the ddply function
# Compute mean rbi (batting attempt resulting in runs)
# for all years. Summarize is the apply function, which
# takes as argument a function that computes the rbi mean
bbMod = ddply(baseball,
"year",
summarise,
mean.rbi = mean(rbi, na.rm = TRUE))
head(bbMod)
qplot(x = year,
y = mean.rbi,
data = bbMod,
geom = "line",
ylab = "mean RBI")
# Example 3 - adding a new variable
# Add a column career.year which measures the number of years
# passed since each player started batting
bbMod2 = ddply(baseball,
"id",
transform,
career.year = year - min(year) + 1)
# Sample a random subset 3000 rows to avoid over-plotting
bbSubset = bbMod2[sample(dim(bbMod2)[1], 3000), ]
qplot(career.year,
rbi, data = bbSubset,
size = I(0.8),
geom = "jitter",
ylab = "RBI",
xlab = "years of playing") +
geom_smooth(color = "red", se = F, size = 1.5)
# Example 4 - the aaply function
dim(ozone)
latitude.mean = aaply(ozone, 1, mean)
longitude.mean = aaply(ozone, 2, mean)
time.mean = aaply(ozone, 3, mean)
longitude = seq(along = longitude.mean)
qplot(x = longitude,
Enter to Rename, Shift+Enter to Preview
Open Source Your Knowledge: become a Contributor and help others learn. Create New Content