5.3.3 Group Columns and Rows
OREdplyr provides functions for grouping columns and rows; they are listed in the following table.
Table 5-4 Grouping Columns and Rows
Function | Description |
---|---|
group_by | Groups an ore.frame object by one or more columns. |
group_size | Lists the number of rows in each group. |
groups | Shows the names of the grouping columns. |
n_groups | Returns the number of groups. |
ungroup | Drops the grouping from the input ore.frame object. |
Example 5-78 Using Grouping Functions
The following examples use the ore.frame object MTCARS that is created by using the ore.push function on the mtcars data.frame object. They exemplify the use of the grouping functions group_by, group_size, groups, n_groups, and ungroup. They also use the OREdplyr functions arrange, mutate, rename, and summarise.
# Create the ore.frame object MTCARS from the mtcars data.frame
MTCARS <- ore.push(mtcars)
# Group MTCARS by the cyl column
by_cyl <- group_by(MTCARS, cyl)
# Apply the summarise function to each group; the summaries are unnamed,
# so the result columns get generated names (mean.disp. and mean.hp.)
arrange(summarise(by_cyl, mean(disp), mean(hp)), cyl)
# Summarise drops one layer of grouping: by_vs_am is grouped by vs and am,
# so the summarised result by_vs is still grouped by vs
by_vs_am <- group_by(MTCARS, vs, am)
by_vs <- summarise(by_vs_am, n = n())
arrange(by_vs, vs, am)
# Summing n within the remaining vs grouping gives one row per vs value
arrange(summarise(by_vs, n = sum(n)), vs)
# Remove grouping so that n is summed over all rows into a single total
summarise(ungroup(by_vs), n = sum(n))
# Group by expressions with mutate: add the computed column vsam first,
# then group by it and list the number of rows in each group
arrange(group_size(group_by(mutate(MTCARS, vsam = vs + am), vsam)), vsam)
# Rename the grouping column; groups() then reports the new name vs2
groups(rename(group_by(MTCARS, vs), vs2 = vs))
# Add more grouping columns: without add = TRUE the new columns replace
# the existing cyl grouping; with add = TRUE they are appended to it
groups(group_by(by_cyl, vs, am))
groups(group_by(by_cyl, vs, am, add = TRUE))
# Drop duplicate groups: repeating cyl yields a single grouping column
groups(group_by(by_cyl, cyl, cyl))
# Load the magrittr library to use the forward-pipe operator %>%
library(magrittr)
# Group by three columns, count the groups, and list the size of each
by_cyl_gear_carb <- MTCARS %>% group_by(cyl, gear, carb)
n_groups(by_cyl_gear_carb)
arrange(group_size(by_cyl_gear_carb), cyl, gear, carb)
# Recreate the cyl grouping, this time with the forward-pipe operator
by_cyl <- MTCARS %>% group_by(cyl)
# Number of groups
n_groups(by_cyl)
# Size of each group
arrange(group_size(by_cyl), cyl)
Listing for This Example
R> MTCARS <- ore.push(mtcars)
R> by_cyl <- group_by(MTCARS, cyl)
R>
R> # Apply the summarise function to each group
R> arrange(summarise(by_cyl, mean(disp), mean(hp)), cyl)
cyl mean.disp. mean.hp.
1 4 105.1364 82.63636
2 6 183.3143 122.28571
3 8 353.1000 209.21429
R>
R> # Summarise drops one layer of grouping
R> by_vs_am <- group_by(MTCARS, vs, am)
R> by_vs <- summarise(by_vs_am, n = n())
R> arrange(by_vs, vs, am)
vs am n
1 0 0 12
2 0 1 6
3 1 0 7
4 1 1 7
R> arrange(summarise(by_vs, n = sum(n)), vs)
vs n
1 0 18
2 1 14
R>
R> # Remove grouping
R> summarise(ungroup(by_vs), n = sum(n))
n
32
R>
R> # Group by expressions with mutate
R> arrange(group_size(group_by(mutate(MTCARS, vsam = vs + am), vsam)), vsam)
vsam n
1 0 12
2 1 13
3 2 7
R>
R> # Rename the grouping column
R> groups(rename(group_by(MTCARS, vs), vs2 = vs))
[1] "vs2"
R>
R> # Add more grouping columns
R> groups(group_by(by_cyl, vs, am))
[[1]]
[1] "vs"
[[2]]
[1] "am"
R> groups(group_by(by_cyl, vs, am, add = TRUE))
[[1]]
[1] "cyl"
[[2]]
[1] "vs"
[[3]]
[1] "am"
R>
R> # Drop duplicate groups
R> groups(group_by(by_cyl, cyl, cyl))
[1] "cyl"
R>
R> # Load the magrittr library to use the forward-pipe operator %>%
R> library(magrittr)
R> by_cyl_gear_carb <- MTCARS %>% group_by(cyl, gear, carb)
R> n_groups(by_cyl_gear_carb)
[1] 12
R> arrange(group_size(by_cyl_gear_carb), cyl, gear, carb)
cyl gear carb n
1 4 3 1 1
2 4 4 1 4
3 4 4 2 4
4 4 5 2 2
5 6 3 1 2
6 6 4 4 4
7 6 5 6 1
8 8 3 2 4
9 8 3 3 3
10 8 3 4 5
11 8 5 4 1
12 8 5 8 1
R>
R> by_cyl <- MTCARS %>% group_by(cyl)
R> # Number of groups
R> n_groups(by_cyl)
[1] 3
R>
R> # Size of each group
R> arrange(group_size(by_cyl), cyl)
cyl n
1 4 11
2 6 7
3 8 14
Parent topic: Data Manipulation Using OREdplyr