R plot color legend by factor

Tags: colors, legend, plot, r, r-factor

Using R 3.3.1 in Windows 10. I'm making an x-y plot from 95 rows of data. The data are in 6 different groupings (a factor called "group"). The plot itself is easy enough, but I can't get the legend to properly account for the factor and color correctly.

Here's the data in a variable v1:

# Example data set: a data.frame with 95 rows and four columns.
#   group - factor with six levels: F9, T26, W37, W40, W41, W42
#   point - integer index of the observation within its group (1..16;
#           the W37 group has only 15 points)
#   x, y  - numeric coordinates that are plotted against each other
# The rows are stored sorted by group, i.e. in the same order as the factor
# levels.
v1 <- structure(list(group = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L), .Label = c("F9", "T26", "W37", 
"W40", "W41", "W42"), class = "factor"), point = c(1L, 2L, 3L, 
4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 1L, 
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 
16L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 
14L, 15L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 
13L, 14L, 15L, 16L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 
11L, 12L, 13L, 14L, 15L, 16L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 
9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L), x = c(-7.064, -5.1681, 
-6.4866, -2.7522, -4.6305, -4.2957, -3.7552, -4.9482, -5.6452, 
-6.0302, -5.3244, -3.9819, -3.8123, -5.3085, -5.6096, -6.4557, 
-5.2549, -3.4893, -3.5909, -2.5546, -3.7247, -5.1733, -3.3451, 
-2.8993, -2.6835, -3.9495, -4.9649, -2.8438, -4.6926, -3.4768, 
-3.1221, -4.8175, -4.5641, -3.549, -3.08, -2.4153, -2.9882, -3.4045, 
-4.6394, -3.3404, -2.6728, -3.3517, -2.6098, -3.7733, -4.051, 
-2.9385, -4.5024, -4.59, -4.5617, -4.0658, -2.4986, -3.7559, 
-4.245, -4.8045, -4.6615, -4.0696, -4.6638, -4.6505, -3.7978, 
-4.5649, -5.7669, -4.519, -3.8561, -3.779, -3.0549, -3.1241, 
-2.1423, -3.2759, -4.224, -4.028, -3.3412, -2.8832, -3.3866, 
-0.1852, -3.3763, -4.317, -5.3607, -3.3398, -1.9087, -4.431, 
-3.7535, -3.2545, -0.806, -3.1419, -3.7269, -3.4853, -4.3129, 
-2.8891, -3.0572, -5.3309, -2.5837, -4.1128, -4.6631, -3.4695, 
-4.1045), y = c(7.76, 0.72, 4.1, 1.36, 0.13, -0.02, 0.13, 0.42, 
1.49, 2.64, 1.01, 0.08, 0.22, 1.01, 1.53, 4.39, 0.99, 0.56, 0.43, 
2.31, 0.31, 0.59, 0.62, 1.65, 2.12, 0.1, 0.24, 1.68, 0.09, 0.59, 
1.23, 0.4, 0.36, 0.49, 1.41, 3.29, 1.22, 0.56, 0.1, 0.67, 2.38, 
0.43, 1.56, 0.07, 0.08, 1.53, -0.01, 0.12, 0.1, 0.04, 3.42, 0.23, 
0, 0.34, 0.15, 0.03, 0.19, 0.17, 0.2, 0.09, 2.3, 0.07, 0.15, 
0.18, 1.07, 1.21, 3.4, 0.8, -0.04, 0.02, 0.74, 1.59, 0.71, 10.64, 
0.64, -0.01, 1.06, 0.81, 4.58, 0.01, 0.14, 0.59, 7.35, 0.63, 
0.17, 0.38, -0.08, 1.1, 0.89, 0.94, 1.52, 0.01, 0.1, 0.38, 0.02
)), .Names = c("group", "point", "x", "y"), class = "data.frame", row.names = c(NA, 
-95L))

Here's the plot and my attempts to overlay a legend:

> attach(v1)    
> plot(x,y, pch=16, col=group)   # simple scatter plot; col=group lets R pick a color per factor level
> # First legend attempt: legend=group and col=group pass the whole per-row factor
> legend("topleft", legend=group, pch=16, col=group)
> # Colors match the points, but the legend gets one entry per data row and runs out of space
> legend("topright", legend=levels(group), pch=16, col=group)
> # Second attempt: one entry per factor level, as wanted, but the colors do not vary.
> # Presumably this is because col=group is still per-row and the first rows all share the first level.
>

You can see that the first legend appears correct color-wise, but it shows an entry for every point and runs out of space. The second legend shows group as factor levels, which is what I want, but it doesn't change the colors.

I realize that I could specify the colors as a vector (e.g. col = c("black", "red", ...)), but since the original plot command assigned colors automatically, I'm looking to do the same "automatically" in my legend and avoid the risk of putting the wrong colors in my vector.

Thanks!

Best Answer

base R solution:

# Scatter plot of y against x, colored by group, with one legend entry per
# factor level.
# with() scopes the column lookups to v1; attach() would leave v1's columns on
# the search path, where they can mask (or be masked by) other objects.
with(v1, {
  plot(x, y, pch = 16, col = group)
  # plot() colors each point by the factor's integer code, so level i is drawn
  # in palette color i. Build the legend colors from the level positions
  # rather than unique(group): unique() returns values in order of first
  # appearance, which only lines up with levels(group) when the rows happen to
  # be sorted by group.
  legend("topleft", legend = levels(group), pch = 16,
         col = seq_along(levels(group)))
})

ggplot2 solution

# ggplot2 must be loaded before ggplot() and friends can be called.
library(ggplot2)

# Mapping colour to the factor inside aes() makes ggplot2 choose the colors
# and draw the matching legend itself, so the two cannot get out of sync.
ggplot(v1, aes(x = x, y = y, colour = group)) +
  geom_point() +
  theme_bw()

Again, I would strongly suggest the use of ggplot2 over base R unless you're only exploring the data. There are plenty of questions/answers on the matter on SO.

Related Topic