tidyTuesday does Pizza
Pizza Ratings
The #tidyTuesday
for this week involves pizza shop ratings data. Let’s see what we have.
# Read the pizza_jared CSV from the tidytuesday GitHub repository.
pizza_jared <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-10-01/pizza_jared.csv"
)
## Parsed with column specification:
## cols(
## polla_qid = col_double(),
## answer = col_character(),
## votes = col_double(),
## pollq_id = col_double(),
## question = col_character(),
## place = col_character(),
## time = col_double(),
## total_votes = col_double(),
## percent = col_double()
## )
# Read the pizza_barstool CSV from the tidytuesday GitHub repository.
pizza_barstool <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-10-01/pizza_barstool.csv"
)
## Parsed with column specification:
## cols(
## .default = col_double(),
## name = col_character(),
## address1 = col_character(),
## city = col_character(),
## country = col_character()
## )
## See spec(...) for full column specifications.
# Read the pizza_datafiniti CSV from the tidytuesday GitHub repository.
pizza_datafiniti <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-10-01/pizza_datafiniti.csv"
)
## Parsed with column specification:
## cols(
## name = col_character(),
## address = col_character(),
## city = col_character(),
## country = col_character(),
## province = col_character(),
## latitude = col_double(),
## longitude = col_double(),
## categories = col_character(),
## price_range_min = col_double(),
## price_range_max = col_double()
## )
# Column-by-column summary of the pizza_jared table.
summary(object = pizza_jared)
## polla_qid answer votes pollq_id
## Min. : 2.00 Length:375 Min. : 0.000 Min. : 2.00
## 1st Qu.:21.00 Class :character 1st Qu.: 0.000 1st Qu.:21.00
## Median :40.00 Mode :character Median : 2.000 Median :40.00
## Mean :39.93 Mean : 2.832 Mean :39.93
## 3rd Qu.:59.00 3rd Qu.: 4.000 3rd Qu.:59.00
## Max. :77.00 Max. :26.000 Max. :77.00
##
## question place time total_votes
## Length:375 Length:375 Min. :1.344e+09 Min. : 0.00
## Class :character Class :character 1st Qu.:1.395e+09 1st Qu.: 7.00
## Mode :character Mode :character Median :1.467e+09 Median :12.00
## Mean :1.459e+09 Mean :14.16
## 3rd Qu.:1.519e+09 3rd Qu.:19.00
## Max. :1.569e+09 Max. :67.00
##
## percent
## Min. :0.0000
## 1st Qu.:0.0000
## Median :0.1667
## Mean :0.2000
## 3rd Qu.:0.3333
## Max. :1.0000
## NA's :5
# Column-by-column summary of the pizza_datafiniti table.
summary(object = pizza_datafiniti)
## name address city country
## Length:10000 Length:10000 Length:10000 Length:10000
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## province latitude longitude categories
## Length:10000 Min. :21.42 Min. :-157.80 Length:10000
## Class :character 1st Qu.:34.42 1st Qu.:-104.80 Class :character
## Mode :character Median :40.12 Median : -82.91 Mode :character
## Mean :38.37 Mean : -90.06
## 3rd Qu.:40.91 3rd Qu.: -75.19
## Max. :64.85 Max. : -71.95
## price_range_min price_range_max
## Min. : 0.000 Min. : 7.00
## 1st Qu.: 0.000 1st Qu.:25.00
## Median : 0.000 Median :25.00
## Mean : 4.655 Mean :27.76
## 3rd Qu.: 0.000 3rd Qu.:25.00
## Max. :50.000 Max. :55.00
# Column-by-column summary of the pizza_barstool table.
summary(object = pizza_barstool)
## name address1 city zip
## Length:463 Length:463 Length:463 Min. : 1748
## Class :character Class :character Class :character 1st Qu.:10009
## Mode :character Mode :character Mode :character Median :10019
## Mean :18531
## 3rd Qu.:11234
## Max. :94133
##
## country latitude longitude price_level
## Length:463 Min. :25.79 Min. :-122.41 Min. :0.00
## Class :character 1st Qu.:40.72 1st Qu.: -74.09 1st Qu.:1.00
## Mode :character Median :40.75 Median : -73.99 Median :1.00
## Mean :40.19 Mean : -77.44 Mean :1.46
## 3rd Qu.:40.78 3rd Qu.: -73.97 3rd Qu.:2.00
## Max. :45.00 Max. : -70.09 Max. :3.00
## NA's :2 NA's :2
## provider_rating provider_review_count review_stats_all_average_score
## Min. :2.000 Min. : 2.0 Min. :0.100
## 1st Qu.:3.500 1st Qu.: 74.0 1st Qu.:6.240
## Median :3.500 Median : 169.0 Median :7.162
## Mean :3.671 Mean : 386.1 Mean :6.876
## 3rd Qu.:4.000 3rd Qu.: 392.0 3rd Qu.:7.809
## Max. :5.000 Max. :5797.0 Max. :9.079
##
## review_stats_all_count review_stats_all_total_score
## Min. : 1.00 Min. : 0.10
## 1st Qu.: 4.00 1st Qu.: 23.65
## Median : 8.00 Median : 54.10
## Mean : 19.02 Mean : 149.93
## 3rd Qu.: 19.00 3rd Qu.: 140.20
## Max. :568.00 Max. :5045.60
##
## review_stats_community_average_score review_stats_community_count
## Min. : 0.000 Min. : 0.00
## 1st Qu.: 6.075 1st Qu.: 3.00
## Median : 7.225 Median : 7.00
## Mean : 6.457 Mean : 17.87
## 3rd Qu.: 7.873 3rd Qu.: 18.00
## Max. :10.000 Max. :567.00
##
## review_stats_community_total_score review_stats_critic_average_score
## Min. : 0.00 Min. : 0.0000
## 1st Qu.: 15.65 1st Qu.: 0.0000
## Median : 47.30 Median : 0.0000
## Mean : 142.28 Mean : 0.9717
## 3rd Qu.: 135.10 3rd Qu.: 0.0000
## Max. :5036.30 Max. :11.0000
##
## review_stats_critic_count review_stats_critic_total_score
## Min. :0.0000 Min. : 0.000
## 1st Qu.:0.0000 1st Qu.: 0.000
## Median :0.0000 Median : 0.000
## Mean :0.1425 Mean : 1.023
## 3rd Qu.:0.0000 3rd Qu.: 0.000
## Max. :5.0000 Max. :29.800
##
## review_stats_dave_average_score review_stats_dave_count
## Min. : 0.080 Min. :1
## 1st Qu.: 6.200 1st Qu.:1
## Median : 7.100 Median :1
## Mean : 6.623 Mean :1
## 3rd Qu.: 7.800 3rd Qu.:1
## Max. :10.000 Max. :1
##
## review_stats_dave_total_score
## Min. : 0.080
## 1st Qu.: 6.200
## Median : 7.100
## Mean : 6.623
## 3rd Qu.: 7.800
## Max. :10.000
##
I will use the Datafiniti data; it contains some Oregon pizzerias.
# Show the rows of pizza_datafiniti whose province is Oregon ("OR").
filter(pizza_datafiniti, province == "OR")
## # A tibble: 122 x 10
## name address city country province latitude longitude categories
## <chr> <chr> <chr> <chr> <chr> <dbl> <dbl> <chr>
## 1 Cobu… 1710 C… Spri… US OR 44.1 -123. Restauran…
## 2 Cobu… 1710 C… Spri… US OR 44.1 -123. Restauran…
## 3 Cobu… 1710 C… Spri… US OR 44.1 -123. Restauran…
## 4 Cobu… 1710 C… Spri… US OR 44.1 -123. Restauran…
## 5 Cobu… 1710 C… Spri… US OR 44.1 -123. Restauran…
## 6 Cobu… 1710 C… Spri… US OR 44.1 -123. Restauran…
## 7 Cobu… 1710 C… Spri… US OR 44.1 -123. Restauran…
## 8 Cobu… 1710 C… Spri… US OR 44.1 -123. Restauran…
## 9 Cobu… 1710 C… Spri… US OR 44.1 -123. Restauran…
## 10 Cobu… 1710 C… Spri… US OR 44.1 -123. Restauran…
## # … with 112 more rows, and 2 more variables: price_range_min <dbl>,
## # price_range_max <dbl>
For the first plot, let me show what is going on in Oregon.
# Per-restaurant summary of the Oregon rows, kept at row level (mutate, not
# summarise) so that geom_bar() below can still count rows per name.
#   Tcount   - number of rows sharing the restaurant name.
#   PriceAvg - mean of the midpoint of each row's price range. The midpoint is
#              used rather than (max - min), which is the width of the range
#              and not a price, so the value matches the "Average Price"
#              wording in the caption.
pizzaData <- pizza_datafiniti %>%
  filter(province == "OR") %>%
  group_by(name) %>%
  mutate(
    Tcount = n(),
    PriceAvg = mean((price_range_min + price_range_max) / 2)
  ) %>%
  ungroup()

# Horizontal bar chart: one bar per restaurant name (bar length = row count),
# ordered and filled by PriceAvg.
p <- ggplot(pizzaData) +
  aes(x = reorder(name, PriceAvg), fill = PriceAvg) +
  geom_bar() +
  coord_flip() +
  labs(
    x = "Pizza Restaurant",
    y = "Count",
    title = "Oregon Pizzerias",
    caption = "data from #tidyTuesday; sorted by Average Price"
  )
p
Now I want a map.
# Outline data for the US states; keep only the Oregon rows.
states <- map_data("state")
OR.df <- subset(states, region == "oregon")

# The base map maps `group` at the plot level, so the point layer's data needs
# a `group` column as well; a single constant value is assigned to every row.
pizzaData[["group"]] <- 44

# Base layer: Oregon drawn as a gray polygon with a black border.
OR_base <- ggplot(
  data = OR.df,
  mapping = aes(x = long, y = lat, group = group)
) +
  geom_polygon(color = "black", fill = "gray") +
  labs(title = "Oregon Pizzerias")
OR_base

# Overlay each pizzeria as a red point at its longitude/latitude.
OR.Pizza <- OR_base +
  geom_point(
    data = pizzaData,
    mapping = aes(x = longitude, y = latitude, group = group),
    color = "red"
  )
OR.Pizza
Combine them into one picture
# Place the bar chart and the map side by side in a two-column layout.
grid.arrange(
  p,
  OR.Pizza,
  ncol = 2
)
Now to use a girafe to put this together interactively.
# Use the minimal theme for every plot created from here on.
theme_set(theme_minimal())

# Strip every apostrophe/backtick from the restaurant names (not only the first
# one in each name). The names are used below as tooltip and data_id values;
# presumably quote characters cause trouble there -- TODO confirm.
pizzaData <- pizzaData %>%
  mutate(name = str_remove_all(name, "['``]"))

# Build the barplot
# x is mapped only at the plot level so the bars keep the reorder() by
# PriceAvg; the interactive layer adds just the tooltip and data_id aesthetics.
gg1 <- ggplot(pizzaData, aes(x = reorder(name, PriceAvg), fill = PriceAvg)) +
  geom_bar_interactive(aes(tooltip = name, data_id = name)) +
  coord_flip() +
  scale_fill_viridis_c() +
  theme(axis.text = element_text(size = 8)) +
  labs(
    x = "Pizza Restaurant",
    y = "Count",
    caption = "data from #tidyTuesday",
    subtitle = "sorted by Average Price"
  )

# Build the map
# The text layer inherits group = group from the plot-level mapping, so
# pizzaData must carry a `group` column (it is assigned earlier in the script).
# Colour is a fixed "orange", so no colour scale is attached.
gg2 <- ggplot(data = OR.df, mapping = aes(x = long, y = lat, group = group)) +
  geom_polygon(fill = gray(0.95)) +
  geom_text_interactive(
    data = pizzaData,
    aes(
      x = longitude,
      y = latitude,
      label = emoji("pizza"),
      tooltip = name,
      data_id = name
    ),
    family = "EmojiOne",
    size = 2,
    color = "orange"
  ) +
  theme_nothing()

# Bind them together with girafe
# gg1 / gg2 stacks the bar chart above the map; data_id is shared between the
# two layers (both use the restaurant name).
a <- girafe(
  code = print(gg1 / gg2 + plot_annotation(title = "Pizza in Oregon"))
)
library(widgetframe)
## Loading required package: htmlwidgets
# Wrap the girafe widget in a frame sized to 100% width and height.
frameWidget(
  a,
  width = "100%",
  height = "100%"
)
The formatting of this is quite off. For some reason, the frame doesn’t control scrolling, and the content overplots.
Rayshader
And a mini-rayshader example. The code is lifted directly from the rayshader vignette, with the pizza data swapped in after calculating the Price Midpoint.
# Outline data for the US states.
states <- map_data("state")

# Midpoint of each row's price range. The sum is parenthesised before halving;
# without the parentheses only price_range_max would be divided by 2.
PDF <- pizza_datafiniti %>%
  mutate(Price.Midpoint = (price_range_min + price_range_max) / 2)

# State outlines with every pizzeria drawn as a point coloured by its price
# midpoint.
mtplot <- ggplot(data = states, mapping = aes(x = long, y = lat)) +
  # group = group keeps each state's outline a separate polygon instead of
  # joining all outlines into one. It is set on this layer only, because PDF
  # has no `group` column for the point layer to inherit.
  geom_polygon(aes(group = group), color = "black", fill = "gray") +
  geom_point(
    data = PDF,
    aes(x = longitude, y = latitude, color = Price.Midpoint)
  ) +
  scale_color_viridis_c() +
  theme_minimal()

# Render the ggplot in 3D with rayshader, then capture a snapshot and clear
# the scene.
plot_gg(
  mtplot,
  width = 3.5, multicore = TRUE, windowsize = c(1200, 1200),
  zoom = 0.5, phi = 35, theta = 30, sunangle = 15, soliddepth = -20
)
render_snapshot(clear = TRUE)