tidyTuesday does Pizza

Pizza Ratings

The #tidyTuesday for this week involves pizza shop ratings data. Let’s see what we have.

# Poll-style ratings: per the parsed column spec, each row carries a question,
# an answer, its vote count, the place, and the share of total votes.
pizza_jared <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-10-01/pizza_jared.csv"
)
## Parsed with column specification:
## cols(
##   polla_qid = col_double(),
##   answer = col_character(),
##   votes = col_double(),
##   pollq_id = col_double(),
##   question = col_character(),
##   place = col_character(),
##   time = col_double(),
##   total_votes = col_double(),
##   percent = col_double()
## )
# Barstool data: name/address/city/country as character columns; every other
# column is parsed as double (location, price level, review statistics).
pizza_barstool <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-10-01/pizza_barstool.csv"
)
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   name = col_character(),
##   address1 = col_character(),
##   city = col_character(),
##   country = col_character()
## )
## See spec(...) for full column specifications.
# Datafiniti data: restaurant name, address fields (including province),
# latitude/longitude, categories, and a min/max price range.
pizza_datafiniti <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-10-01/pizza_datafiniti.csv"
)
## Parsed with column specification:
## cols(
##   name = col_character(),
##   address = col_character(),
##   city = col_character(),
##   country = col_character(),
##   province = col_character(),
##   latitude = col_double(),
##   longitude = col_double(),
##   categories = col_character(),
##   price_range_min = col_double(),
##   price_range_max = col_double()
## )
# Column-by-column overview of the poll data.
summary(object = pizza_jared)
##    polla_qid        answer              votes           pollq_id    
##  Min.   : 2.00   Length:375         Min.   : 0.000   Min.   : 2.00  
##  1st Qu.:21.00   Class :character   1st Qu.: 0.000   1st Qu.:21.00  
##  Median :40.00   Mode  :character   Median : 2.000   Median :40.00  
##  Mean   :39.93                      Mean   : 2.832   Mean   :39.93  
##  3rd Qu.:59.00                      3rd Qu.: 4.000   3rd Qu.:59.00  
##  Max.   :77.00                      Max.   :26.000   Max.   :77.00  
##                                                                     
##    question            place                time            total_votes   
##  Length:375         Length:375         Min.   :1.344e+09   Min.   : 0.00  
##  Class :character   Class :character   1st Qu.:1.395e+09   1st Qu.: 7.00  
##  Mode  :character   Mode  :character   Median :1.467e+09   Median :12.00  
##                                        Mean   :1.459e+09   Mean   :14.16  
##                                        3rd Qu.:1.519e+09   3rd Qu.:19.00  
##                                        Max.   :1.569e+09   Max.   :67.00  
##                                                                           
##     percent      
##  Min.   :0.0000  
##  1st Qu.:0.0000  
##  Median :0.1667  
##  Mean   :0.2000  
##  3rd Qu.:0.3333  
##  Max.   :1.0000  
##  NA's   :5
# Column-by-column overview of the Datafiniti data.
summary(object = pizza_datafiniti)
##      name             address              city             country         
##  Length:10000       Length:10000       Length:10000       Length:10000      
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##    province            latitude       longitude        categories       
##  Length:10000       Min.   :21.42   Min.   :-157.80   Length:10000      
##  Class :character   1st Qu.:34.42   1st Qu.:-104.80   Class :character  
##  Mode  :character   Median :40.12   Median : -82.91   Mode  :character  
##                     Mean   :38.37   Mean   : -90.06                     
##                     3rd Qu.:40.91   3rd Qu.: -75.19                     
##                     Max.   :64.85   Max.   : -71.95                     
##  price_range_min  price_range_max
##  Min.   : 0.000   Min.   : 7.00  
##  1st Qu.: 0.000   1st Qu.:25.00  
##  Median : 0.000   Median :25.00  
##  Mean   : 4.655   Mean   :27.76  
##  3rd Qu.: 0.000   3rd Qu.:25.00  
##  Max.   :50.000   Max.   :55.00
# Column-by-column overview of the Barstool data.
summary(object = pizza_barstool)
##      name             address1             city                zip       
##  Length:463         Length:463         Length:463         Min.   : 1748  
##  Class :character   Class :character   Class :character   1st Qu.:10009  
##  Mode  :character   Mode  :character   Mode  :character   Median :10019  
##                                                           Mean   :18531  
##                                                           3rd Qu.:11234  
##                                                           Max.   :94133  
##                                                                          
##    country             latitude       longitude        price_level  
##  Length:463         Min.   :25.79   Min.   :-122.41   Min.   :0.00  
##  Class :character   1st Qu.:40.72   1st Qu.: -74.09   1st Qu.:1.00  
##  Mode  :character   Median :40.75   Median : -73.99   Median :1.00  
##                     Mean   :40.19   Mean   : -77.44   Mean   :1.46  
##                     3rd Qu.:40.78   3rd Qu.: -73.97   3rd Qu.:2.00  
##                     Max.   :45.00   Max.   : -70.09   Max.   :3.00  
##                     NA's   :2       NA's   :2                       
##  provider_rating provider_review_count review_stats_all_average_score
##  Min.   :2.000   Min.   :   2.0        Min.   :0.100                 
##  1st Qu.:3.500   1st Qu.:  74.0        1st Qu.:6.240                 
##  Median :3.500   Median : 169.0        Median :7.162                 
##  Mean   :3.671   Mean   : 386.1        Mean   :6.876                 
##  3rd Qu.:4.000   3rd Qu.: 392.0        3rd Qu.:7.809                 
##  Max.   :5.000   Max.   :5797.0        Max.   :9.079                 
##                                                                      
##  review_stats_all_count review_stats_all_total_score
##  Min.   :  1.00         Min.   :   0.10             
##  1st Qu.:  4.00         1st Qu.:  23.65             
##  Median :  8.00         Median :  54.10             
##  Mean   : 19.02         Mean   : 149.93             
##  3rd Qu.: 19.00         3rd Qu.: 140.20             
##  Max.   :568.00         Max.   :5045.60             
##                                                     
##  review_stats_community_average_score review_stats_community_count
##  Min.   : 0.000                       Min.   :  0.00              
##  1st Qu.: 6.075                       1st Qu.:  3.00              
##  Median : 7.225                       Median :  7.00              
##  Mean   : 6.457                       Mean   : 17.87              
##  3rd Qu.: 7.873                       3rd Qu.: 18.00              
##  Max.   :10.000                       Max.   :567.00              
##                                                                   
##  review_stats_community_total_score review_stats_critic_average_score
##  Min.   :   0.00                    Min.   : 0.0000                  
##  1st Qu.:  15.65                    1st Qu.: 0.0000                  
##  Median :  47.30                    Median : 0.0000                  
##  Mean   : 142.28                    Mean   : 0.9717                  
##  3rd Qu.: 135.10                    3rd Qu.: 0.0000                  
##  Max.   :5036.30                    Max.   :11.0000                  
##                                                                      
##  review_stats_critic_count review_stats_critic_total_score
##  Min.   :0.0000            Min.   : 0.000                 
##  1st Qu.:0.0000            1st Qu.: 0.000                 
##  Median :0.0000            Median : 0.000                 
##  Mean   :0.1425            Mean   : 1.023                 
##  3rd Qu.:0.0000            3rd Qu.: 0.000                 
##  Max.   :5.0000            Max.   :29.800                 
##                                                           
##  review_stats_dave_average_score review_stats_dave_count
##  Min.   : 0.080                  Min.   :1              
##  1st Qu.: 6.200                  1st Qu.:1              
##  Median : 7.100                  Median :1              
##  Mean   : 6.623                  Mean   :1              
##  3rd Qu.: 7.800                  3rd Qu.:1              
##  Max.   :10.000                  Max.   :1              
##                                                         
##  review_stats_dave_total_score
##  Min.   : 0.080               
##  1st Qu.: 6.200               
##  Median : 7.100               
##  Mean   : 6.623               
##  3rd Qu.: 7.800               
##  Max.   :10.000               
## 

I will use the Datafiniti data; it contains some Oregon pizzerias.

# Preview the rows whose province is Oregon ("OR").
filter(pizza_datafiniti, province == "OR")
## # A tibble: 122 x 10
##    name  address city  country province latitude longitude categories
##    <chr> <chr>   <chr> <chr>   <chr>       <dbl>     <dbl> <chr>     
##  1 Cobu… 1710 C… Spri… US      OR           44.1     -123. Restauran…
##  2 Cobu… 1710 C… Spri… US      OR           44.1     -123. Restauran…
##  3 Cobu… 1710 C… Spri… US      OR           44.1     -123. Restauran…
##  4 Cobu… 1710 C… Spri… US      OR           44.1     -123. Restauran…
##  5 Cobu… 1710 C… Spri… US      OR           44.1     -123. Restauran…
##  6 Cobu… 1710 C… Spri… US      OR           44.1     -123. Restauran…
##  7 Cobu… 1710 C… Spri… US      OR           44.1     -123. Restauran…
##  8 Cobu… 1710 C… Spri… US      OR           44.1     -123. Restauran…
##  9 Cobu… 1710 C… Spri… US      OR           44.1     -123. Restauran…
## 10 Cobu… 1710 C… Spri… US      OR           44.1     -123. Restauran…
## # … with 112 more rows, and 2 more variables: price_range_min <dbl>,
## #   price_range_max <dbl>

For the first plot, let me show what is going on in Oregon.

# Oregon rows with per-restaurant summaries attached to every row:
#   Tcount   - number of rows sharing the restaurant name
#   PriceAvg - mean midpoint of the price range, (min + max) / 2
# The previous definition, mean(price_range_max - price_range_min), is the
# average *width* of the price range rather than an average price, which did
# not match the "sorted by Average Price" caption used below.
pizzaData <- pizza_datafiniti %>%
  filter(province == "OR") %>%
  group_by(name) %>%
  mutate(
    Tcount = n(),
    PriceAvg = mean((price_range_min + price_range_max) / 2)
  ) %>%
  ungroup()

# Horizontal bar chart: one bar per restaurant name (bar length = row count),
# ordered and filled by PriceAvg.
p <- ggplot(pizzaData) +
  aes(x = reorder(name, PriceAvg), fill = PriceAvg) +
  geom_bar() +
  coord_flip() +
  labs(
    x = "Pizza Restaurant",
    y = "Count",
    title = "Oregon Pizzerias",
    caption = "data from #tidyTuesday; sorted by Average Price"
  )
p

Now I want a map.

# The base map maps `group` at the plot level, so any layer drawn from
# pizzaData inherits that mapping and needs a `group` column of its own.
# NOTE(review): 44 is presumably Oregon's group id in map_data("state") - confirm.
pizzaData[["group"]] <- 44

# State outlines, narrowed to Oregon.
states <- map_data("state")
OR.df <- states[which(states$region == "oregon"), ]

# Gray Oregon outline with a black border.
OR_base <- ggplot(OR.df, aes(x = long, y = lat, group = group)) +
  geom_polygon(fill = "gray", color = "black") +
  labs(title = "Oregon Pizzerias")
OR_base

# Overlay every pizzeria as a red point at its longitude/latitude.
OR.Pizza <- OR_base +
  geom_point(
    mapping = aes(x = longitude, y = latitude, group = group),
    data = pizzaData,
    color = "red"
  )
OR.Pizza

Combine them into one picture.

# Bar chart and map side by side in a two-column layout.
grid.arrange(grobs = list(p, OR.Pizza), ncol = 2)

Now to use a girafe to put this together interactively.

theme_set(theme_minimal())

# Strip quote characters from the names, which are reused below as tooltip and
# data_id values (presumably because quotes break the interactive ids -
# confirm). str_remove_all() drops every occurrence; str_remove() would only
# drop the first one in each name.
pizzaData <- pizzaData %>% mutate(name = str_remove_all(name, "['``]"))

# Build the barplot.
# The interactive layer must not set `x` itself: a layer-level `x = name`
# overrides the plot-level reorder(name, PriceAvg) and loses the price ordering.
gg1 <- ggplot(pizzaData, aes(x = reorder(name, PriceAvg), fill = PriceAvg)) +
  geom_bar_interactive(aes(tooltip = name, data_id = name)) +
  coord_flip() +
  scale_fill_viridis_c() +
  theme(axis.text = element_text(size = 8)) +
  # `subtitle`, not `sub`: labs() does not partially match argument names, so
  # `sub = ...` was silently treated as a label for a non-existent aesthetic.
  labs(
    x = "Pizza Restaurant",
    y = "Count",
    caption = "data from #tidyTuesday",
    subtitle = "sorted by Average Price"
  )

# Build the map: Oregon outline with one pizza emoji per pizzeria row.
# The text layer inherits `group` from the plot-level mapping, so pizzaData
# must already carry a `group` column.
gg2 <- ggplot(data = OR.df, mapping = aes(x = long, y = lat, group = group)) +
  geom_polygon(fill = gray(0.95)) +
  geom_text_interactive(
    data = pizzaData,
    aes(
      y = latitude,
      x = longitude,
      label = emoji("pizza"),
      tooltip = name,
      data_id = name
    ),
    family = "EmojiOne",
    size = 2,
    color = "orange"
  ) +
  # guide = "none" replaces the deprecated guide = FALSE.
  scale_color_viridis_c(guide = "none") +
  theme_nothing()

# Bind them together with girafe: bar chart stacked above the map.
a <- girafe(code = print(gg1 / gg2 + plot_annotation(title = "Pizza in Oregon")))
library(widgetframe)
## Loading required package: htmlwidgets
# Wrap the girafe widget in a frame sized to 100% in both dimensions.
frameWidget(a, height = "100%", width = "100%")

The formatting of this is quite off. For some reason, the frame doesn’t control scrolling, and the output overplots.

Rayshader

And a mini-rayshader. This is a complete lift and replace of the vignette after calculating the Price Midpoint.

states <- map_data("state")

# Midpoint of each listing's price range. The sum is parenthesised because
# `price_range_min + price_range_max / 2` halves only the maximum.
PDF <- pizza_datafiniti %>%
  mutate(Price.Midpoint = (price_range_min + price_range_max) / 2)

# State outlines with every listing drawn as a point coloured by its midpoint.
mtplot <- ggplot(data = states, mapping = aes(x = long, y = lat)) +
  # group = group keeps each outline a separate polygon; without it all the
  # states are joined into a single path. It is set on this layer only because
  # PDF has no `group` column for the point layer to inherit.
  geom_polygon(aes(group = group), color = "black", fill = "gray") +
  geom_point(
    data = PDF,
    aes(x = longitude, y = latitude, color = Price.Midpoint)
  ) +
  scale_color_viridis_c() +
  theme_minimal()

# Render the ggplot in 3D with rayshader, then capture a snapshot and clear
# the scene.
plot_gg(mtplot, width = 3.5, multicore = TRUE, windowsize = c(1200, 1200),
        zoom = 0.5, phi = 35, theta = 30, sunangle = 15, soliddepth = -20)
render_snapshot(clear = TRUE)

Avatar
Robert W. Walker
Associate Professor of Quantitative Methods

My research interests include causal inference, statistical computation and data visualization.

Next
Previous

Related