Sample Work

On this page you’ll find a sampling of R and SQL code I’ve developed at work. All of the data is fake. To see the code, click on the buttons below the charts.

Slider

Code

SQL used in Periscope to query the data:

with 
  base as (select * from [all_regions_and_quarters]),
  data as
  (
    select
      [so.finance_date__c:quarter] as q,
      [user_subregion],
      [arr]
    from 
      salesforce.opportunity so
      join salesforce.user su 
        on su.id = so.ownerid
      left join salesforce.account sa
        on sa.ownerid = su.id
    where
      region is not null
    group by 1,2
    order by 2,1
  )
select
  base.region, 
  base.quarter,
  nvl(data.arr,0) as nacv,
  row_number() over(
    partition by base.region
    order by base.quarter
    ) as f
from base
left join data
on 
  base.region = data.region
  and base.quarter = data.q
where quarter between '2018.01.01' and '2020.01.01'
order by 1,2

Creating fake data: (Date format is what [so.finance_date__c:quarter] outputs in Periscope SQL)

region <- c("AMS West", "Northern Europe", "Japan", "Federal", "AMS Northeast", "APAC", "AMS Southeast", "Southern Europe", "METNA", "AMS Central", "UK/I", "Central Europe")

quarter <- c("2018-01-01", "2018-04-01", "2018-07-01", "2018-10-01", "2019-01-01", "2019-04-01", "2019-07-01", "2019-10-01", "2020-01-01")

merge(region,quarter,by=NULL) %>%
  rename(region=x, quarter=y) %>%
  mutate_all(as.character) %>%
  group_by(region) %>%
  mutate(f = order(order(quarter))) -> df1
  
nice_q <- function(date){
  year <- substr(date, 1, 4)
  q <- substr(date, 6, 7)
  q <- ifelse(q=='01',"Q1",ifelse(q=='04',"Q2",ifelse(q=='07',"Q3","Q4")))
  paste(year,q)
}

df1$quarter %<>% map_chr(nice_q)
df1$nacv <- sample(0:100000, nrow(df1))

df1

The fake data:

Creating the plot:

df1 %>%
    ggplot() +
    geom_segment(aes(x=region, xend=region, y=0, yend=nacv, color=region, frame=quarter, size=I(10))) +
    theme_bw()
    ggplotly(tooltip=c("x", "yend", "frame"), hoveron="points+fills") %>%
    animation_opts(1000, easing = "elastic", redraw = FALSE) %>%
    animation_slider(currentvalue = list(prefix = "YEAR ", font = list(color=rgb(0,0,0,alpha=0)))) %>%
    layout(xaxis = list(title = "", tickangle = 25), yaxis = list(title = "NACV"))

Buttons

The R code here both makes the chart & splits pipeline across the different channel sources according to some custom business logic, which is defined in channel_handling() below.

Code

SQL used in Periscope to query the data:

with base as (select * from [all_regions_and_channels]),

pipeline as(
  select
  [channel_new],
  [user_subregion],
  [field_sales_region] as Theater,
  sum(case when [filter_pipeline_attr_closing_currentYear]
      then [field_estimated_narr] * bat.bizible2__attribution_custom_model__c / 100 end) as year_pipeline,
  sum(case when [filter_pipeline_attr_closing_currentQuarter]
      then [field_estimated_narr] * bat.bizible2__attribution_custom_model__c / 100 end) as quarter_pipeline
  from
    [join_opp_bat_user]
  where
    [Theater=Sales_Theater]
    and [region=Sales_Region]
    and [su.firstname || ' ' || su.lastname=Sales_Rep]
    and [field_sales_region] not like 'Channel'
  group by 1,2,3
  ),

combined as (
  select
    base.Region,
    base.Channel,
    base.Theater,
    nvl(year_pipeline, 0) as year_pipeline,
    nvl(quarter_pipeline, 0) as quarter_pipeline
  from
    base
    left join pipeline on
      base.Region = pipeline.Region
      and base.Channel = pipeline.Channel
      and base.Theater = pipeline.Theater
  where [base.Theater=Sales_Theater]
  and [base.region=Sales_Region] 
  )

select
  combined.Region,
  combined.Channel,
  sum(year_pipeline) as year_pipeline,
  sum(quarter_pipeline) as quarter_pipeline,
  max(cast(gs.pipeline_y_goal as numeric)) as year_goal,
  max(cast(gs.pipeline_q_goal as numeric)) as quarter_goal
from
  combined
  left join google_sheets.goal_sales gs
  on
    gs.region = combined.Region
where
  [gs.Region=Sales_Region]
  and [gs.theater=Sales_Theater]
group by 1,2
order by 1,2

Creating fake data:

region <- c("AMS West", "Northern Europe", "Japan", "Federal", "AMS Northeast", "APAC", "AMS Southeast", "Southern Europe", "METNA", "AMS Central", "UK/I", "Central Europe")

channel <- c("Channel Marketing", "Partner", "Sales", "Marketing", "Source")

merge(region,channel,by=NULL) %>%
  rename(region=x, channel=y) %>%
  mutate_all(as.character) -> df2

df2$year_pipeline <- sample(0:2000001, nrow(df2), replace=TRUE)
df2$year_goal <- sample(0:2000000, nrow(df2), replace=TRUE)
df2$quarter_pipeline <- sample(0:500001, nrow(df2), replace=TRUE)
df2$quarter_goal <- sample(0:500000, nrow(df2), replace=TRUE)

df2

The fake data:

Creating the plot:

year_goal <- df2 %$% sum(year_goal) %>% (function(x) x/15)
quarter_goal <- df2 %$% sum(quarter_goal) %>% (function(x) x/15)

year_pipeline <- df2 %>% dcast(region ~ channel, value.var='year_pipeline')
quarter_pipeline <- df2 %>% dcast(region ~ channel, value.var='quarter_pipeline')

channel_handling <- function(df){
    df %>% 
    mutate(
      Marketing = Marketing + `Channel Marketing`/2,
      Partner = Partner + Source/2 + `Channel Marketing`/2,
      Sales = Sales + Source/2
    ) %>%
    select(Marketing, Partner, Sales) %>%
    colSums()
}

rbind(channel_handling(year_pipeline), channel_handling(quarter_pipeline)) %>%
  cbind(c("year_pipeline","quarter_pipeline"),.) %>%
  as.data.frame() %>%
  melt(measure.vars=c('Marketing','Partner','Sales')) %>%
  dcast(variable ~ V1) %>%
  rename(channel=variable) %>%
  mutate(
    quarter_goal = quarter_goal,
    year_goal = year_goal
  ) -> df2

library("plotly")

ht <- "<b>%{text}</b><br>$%{y:0.3s}<br>%{x}<br><extra></extra>"                                            
                                              
list(
  list(
    active = 0,
    type= 'buttons',
    x=.5, y = 1.1,
    font = list(size="15"),
    yanchor='center', xanchor='center',
    direction='right',
    buttons = list(
        list(label = "Current Year", method = "update", args = list(list(visible = c(FALSE, TRUE)), list(title = " "))),
      list(label = "Current Quarter", method = "update", args = list(list(visible = c(TRUE, FALSE)), list(title = " ")))
        )
  )
) -> menu

df2 %>% 
    plot_ly(type='bar', hovertemplate=ht) %>%
    add_trace(x=~channel, y=~year_pipeline, name="Current Year", text="Current Year", marker = list(color = '#70B8FF')) %>%
    add_trace(x=~channel, y=~quarter_pipeline, name="Current Quarter", text="Current Quarter", marker = list(color = '#70B8FF'), visible=F) %>%
    add_lines(x=~channel, y=~year_goal, name = "Year Goal", text = "Year Goal", line=list(color="#8849A5")) %>%
    add_lines(x=~channel, y=~quarter_goal, name = "Quarter Goal", text = "Quarter Goal", line=list(color="#8849A5"), visible=F) %>%
    layout(showlegend=TRUE, xaxis=list(title="Channel"), yaxis=list(title="ARR", tickprefix="$"), title=" ", updatemenus=menu) %>%
periscope.plotly()

Waterfall

Code

SQL used in Periscope to query the data:

select
  sum(case when [so.finance_date__c:year] between 1999 and 2020
      then nvl(so.churn__c,0) + nvl(so.narr__c,0) end) as beginning,
  sum(case when [so.finance_date__c:quarter] = '2020.01.01'
      then nvl(so.churn__c,0) + nvl(so.narr__c,0) end) as q1,
  sum(case when [so.finance_date__c:quarter] = '2020.04.01'
      then nvl(so.churn__c,0) + nvl(so.narr__c,0) end) as q2,
  sum(case when [so.finance_date__c:quarter] = '2020.07.01'
      then nvl(so.churn__c,0) + nvl(so.narr__c,0) end) as q3,
  sum(case when [so.finance_date__c:quarter] = '2020.10.01'
      then nvl(so.churn__c,0) + nvl(so.narr__c,0) end) as q4
from
  salesforce.opportunity so
  join salesforce.user su on
    so.ownerid = su.id
where
  so.isdeleted = false
  and so.stagename in ('Closed Won', 'Closed Lost')

Creating fake data:

# Data that comes out of SQL is wide like this (each q its own column)

mm <- 10^6 # *mm = million

df <- tibble(
  Start = 10*mm,
  Q1 = 0.4*mm,
  Q2 = 0.8*mm,
  Q3 = 1.9*mm,
  Q4 = 1.4*mm
)

The fake data:

Making the plot:

# Totals
df %<>% 
  melt(
    variable.name='q',
    value.name='arr',
    measure.vars=colnames(df)
  ) %>%
  adorn_totals("row")

df$id <- seq_along(df$arr) # id for later use
df$top <- cumsum(df$arr) # cumulative sum of amount
df$top <- c(head(df$top,-1),0) # same as above but ending with 0
df$bot <- c(0, head(df$top,-1)) # same as above but starting with 0
df[c(3,1,5,2,4)]->df # reorder columns
mutate(
  df,
  Goal=c(10*mm, 11*mm, 12*mm, 13*mm, 14*mm, 14*mm)
)->df

ggplotly(
  ggplot(df) +
    geom_rect(
      aes(xmin=id-.4, xmax=id+.4, ymin=bot, ymax=top),
      fill=rgb(111,185,255,maxColorValue = 255)
    ) +
    geom_step(
      aes(x=id+.5, y=Goal),
      direction="vh",
      color=rgb(136,74,165,maxColorValue = 255)
    ) +
    scale_x_continuous(
      breaks=c(1,2,3,4,5,6),
      labels=c("Start","Q1","Q2","Q3","Q4","Cumulative")
    ) +
    scale_y_continuous(
      labels=dollar,
      breaks=seq(0,60000000,5000000),
      expand = c(0, 0)
    )+
    xlab("Quarter") + ylab("ARR") +
    theme_bw() + theme(panel.border = element_blank())
) -> p

#Turn off hover info for line & bar
p$x$data[[1]]$hoverinfo <- "none"
p$x$data[[2]]$hoverinfo <- "none"

p

Funnel

Code

Creating fake data:

k <- 1000

df <- tibble(
  Stage = map_chr(1:5,(function(i) paste("Stage",i))),
  Users = c(95*k, 90*k, 85*k, 75*k, 70*k)
)

The fake data:

Creating the plot:

# Plot is made with ggplot2 then converted to plotly
# First: make the main bars with geom_rect()
# Then: create all the logic for the lighter-colored in-between trapezoids with geom_polygon() and poly df
# The 4 different poly dfs basically define one point in the trapezoid's shape
# When merged, they define 4 different trapezoids with 4 points each

# Alter df so that main bars can be made in geom_rect()
df <- df %>%
  mutate(
    `% of Total` = Users / first(Users),
    `% Label` = paste(round((`% of Total` * 100),2), "%"),
    `% of Previous` = replace_na((Users / lag(Users)), 1),
    y_top = nrow(.) - row_number() + 3/4,
    y_bot = nrow(.) - row_number()
  )

# Start creating poly df so that in-between trapezoids can be made in geom_polygon()
# 4 different poly dfs each define one point of each trapezoid

poly1 <- df %>%
  mutate(
    y_top = y_bot
  )

poly2 <- df %>%
  mutate(
    `% of Total` = 1 - `% of Total`,
    y_top = y_bot
  )

poly3 <- df %>%
  mutate(
    y_top = lead(y_top),
    `% of Total` = lead(`% of Total`)
  )

poly4 <- df %>%
  mutate(
    y_top = lead(y_top),
    `% of Total` = 1 - lead(`% of Total`)
  )

# Combine all 4 poly dfs
rbind(poly1, poly2, poly3, poly4) -> poly

poly <- poly %>%
  arrange(`% of Total`)

# Make plot
ggplotly(
  tooltip = c("xmax"),
  ggplot(df) +
    # The main rectangles
    geom_rect(
      aes(
        xmax = `% of Total`,
        xmin = 1 - `% of Total`,
        ymax = y_top,
        ymin = y_bot,
        fill = Stage
      )
    ) +
    geom_polygon(
      data=poly,
      aes(
        group=Stage,
        x = `% of Total`,
        y = y_top,
        fill=Stage,
        alpha = 1/2
      )
    ) +
    geom_text(
      aes(
        1/2, y_top - 3/8,
        label= paste(Users, "\n % of Total:", `% Label`)
      )
    ) +
    scale_fill_brewer(palette="Set2") +
    theme_void() +
    theme(
      legend.position = "right",
      panel.grid.major = element_blank(),
      axis.line = element_blank()
    )
) #%>% 
  #periscope.plotly()