Open In Colab

Notebook Created by: David Rusho (Github Blog | Tableau | Linkedin)

About the Data

Data Source: data.iowa.gov

Provided by: Iowa Department of Commerce, Alcoholic Beverages Division

Data Created: November 7, 2014

Last Updated: July 1, 2021

This dataset contains the spirits purchase information of Iowa Class ā€œEā€ liquor licensees by product and date of purchase from January 1, 2014 to current. The dataset can be used to analyze total spirits sales in Iowa of individual products at the store level.

Class E liquor license, for grocery stores, liquor stores, convenience stores, etc., allows commercial establishments to sell liquor for off-premises consumption in original unopened containers.

Google BigQuery (SQL Queries)

Count Table Rows

# query: count table rows and shows years
row_count = pd.io.gbq.read_gbq(
    f"""
 SELECT
    COUNT(*) as total_rows
  FROM {dataloc}
  LIMIT 1""",
    project_id=pjt_id,

)

row_count
total_rows
0 19118960

Total Sales by Years

yrs = pd.io.gbq.read_gbq(
    f"""SELECT
              DISTINCT(FORMAT_DATE('%Y', date)) AS years,
              ROUND(SUM(sale_dollars),2) AS total_sales
          FROM {dataloc}
          GROUP BY
              years
          ORDER BY
              years DESC 
          LIMIT 9""",
    project_id=pjt_id,)

yrs.sort_values(by='years',inplace=True)

yrs["total_sales"] = round(yrs["total_sales"]/1000000,2)

fig = px.line(yrs, x="years", y="total_sales", 
              text="total_sales",
              line_shape='spline')

fig.update_traces(textposition="bottom right")

fig.update_layout(
    title={
        "text": f"Total Sales (in millions) by Year",
        "y": 0.98,
        "x": 0.5,
        "xanchor": "center",
        "yanchor": "top",
    },
    yaxis_title="")


fig.show()

Sales by Date (Top 5)

# query: top 5 sales by date 
sum_sales_date = pd.io.gbq.read_gbq(
    f"""
  SELECT
    date,
    FORMAT_DATE('%A', date) AS day_name,
    ROUND(SUM(sale_dollars),2) AS total_sales,
  FROM
    {dataloc}
  GROUP BY
    date
  ORDER BY
    total_sales DESC
  """,
    project_id=pjt_id,) 


sum_sales_date.head(5)
date day_name total_sales
0 2013-10-04 Friday 3,516,318.05
1 2013-10-11 Friday 3,278,998.01
2 2020-09-29 Tuesday 2,420,417.11
3 2019-07-31 Wednesday 2,357,939.86
4 2019-09-05 Thursday 2,290,099.19

(2012-2020) Sales by Weekday

Total liquior sales by weekdays tend to go in order of the week, with Sunday have the least amount of sales. This could be due to city/county restrictions placed on alcohol sales for these days.

# query: top 5 sales by weekday name
sum_sales_wkday = pd.io.gbq.read_gbq(
    f"""
  SELECT
    FORMAT_DATE('%A', date) AS day,
    ROUND(SUM(sale_dollars),2) AS total_sales,
  FROM
    {dataloc}
  GROUP BY
    day
  ORDER BY
    total_sales DESC
  """,
    project_id=pjt_id,
)

# create bar chart
fig = px.bar(sum_sales_wkday, y="total_sales", x="day", text="total_sales")

# update bar markers
fig.update_traces(textposition="outside", marker_color="rgb(47,138,196)")  # blue color

# update plot details
fig.update_layout(
    {"plot_bgcolor": "rgba(255,255,255, 0.9)"},  # white background
    title={
        "text": f"Total Liquor Sales  (2012-2020) by Weekday",
        "y": 0.98,
        "x": 0.5,
        "xanchor": "center",
        "yanchor": "top",
    },
    yaxis_title="",
    xaxis={
        "categoryarray": [
            "Monday",
            "Tuesday",
            "Wednesday",
            "Thursday",
            "Friday",
            "Saturday",
            "Sunday",
        ]
    },  # remove axis titles
)


fig.show()

(2012-2020) Sales by Month

# query: top 5 sales by month name
sum_sales_mth = pd.io.gbq.read_gbq(
    f"""
  SELECT
    FORMAT_DATE('%B', date) AS month,
    ROUND(SUM(sale_dollars),2) AS total_sales,
  FROM
    {dataloc}
  GROUP BY
    month
  ORDER BY
    total_sales DESC
  """,
    project_id=pjt_id,
)


# create bar chart
fig = px.bar(sum_sales_mth, x="total_sales", y="month", text="total_sales")

months = [
    "December",
    "November",
    "October",
    "September",
    "August",
    "July",
    "June",
    "May",
    "April",
    "March",
    "February",
    "January",
]

# update bar markers
fig.update_traces(textposition="inside", marker_color="rgb(47,138,196)")  # blue color

# update plot details
fig.update_layout(
    {"plot_bgcolor": "rgba(255,255,255, 0.9)"},  # white background
    title={
        "text": f"Total Liquor Sales  (2012-2020) by Month",
        "y": 0.98,
        "x": 0.5,
        "xanchor": "center",
        "yanchor": "top",
    },
    yaxis_title="",
    yaxis={"categoryarray": months},  # remove axis titles
)


fig.show()

(2012-2020) Sales by Day for December (Top 5)

# query: top 5 sales by month (December)
dec_sum_sales = pd.io.gbq.read_gbq(
    f"""
  SELECT
    EXTRACT(DAY FROM date) AS day,
    ROUND(SUM(sale_dollars),2) AS total_sales,
  FROM
     {dataloc}
  WHERE
    FORMAT_DATE('%B', date) = 'December'
  GROUP BY
    day
  ORDER BY
    total_sales DESC
  """,
    project_id=pjt_id,) 

dec_sum_sales.head(5)
day total_sales
0 26 11,284,143.90
1 27 10,255,324.46
2 19 9,622,026.31
3 21 9,506,483.97
4 18 9,504,026.97

(2012-2020) Sales by Day for June (Top 5)

# query: top 5 sales by month (June)
june_sum_sales = pd.io.gbq.read_gbq(
    f"""
  SELECT
    EXTRACT(DAY FROM date) AS day,
    ROUND(SUM(sale_dollars),2) AS total_sales,
  FROM
     {dataloc}
  WHERE
    FORMAT_DATE('%B', date) = 'June'
  GROUP BY
    day
  ORDER BY
    total_sales DESC
  """,
    project_id=pjt_id,) 

june_sum_sales.head(5)
day total_sales
0 26 12,997,654.91
1 27 11,085,004.80
2 25 10,683,658.61
3 5 9,840,296.04
4 4 9,823,520.72

(2012-2020) Sales by City

# query: top 5 sales by city
city_sum_sales = pd.io.gbq.read_gbq(
    f"""
  SELECT
    LOWER(city) AS city,
    ROUND(CAST(SUM(sale_dollars) AS numeric),0) AS total_sales,
  FROM
     {dataloc}
  GROUP BY
    city
  ORDER BY
    total_sales DESC
  """,
    project_id=pjt_id,) 

# title case for city col
city_sum_sales.city = city_sum_sales.city.str.title()

#convert total_sales to float
city_sum_sales.total_sales = city_sum_sales.total_sales.astype('float')

# city_sum_sales.head()

# create bar chart
fig = px.bar(city_sum_sales.head(10), y="total_sales", x="city",text="total_sales")

# update bar markers
fig.update_traces(textposition="outside", 
                  marker_color="rgb(81,162,213)")  # blue color

# update plot details
fig.update_layout(
    {"plot_bgcolor": "rgba(255,255,255, 0.9)"},  # white background
    title={
        "text": f"Total Liquor Sales (2012-2020) by City",
        "y": 0.98,
        "x": 0.5,
        "xanchor": "center",
        "yanchor": "top",
    },
    yaxis_title="",
)


fig.show()

(2012-2020) Sales by County

# query: top 5 sales by county
county_sum_sales = pd.io.gbq.read_gbq(
    f"""
  SELECT
    LOWER(county) AS county,
    ROUND(CAST(SUM(sale_dollars) AS numeric),0) AS total_sales,
  FROM
     {dataloc}
  GROUP BY
    county
  ORDER BY
    total_sales DESC
  """,
    project_id=pjt_id,) 

# title case for county col
county_sum_sales.county = county_sum_sales.county.str.title()

#convert total_sales to float
county_sum_sales.total_sales = county_sum_sales.total_sales.astype('float')

# county_sum_sales.head()

# create bar chart
fig = px.bar(county_sum_sales.head(10), y="total_sales", x="county",text="total_sales")

# update bar markers
fig.update_traces(textposition="outside", 
                  marker_color="rgb(81,162,213)")  # blue color

# update plot details
fig.update_layout(
    {"plot_bgcolor": "rgba(255,255,255, 0.9)"},  # white background
    title={
        "text": f"Total Liquor Sales (2012-2020) by County",
        "y": 0.98,
        "x": 0.5,
        "xanchor": "center",
        "yanchor": "top",
    },
    yaxis_title="",
)


fig.show()

(2012-2020) Sales by Vendor (Top 5)

# query: top 5 sales by vendor 
vendor_sum_sales = pd.io.gbq.read_gbq(
    f"""
  SELECT
    LOWER(vendor_name) AS vendor,
    ROUND(CAST(SUM(sale_dollars) AS numeric),0) AS total_sales
  FROM
    {dataloc}
  GROUP BY
    vendor
  ORDER BY
    total_sales DESC
  """,
    project_id=pjt_id,) 

# title case for vendor col
vendor_sum_sales.vendor = vendor_sum_sales.vendor.str.title()

#convert total_sales to float
vendor_sum_sales.total_sales = vendor_sum_sales.total_sales.astype('float')

vendor_sum_sales.head()
vendor total_sales
0 Diageo Americas 551,505,642.00
1 Jim Beam Brands 199,903,939.00
2 Sazerac Company Inc 115,002,985.00
3 Pernod Ricard Usa 93,186,264.00
4 Pernod Ricard Usa/Austin Nichols 92,119,352.00

(2012-2020) Sales by Item (Top 5)

# query: top 5 sales by item 

item_sum_sales = pd.io.gbq.read_gbq(
    f"""
  SELECT
    LOWER(item_description) AS item,
    ROUND(CAST(SUM(sale_dollars) AS numeric),0) AS total_sales
  FROM
    {dataloc}
  GROUP BY
    item
  ORDER BY
    total_sales DESC
     """,
    project_id=pjt_id,) 

# title case for item col
item_sum_sales.item = item_sum_sales.item.str.title()

#convert total_sales to float
item_sum_sales.total_sales = item_sum_sales.total_sales.astype('float')

item_sum_sales.head()
item total_sales
0 Black Velvet 103,941,337.00
1 Captain Morgan Spiced Rum 72,872,932.00
2 Titos Handmade Vodka 67,739,561.00
3 Jack Daniels Old #7 Black Lbl 67,262,715.00
4 Fireball Cinnamon Whiskey 55,922,339.00

(2012-2020) Sales by Item and Category (Top 5)

# query: top 5 sales by item & category

item_cat_sum_sales = pd.io.gbq.read_gbq(
    f"""
  SELECT
    LOWER(item_description) AS item,
    LOWER(category_name) AS category,
    ROUND(CAST(SUM(sale_dollars) AS numeric),0) AS total_sales
  FROM
    {dataloc}
  GROUP BY
    item,category
  ORDER BY
    total_sales DESC
       """,
    project_id=pjt_id,) 

# title case for item col
item_cat_sum_sales.item = item_cat_sum_sales.item.str.title()

# title case for category col
item_cat_sum_sales.category = item_cat_sum_sales.category.str.title()

#convert total_sales to float
item_cat_sum_sales.total_sales = item_cat_sum_sales.total_sales.astype('float')

item_cat_sum_sales.head()
item category total_sales
0 Black Velvet Canadian Whiskies 103,941,337.00
1 Captain Morgan Spiced Rum Spiced Rum 72,872,932.00
2 Jack Daniels Old #7 Black Lbl Tennessee Whiskies 67,262,715.00
3 Titos Handmade Vodka American Vodkas 60,750,450.00
4 Fireball Cinnamon Whiskey Whiskey Liqueur 55,922,339.00

Sales by Item and Category for the Year 2020

# query: top 5 sales by item & category for 2020

item_cat_sum_sales_2020 = pd.io.gbq.read_gbq(
    f"""
  SELECT
    FORMAT_DATE('%Y', date) AS year,
    LOWER(item_description) AS item,
    LOWER(category_name) AS category,
    ROUND(CAST(SUM(sale_dollars) AS numeric),0) AS total_sales
  FROM
    {dataloc}
  WHERE
    FORMAT_DATE('%Y', date) LIKE ('%2020%')
  GROUP BY
    item,
    category,
    year
  ORDER BY
    total_sales DESC
       """,
    project_id=pjt_id,) 

# title case for item col
item_cat_sum_sales_2020.item = item_cat_sum_sales_2020.item.str.title()

# title case for category col
item_cat_sum_sales_2020.category = item_cat_sum_sales_2020.category.str.title()

#convert total_sales to float
item_cat_sum_sales_2020.total_sales = item_cat_sum_sales_2020.total_sales.astype('float')

item_2020 = (
    item_cat_sum_sales_2020.sort_values(by="total_sales", ascending=False)
    .head(10)
    .copy()
)
item_2020 = item_2020.sort_values(by="total_sales", ascending=True)

fig = px.bar(
    item_2020,
    x="total_sales",
    y="item",
    orientation="h",
    text="total_sales",
    color="category",
)

# update bar markers
fig.update_traces(textposition="inside")

# update plot details
fig.update_layout(
    {"plot_bgcolor": "rgba(255,255,255, 0.9)"},  # white background
    title={
        "text": f"2020 Sales by Item & Category ",
        "y": 0.98,
        "x": 0.5,
        "xanchor": "center",
        "yanchor": "top",
    },
    yaxis_title="" #remove axis titles
)

fig.show()

Sales by Item and Category for the Past 10 Years (Top 10)

# query: top 5 sales by item & category for past 10 years

item_cat_sum_sales = pd.io.gbq.read_gbq(
    f"""
  SELECT
    FORMAT_DATE('%Y', date) AS year,
    LOWER(item_description) AS item,
    LOWER(category_name) AS category,
    ROUND(CAST(SUM(sale_dollars) AS numeric),0) AS total_sales
  FROM
    {dataloc}
  GROUP BY
    item,
    category,
    year
  ORDER BY
    total_sales DESC
       """,
    project_id=pjt_id,) 

# title case for item col
item_cat_sum_sales.item = item_cat_sum_sales.item.str.title()

# title case for category col
item_cat_sum_sales.category = item_cat_sum_sales.category.str.title()

#convert total_sales to float
item_cat_sum_sales.total_sales = item_cat_sum_sales.total_sales.astype('float')

item_cat_sum_sales.head(10)
year item category total_sales
0 2020 Titos Handmade Vodka American Vodkas 20,148,376.00
1 2018 Titos Handmade Vodka American Vodkas 14,943,390.00
2 2017 Black Velvet Canadian Whiskies 13,258,183.00
3 2018 Black Velvet Canadian Whiskies 13,079,561.00
4 2016 Black Velvet Canadian Whiskies 12,378,453.00
5 2019 Titos Handmade Vodka American Vodkas 12,125,095.00
6 2015 Black Velvet Canadian Whiskies 12,113,647.00
7 2014 Black Velvet Canadian Whiskies 11,697,936.00
8 2020 Black Velvet Canadian Whiskies 11,308,314.00
9 2013 Black Velvet Canadian Whiskies 11,096,985.00

References

Google Cloud Docs: SQL Date Formating