Some More SQL Essentials

Contents

Some More SQL Essentials#

DISTINCT#

Returns unique values from a column, removing duplicates.

-- Get unique customer IDs
SELECT DISTINCT customer_id
FROM sales;
import pandas as pd

query_select = """
SELECT DISTINCT customer_id
FROM sales;
"""
df_select = pd.read_sql_query(query_select, conn)
print(df_select)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[1], line 7
      1 import pandas as pd
      3 query_select = """
      4 SELECT DISTINCT customer_id
      5 FROM sales;
      6 """
----> 7 df_select = pd.read_sql_query(query_select, conn)
      8 print(df_select)

NameError: name 'conn' is not defined

COUNT#

Counts the number of rows that satisfy a condition.

-- Count total sales
SELECT COUNT(*) AS total_sales
FROM sales;

-- Count number of unique customers
SELECT COUNT(DISTINCT customer_id) AS unique_customers
FROM sales;
query_select = """
SELECT COUNT(DISTINCT customer_id) AS unique_customers
FROM sales;
"""
df_select = pd.read_sql_query(query_select, conn)
print(df_select)
   unique_customers
0                 3

LIMIT#

Restricts the number of rows returned, useful for sampling or previewing data.

-- Get the 10 most recent sales
SELECT *
FROM sales
ORDER BY sale_date DESC
LIMIT 10;

```sql
-- Count unique customers but only show the first 5 results
SELECT customer_id, COUNT(*) AS num_sales
FROM sales
GROUP BY customer_id
ORDER BY num_sales DESC
LIMIT 5;
query_select = """
SELECT customer_id, COUNT(*) AS num_sales
FROM sales
GROUP BY customer_id
ORDER BY num_sales DESC
LIMIT 5;
"""
df_select = pd.read_sql_query(query_select, conn)
print(df_select)
   customer_id  num_sales
0          102          2
1          101          2
2          103          1

Aggregations and Filtering

  • Use aggregation functions like SUM(), COUNT(), AVG(), MIN(), MAX() to summarize data.

  • GROUP BY allows you to compute metrics per category (e.g., total sales per customer).

  • HAVING filters aggregated results (useful when you want to filter groups, unlike WHERE which filters raw rows).

-- Find total sales per customer
SELECT customer_id, SUM(amount) AS total_sales
FROM sales
GROUP BY customer_id;

-- Find customers with total sales greater than 200
SELECT customer_id, SUM(amount) AS total_sales
FROM sales
GROUP BY customer_id
HAVING SUM(amount) > 200;
query_select = """
SELECT customer_id, SUM(amount) AS total_sales
FROM sales
GROUP BY customer_id
HAVING SUM(amount) > 200;
"""
df_select = pd.read_sql_query(query_select, conn)
print(df_select)
   customer_id  total_sales
0          101       225.25
1          102       401.00