Some More SQL Essentials#
DISTINCT#
Returns unique values from a column, removing duplicates.
-- Get unique customer IDs
SELECT DISTINCT customer_id
FROM sales;
import pandas as pd
query_select = """
SELECT DISTINCT customer_id
FROM sales;
"""
df_select = pd.read_sql_query(query_select, conn)
print(df_select)
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[1], line 7
1 import pandas as pd
3 query_select = """
4 SELECT DISTINCT customer_id
5 FROM sales;
6 """
----> 7 df_select = pd.read_sql_query(query_select, conn)
8 print(df_select)
NameError: name 'conn' is not defined
COUNT#
Counts the number of rows that satisfy a condition.
-- Count total sales
SELECT COUNT(*) AS total_sales
FROM sales;
-- Count number of unique customers
SELECT COUNT(DISTINCT customer_id) AS unique_customers
FROM sales;
query_select = """
SELECT COUNT(DISTINCT customer_id) AS unique_customers
FROM sales;
"""
df_select = pd.read_sql_query(query_select, conn)
print(df_select)
unique_customers
0 3
LIMIT#
Restricts the number of rows returned, useful for sampling or previewing data.
-- Get the 10 most recent sales
SELECT *
FROM sales
ORDER BY sale_date DESC
LIMIT 10;
```sql
-- Count unique customers but only show the first 5 results
SELECT customer_id, COUNT(*) AS num_sales
FROM sales
GROUP BY customer_id
ORDER BY num_sales DESC
LIMIT 5;
query_select = """
SELECT customer_id, COUNT(*) AS num_sales
FROM sales
GROUP BY customer_id
ORDER BY num_sales DESC
LIMIT 5;
"""
df_select = pd.read_sql_query(query_select, conn)
print(df_select)
customer_id num_sales
0 102 2
1 101 2
2 103 1
Aggregations and Filtering
Use aggregation functions like
SUM()
,COUNT()
,AVG()
,MIN()
,MAX()
to summarize data.GROUP BY
allows you to compute metrics per category (e.g., total sales per customer).HAVING
filters aggregated results (useful when you want to filter groups, unlikeWHERE
which filters raw rows).
-- Find total sales per customer
SELECT customer_id, SUM(amount) AS total_sales
FROM sales
GROUP BY customer_id;
-- Find customers with total sales greater than 200
SELECT customer_id, SUM(amount) AS total_sales
FROM sales
GROUP BY customer_id
HAVING SUM(amount) > 200;
query_select = """
SELECT customer_id, SUM(amount) AS total_sales
FROM sales
GROUP BY customer_id
HAVING SUM(amount) > 200;
"""
df_select = pd.read_sql_query(query_select, conn)
print(df_select)
customer_id total_sales
0 101 225.25
1 102 401.00