BigQuery Examples
BigQuery Examples
Taxi Cab Data Examples
# Chicago Taxicab Data - Average Fare (aggregation)
SELECT avg(fare) AS avgfare
FROM `bigquery-public-data.chicago_taxi_trips.taxi_trips`;
# Chicago Taxicab Data - Average Fare and Tips by Payment Type (aggregation by Payment Type)
SELECT payment_type, COUNT(fare) as numtrips, avg(fare) AS avgfare, avg(tips) as avgtip
FROM `bigquery-public-data.chicago_taxi_trips.taxi_trips`
GROUP BY payment_type
ORDER BY payment_type;
# Chicago Taxicab Data - Average Fare and Tips by Payment Type (aggregation by Payment Type)
SELECT taxi_id, COUNT(unique_key) AS numtrips, array_agg(unique_key)
FROM `bigquery-public-data.chicago_taxi_trips.taxi_trips`
GROUP BY taxi_id
ORDER BY numtrips DESC;
# Chicago Taxicab Data - Limit 5 example
SELECT *
FROM `bigquery-public-data.chicago_taxi_trips.taxi_trips`
LIMIT 5;
# Chicago Taxicab Data - Rows about 3 specific trips
SELECT *
FROM `bigquery-public-data.chicago_taxi_trips.taxi_trips`
WHERE
  unique_key IN (
    "ec8df27cfc49534d1e866ff323095903be8a3c8a",
    "d0c0d800d4d8c2cd0a300676026468faf06aebc8",
    "21cd4686b5e709f1069d3af7395a01b83395d2cc");
# Chicago Taxicab Data - Aggreation
SELECT taxi_id, ARRAY_AGG(STRUCT(puyear, payment_type,numtrips,avgfare,avgtips)
ORDER BY puyear, payment_type) AS payment_summary
FROM (
  SELECT taxi_id, payment_type, extract(year from trip_start_timestamp) as puyear,
    COUNT(DISTINCT unique_key) AS numtrips, AVG(fare) as avgfare, AVG(tips) as avgtips
  FROM `bigquery-public-data.chicago_taxi_trips.taxi_trips`
  GROUP BY taxi_id,payment_type,puyear )
GROUP BY taxi_id
ORDER BY taxi_id;
# Create a dataset named taxidemo
CREATE schema taxidemo OPTIONS (location = "US");
# Make a copy of a table into local project (free operation)
CREATE TABLE taxidemo.taxitrips
  COPY bigquery-public-data.chicago_taxi_trips.taxi_trips;
# Make a copy as result of a select (cost of select operation)
CREATE TABLE taxidemo.taxicopyviaselect
ASÂ
select * from bigquery-public-data.chicago_taxi_trips.taxi_trips
order by taxi_id
where trip
Simple Names Table Example (Creating Tables)
# Manually, create a dataset called samples_dev
# Alternate: create a dataset called samples_dev using SQL
CREATE schema sample_dev OPTIONS (location = "US");
# Create a table named names using this schema: firstname STRING, lastname STRING, idnumber INT64
# First Time - please create manually (using wizard)
# Second Time - create using SQL codeÂ
CREATE OR REPLACE TABLE sample_dev.names(
  firstname string, lastname string, idnumber int64);
# Insert several rows
INSERT INTO sample_dev.names
  (firstname, lastname, idnumber)
VALUES
  ('firstname1', 'lastname1', 12345),
  ('firstname2', 'lastname2', 23456),
  ('firstname3', 'lastname3', 34567),
  ('firstname4', 'lastname4', 45678),
  ('firstname5', 'lastname5', 56789)
# Query sample_dev.names table
SELECT * FROM sample_dev.names;
SSA Names Importing (Importing data various ways)
# Create a dataset called samples_dev using SQL (if it does not already exist)
CREATE schema sample_dev OPTIONS (location = "US");
# Download SSA Names - https://www.ssa.gov/oact/babynames/limits.html
# Direct Download Link https://www.ssa.gov/oact/babynames/state/namesbystate.zip
# Extract to a directory, and remove the .pdf
# Load Data using upload (one file) into sample_dev.namesonestate
#Â Schema is: state string,gender string,year int64,name string,namecount int64
# Load Data using GCS manually into sample_dev.allnames
# Use the Wizard to load all files from gs://roi-hsbc-bigquery/names/*.TXT
# Load Data using SQL Command into sample_dev.namesviasql
LOAD DATA OVERWRITE sample_dev.namesviasql(
      state string, gender string, year int64, name string, namecount int64)
FROMÂ
FILES(format = 'CSV',
    uris = ['gs://roi-hsbc-bigquery/names/*.TXT']);
# Load Data using BQ command into sample_dev.namesviabq
bq load sample_dev.namesviabq \
gs://roi-hsbc-bigquery/names/*.TXT \
"state string,gender string,year int64,name string,namecount int64"
# Simple Array Aggregation (Names and what years they appeared - years as an Array)
SELECT name, array_agg(CAST(year AS string) ORDER BY year) AS years
FROM demo3dataset.allnames
GROUP BY name
ORDER BY name;
# Simple String Aggregation (Names and what years they appeared - years as a String)
SELECT name, string_agg(CAST(year AS string) ORDER BY year) AS years
FROM demo3dataset.allnames
GROUP BY name
ORDER BY name;
Table Partitioning and Clustering
# Create a dataset named taxidemo
CREATE schema taxidemo OPTIONS (location = "US");
# Chicago Taxicab Data - Partition by Date (of trip_start_timestamp)
CREATE OR REPLACE TABLE taxidemo.taxipartitioned
  PARTITION BY DATE(trip_start_timestamp)
AS
SELECT *
FROM `bigquery-public-data.chicago_taxi_trips.taxi_trips`
WHERE trip_start_timestamp between '2014-01-01' and '2023-12-31';
# Chicago Taxicab Data - Partition by Date (of trip_start_timestamp)
CREATE OR REPLACE TABLE taxidemo.taxiclustered
  CLUSTER BY unique_key
ASÂ
SELECT *
FROM `bigquery-public-data.chicago_taxi_trips.taxi_trips`
WHERE trip_start_timestamp BETWEEN '2014-01-01' AND '2023-12-31';
# Chicago Taxicab Data - Partition by MonthDate, Cluster by taxi_id, unique_key
CREATE OR REPLACE TABLE taxidemo.taxipartandcluster
  PARTITION BY DATETIME_TRUNC(trip_start_timestamp, MONTH)
  CLUSTER BY taxi_id, unique_key
AS
SELECT *
FROM `bigquery-public-data.chicago_taxi_trips.taxi_trips`
WHERE trip_start_timestamp BETWEEN '2014-01-01' AND '2023-12-31';
2026 Wikipedia Data Sample Queries
(CAUTION - some queries can exceed 1TB)
# 2026 Wikipedia - Most popular wiki titles (low cardinality)
SELECT wiki, sum(views) AS pageviews
FROM `bigquery-public-data.wikipedia.pageviews_2026`
WHERE TIMESTAMP_TRUNC(datehour, DAY) >= TIMESTAMP("2026-01-01")
GROUP BY wiki
ORDER BY pageviews DESC;
# 2026 Wikipedia - Most popular page titles (high cardinality)
SELECT title, sum(views) AS pageviews
FROM `bigquery-public-data.wikipedia.pageviews_2025`
WHERE TIMESTAMP_TRUNC(datehour, DAY) >= TIMESTAMP("2025-01-01")
GROUP BY title
ORDER BY pageviews DESC
limit 100000000;
INFORMATION SCHEMA Read-Only Views
# Information Schemas about Tables, Partitions and Jobs
SELECT * from dataset.INFORMATION_SCHEMA.TABLES
ORDER BY tablename;
SELECT * from dataset.INFORMATION_SCHEMA.PARTITIONS
ORDER BY tablename, partition;
SELECT * from region-us.INFORMATION_SCHEMA.JOBS;
1929 to 2025 Weather Data Sample Queries (Wildcard Tables and Joins)
# 2025 Weather Data Sample Queries
SELECT stn, wban, avg(temp) AS avgtemp, min(temp) AS mintemp, max(temp) AS maxtemp
FROM `bigquery-public-data.noaa_gsod.gsod2025`
GROUP BY stn, wban
ORDER BY avgtemp DESC;
# 2000 to 2025 Weather Data with Wildcard Tables
SELECT stn, wban, avg(temp) AS avgtemp, min(temp) AS mintemp, max(temp) AS maxtemp
FROM `bigquery-public-data.noaa_gsod.gsod20*`
GROUP BY stn, wban
ORDER BY avgtemp DESC;
# 1929 to 2025 Weather Data with Wildcard Tables
SELECT stn, wban, avg(temp) AS avgtemp, min(temp) AS mintemp, max(temp) AS maxtemp
FROM `bigquery-public-data.noaa_gsod.gsod*`
GROUP BY stn, wban
ORDER BY avgtemp DESC;
# 2000 to 2025 Weather Data with Wildcard Tables and a Join
SELECT s.usaf, w.stn, avg(w.temp) AS avgtemp, s.name, s.country, w.year, min(w.temp) AS mintemp, max(w.temp) AS maxtemp
FROM `bigquery-public-data.noaa_gsod.stations` as s
JOIN `bigquery-public-data.noaa_gsod.gsod20*` as w
On s.usaf=w.stn and s.wban=w.wban
GROUP BY s.usaf, w.stn, s.name, s.country, w.year
ORDER BY avgtemp DESC;