generated from github/codespaces-blank
-
Notifications
You must be signed in to change notification settings - Fork 0
/
intermediate_sql_notes.sql
293 lines (245 loc) · 7.92 KB
/
intermediate_sql_notes.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
/*
Datacamp - Intermediate SQL
Notes & Exercises
Uses several different datasets
12/2/22
Ashley Joyner
*/
--COUNT
-- Count the number of records in the people table
SELECT COUNT(*) AS count_records
FROM people;
-- Count the number of birthdates in the people table
SELECT COUNT(birthdate) AS count_birthdate
FROM people;
-- Count the languages and countries represented in the films table
SELECT COUNT(language) AS count_languages,
COUNT(country) AS count_countries
FROM films;
--DISTINCT
-- Return the unique countries from the films table
SELECT DISTINCT country
FROM films;
-- Return the unique countries from the films table
SELECT COUNT(DISTINCT country) AS count_distinct_countries
FROM films;
-- >, <, =, <>
-- Select film_ids and imdb_score with an imdb_score over 7.0
SELECT film_id,
imdb_score
FROM reviews
WHERE imdb_score > 7.0;
-- Select film_ids and facebook_likes for ten records with less than 1000 likes
SELECT film_id,
facebook_likes
FROM reviews
WHERE facebook_likes < 1000
LIMIT 10;
-- Count the records with at least 100,000 votes
SELECT COUNT(*) AS films_over_100k_votes
FROM reviews
WHERE num_votes >= 100000;
-- Count the Spanish-language films
SELECT COUNT(language) AS count_spanish
FROM films
WHERE language = 'Spanish';
--AND
-- Select the title and release_year for all German-language films released before 2000
SELECT title,
release_year
FROM films
WHERE language = 'German'
AND release_year < 2000;
-- Update the query to see all German-language films released after 2000
SELECT title,
release_year
FROM films
WHERE release_year > 2000
AND language = 'German';
-- Select all records for German-language films released after 2000 and before 2010
SELECT *
FROM films
WHERE language = 'German'
AND release_year > 2000 AND release_year < 2010;
--OR
-- Find the title and year of films from the 1990 or 1999
SELECT title,
release_year
FROM films
WHERE (release_year = 1990 OR release_year = 1999)
-- Add a filter to see only English or Spanish-language films
AND (language = 'English' OR language = 'Spanish')
-- Filter films with more than $2,000,000 gross
AND gross > 2000000;
--BETWEEN
-- Select the title and release_year for films released between 1990 and 2000
SELECT title,
release_year
FROM films
WHERE release_year BETWEEN 1990 AND 2000
-- Narrow down your query to films with budgets > $100 million
AND budget > 100000000
-- Amend the query to include Spanish or French-language films
AND (language = 'Spanish' OR language = 'French');
--LIKE
-- Select the names that start with B
SELECT name
FROM people
WHERE name LIKE 'B%';
-- Select the names that have r as the second letter
SELECT name
FROM people
WHERE name LIKE '_r%';
--NOT LIKE
SELECT name
FROM people
-- Select names that don't start with A
WHERE name NOT LIKE 'A%';
--IN
-- Find the title and release_year for all films over two hours in length released in 1990 and 2000
SELECT title,
release_year
FROM films
WHERE duration > 120
AND release_year IN (1990, 2000);
-- Find the title and language of all films in English, Spanish, and French
SELECT title,
language
FROM films
WHERE language IN ('English', 'Spanish', 'French');
-- Find the title, certification, and language all films certified NC-17 or R that are in English, Italian,
--or Greek
SELECT title,
certification,
language
FROM films
WHERE certification IN ('NC-17', 'R')
AND language IN ('English', 'Italian', 'Greek');
-- Count the unique titles
SELECT COUNT(DISTINCT title) AS nineties_english_films_for_teens
FROM films
-- Filter to release_years to between 1990 and 1999
WHERE release_year BETWEEN 1990 AND 1999
-- Filter to English-language films
AND language = 'English'
-- Narrow it down to G, PG, and PG-13 certifications
AND certification IN ('G', 'PG', 'PG-13');
--NULL
-- List all film titles with missing budgets
SELECT title AS no_budget_info
FROM films
WHERE budget IS NULL;
-- Count the number of films we have language data for
SELECT COUNT(*) AS count_language_known
FROM films
WHERE language IS NOT NULL;
--Aggregations
-- Query the sum of film durations
SELECT SUM(duration) AS total_duration
FROM films;
-- Calculate the average duration of all films
SELECT AVG(duration) AS average_duration
FROM films;
-- Find the latest release_year
SELECT MAX(release_year) AS latest_year
FROM films;
-- Find the duration of the shortest film
SELECT MIN(duration) AS shortest_film
FROM films;
-- Calculate the sum of gross from the year 2000 or later
SELECT SUM(gross) AS total_gross
FROM films
WHERE release_year >= 2000;
-- Calculate the average gross of films that start with A
SELECT AVG(gross) AS avg_gross_A
FROM films
WHERE title LIKE 'A%';
-- Calculate the lowest gross film in 1994
SELECT MIN(gross) lowest_gross
FROM films
WHERE release_year = 1994;
-- Calculate the highest gross film released between 2000-2012
SELECT MAX(gross) AS highest_gross
FROM films
WHERE release_year BETWEEN 2000 AND 2012;
--ROUND
-- Round the average number of facebook_likes to one decimal place
SELECT ROUND(AVG(facebook_likes),1) AS avg_facebook_likes
FROM reviews;
-- Calculate the average budget rounded to the thousands
SELECT ROUND(AVG(budget),-3) AS avg_budget_thousands
FROM films;
-- +, -, *, /
-- Calculate the title and duration_hours from films
SELECT title,
duration/60.0 AS duration_hours
FROM films;
-- Calculate the percentage of people who are no longer alive
SELECT COUNT(deathdate) * 100.0 / COUNT(*) AS percentage_dead
FROM people;
-- Find the number of decades in the films table
SELECT (MAX(release_year) - MIN(release_year)) / 10.0 AS number_of_decades
FROM films;
-- Round duration_hours to two decimal places
SELECT title, ROUND(duration / 60.0, 2) AS duration_hours
FROM films;
--ORDER BY
-- Select name from people and sort alphabetically
SELECT name
FROM people
ORDER BY name;
-- Select the title and duration from longest to shortest film
SELECT title, duration
FROM films
ORDER BY duration DESC;
-- Select the release year, duration, and title sorted by release year and duration
SELECT release_year, duration, title
FROM films
ORDER BY release_year, duration;
-- Select the certification, release year, and title sorted by certification and release year
SELECT certification, release_year, title
FROM films
ORDER BY certification, release_year;
--GROUP BY
-- Find the release_year and film_count of each year
SELECT release_year, COUNT(*) AS film_count
FROM films
GROUP BY release_year;
-- Find the release_year and average duration of films for each year
SELECT release_year, AVG(duration) AS avg_duration
FROM films
GROUP BY release_year;
-- Find the release_year, country, and max_budget, then group and order by release_year and country
SELECT release_year, country, MAX(budget) AS max_budget
FROM films
GROUP BY release_year, country;
--Which release_year had the most language diversity?
SELECT release_year, COUNT(DISTINCT language) AS language_diversity
FROM films
GROUP BY release_year
ORDER BY language_diversity DESC;
-- Select the country and distinct count of certification as certification_count
SELECT country, COUNT(DISTINCT certification) AS certification_count
FROM films
-- Group by country
GROUP BY country
-- Filter results to countries with more than 10 different certifications
HAVING COUNT(DISTINCT certification) > 10;
-- Select the country and average_budget from films
SELECT country, AVG(budget) AS average_budget
FROM films
-- Group by country
GROUP BY country
-- Filter to countries with an average_budget of more than one billion
HAVING AVG(budget) > 1000000000
-- Order by descending order of the aggregated budget
ORDER BY average_budget DESC;
--average budget and gross earnings for films each year after 1990 if the average budget is greater than
--60 million.
SELECT release_year, AVG(budget) AS avg_budget, AVG(gross) AS avg_gross
FROM films
WHERE release_year > 1990
GROUP BY release_year
HAVING AVG(budget) > 60000000
ORDER BY avg_gross DESC
LIMIT 1;