-
Notifications
You must be signed in to change notification settings - Fork 1
/
EDA.sql
162 lines (146 loc) · 5.6 KB
/
EDA.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
-- Database
USE anabig114212_cap;
SHOW TABLES;
-- Table Stats for Employees Table
COMPUTE STATS employees;
SHOW COLUMN STATS employees;
-- 1. A list showing employee number, last name, first name, sex, and salary for each employee
SELECT e.emp_no, last_name, first_name, sex, salary
FROM employees e
JOIN salaries s ON e.emp_no = s.emp_no
LIMIT 30;
-- 2. A list showing first name, last name, and hire date for employees who were hired in 1986.
SELECT first_name, last_name, hire_date
FROM employees
WHERE year(hire_date) = 1986
LIMIT 30;
-- 3. A list showing the manager of each department with the following information: department number, department name, the manager's employee number, last name, first name.
SELECT dm.dept_no, d.dept_name, e.emp_no, e.last_name, e.first_name, t.title
FROM dept_manager dm
JOIN departments d ON dm.dept_no = d.dept_no
JOIN employees e ON dm.emp_no = e.emp_no
JOIN titles t ON e.emp_title_id = t.title_id AND t.title = 'Manager';
-- 4. A list showing the department of each employee with the following information: employee number, last name, first name, and department name.
WITH dept_emp3 AS
(SELECT a.emp_no, a.dept_no
FROM(SELECT *, row_number() over(PARTITION BY emp_no ORDER BY dept_no DESC) rnk FROM dept_emp)a
WHERE a.rnk = 1)
SELECT e.emp_no, last_name, first_name, d.dept_name
FROM employees e
JOIN dept_emp3 de ON e.emp_no = de.emp_no
JOIN departments d ON de.dept_no = d.dept_no
LIMIT 30;
-- 5. A list showing first name, last name, and sex for employees whose first name is "Hercules" and last names begin with "B".
SELECT first_name, last_name, sex
FROM employees
WHERE first_name = 'Hercules' AND last_name LIKE 'B%'
LIMIT 100;
-- 6. A list showing all employees in the Sales department, including their employee number, last name, first name, and department name.
WITH dept_emp3 AS
(SELECT a.emp_no, a.dept_no
FROM(SELECT *, row_number() over(PARTITION BY emp_no ORDER BY dept_no DESC) rnk FROM dept_emp)a
WHERE a.rnk = 1)
SELECT e.emp_no, e.last_name, e.first_name, d.dept_name
FROM employees e
JOIN dept_emp3 de ON e.emp_no = de.emp_no
JOIN departments d ON de.dept_no = d.dept_no AND d.dept_name = 'Sales'
LIMIT 100;
-- 7. A list showing all employees in the Sales and Development departments, including their employee number, last name, first name, and department name.
WITH dept_emp3 AS
(SELECT a.emp_no, a.dept_no
FROM(SELECT *, row_number() over(PARTITION BY emp_no ORDER BY dept_no DESC) rnk FROM dept_emp)a
WHERE a.rnk = 1)
SELECT e.emp_no, e.last_name, e.first_name, d.dept_name
FROM employees e
JOIN dept_emp3 de ON e.emp_no = de.emp_no
JOIN departments d ON de.dept_no = d.dept_no AND d.dept_name IN ('Sales','development')
LIMIT 100;
-- 8. A list showing the frequency count of employee last names, in descending order. (i.e., how many employees share each last name)
SELECT last_name, count(*) AS Last_name_cnt
FROM employees
GROUP BY last_name
ORDER BY Last_name_cnt DESC
LIMIT 100;
-- 11. Calculate employee tenure & show the tenure distribution among the employees
WITH a AS
(SELECT greatest(max(last_date), max(hire_date)) maxDate
FROM employees),
t AS
(SELECT
CASE WHEN last_date IS NULL THEN datediff(a.maxDate, hire_date)
ELSE datediff(last_date, hire_date) END AS Duration,
count(*) AS Duration_cnt
FROM employees,a
GROUP BY Duration)
SELECT Duration_cnt, count(Duration) Duration_Dist FROM t
GROUP BY Duration_cnt
ORDER BY Duration_Dist DESC
LIMIT 100;
-- 12. Count of Employee Status (Currently working or Left) in different departments grouped by gender
WITH dept_emp3 AS
(SELECT a.emp_no, a.dept_no
FROM(SELECT *, row_number() over(PARTITION BY emp_no ORDER BY dept_no DESC) rnk FROM dept_emp)a
WHERE a.rnk = 1)
SELECT
dept_name, sex,
count(Left2) Total_Count,
sum(CASE WHEN Left2 = 0 THEN 1 ELSE 0 END) Working_Count,
sum(Left2) Left_Count
FROM Employees e
JOIN dept_emp3 de ON e.emp_no = de.emp_no
JOIN departments d ON de.dept_no = d.dept_no
GROUP BY dept_name, sex;
-- 13 Max, Min and Avg age of Employees in diffrent departments
WITH a AS
(SELECT greatest(max(last_date), max(hire_date)) maxDate
FROM employees),
dept_emp3 AS
(SELECT a.emp_no, a.dept_no
FROM(SELECT *, row_number() over(PARTITION BY emp_no ORDER BY dept_no DESC) rnk FROM dept_emp)a
WHERE a.rnk = 1)
SELECT dept_name, min(Age), max(Age), avg(Age)
FROM (
SELECT
dept_name, e.emp_no, year(maxDate)-year(birth_date) Age
FROM Employees e, a
JOIN dept_emp3 de ON e.emp_no = de.emp_no
JOIN departments d ON de.dept_no = d.dept_no
)a1
GROUP BY dept_name;
-- 14 Count of Employees in various titles
SELECT
t.title,
count(Salary) Emp_Count
FROM salaries s
JOIN employees e ON s.emp_no = e.emp_no
JOIN titles t ON e.emp_title_id = t.title_id
GROUP BY t.title
ORDER BY Emp_Count DESC;
-- 15 Average Tenure Distribution accross Departments
WITH a AS
(SELECT greatest(max(last_date), max(hire_date)) maxDate
FROM employees)
SELECT
avg(CASE WHEN last_date IS NULL
THEN datediff(maxDate,hire_date)
ELSE datediff(last_date,hire_date) END) AS Avg_Tenure_Years,
dept_name
FROM employees e,a
JOIN dept_emp de ON e.emp_no = de.emp_no
JOIN departments d ON de.dept_no = d.dept_no
GROUP BY dept_name
ORDER BY Avg_Tenure_Years DESC
--16 Average Tenure Distribution accross Titles
WITH a AS
(SELECT greatest(max(last_date), max(hire_date)) maxDate
FROM employees)
SELECT
avg(CASE WHEN last_date IS NULL
THEN datediff(maxDate,hire_date)
ELSE datediff(last_date,hire_date) END) AS Avg_Tenure_Years,
title
FROM employees_at e
JOIN titles t ON e.emp_title_id = t.title_id
GROUP BY title
ORDER BY Avg_Tenure_Years DESC;
exit;