-
Notifications
You must be signed in to change notification settings - Fork 1
/
Linear_Regression_Activity.py
99 lines (48 loc) · 1.38 KB
/
Linear_Regression_Activity.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
#!/usr/bin/env python
# coding: utf-8
# In[17]:
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from sklearn.linear_model import LinearRegression
import pandas as pd
import statsmodels.api as sm
# `get_ipython` is only injected into the namespace by IPython/Jupyter. Look it
# up defensively so this exported script also runs under plain `python` (as the
# shebang suggests) instead of stopping with a NameError on this line.
try:
    _shell = get_ipython()
except NameError:
    _shell = None
if _shell is not None:
    # Inside IPython: render matplotlib figures inline, as the notebook did.
    _shell.run_line_magic('matplotlib', 'inline')
# In[6]:
# Read the cereal dataset into `df`.
# NOTE(review): the path is absolute and machine-specific, so the script only
# runs where this exact file exists.
df = pd.read_csv(filepath_or_buffer=r'C:\Users\yunus\Downloads\cereal.csv')
df  # bare expression kept from the notebook cell; shows nothing in a script
# In[15]:
# Count of missing values in each column.
print(df.isna().sum())
# In[7]:
# Summary statistics; the result is only rendered when run as a notebook cell.
df.describe()
# In[64]:
# Bar chart of the `sugars` (green) and `vitamins` (orange) columns, one pair
# of bars per row of `df`; the axis labels are set on the returned Axes.
df.loc[:, ['sugars', 'vitamins']].plot.bar(color=['green', 'orange']).set(
    xlabel='cereals',
    ylabel='bins',
)
plt.show()
# In[63]:
# Translate the one-letter manufacturer codes in `mfr` into readable names and
# store them in a new 'full name' column.
#
# A dict lookup replaces the original np.select(..., default=0): string choices
# and an integer default have no common dtype, which recent NumPy releases
# reject with a TypeError. The labels also correct the misspelled
# 'Qualer Oats', 'Kellogs', 'Raslston Purina' and 'American Home Foods Products'.
manufacturer_names = {
    'N': 'Nabisco',
    'Q': 'Quaker Oats',
    'K': 'Kelloggs',
    'R': 'Ralston Purina',
    'G': 'General Mills',
    'P': 'Post',
    'A': 'American Home Food Products',
}
# Codes that are not in the mapping become NaN instead of a placeholder value.
df['full name'] = df['mfr'].map(manufacturer_names)
print(df)
# In[61]:
# Count plot with 'full name' on the y-axis: how many rows of `df` carry each
# manufacturer name.
sns.countplot(data=df, y='full name')
plt.show()
# In[29]:
# Regression inputs: `y` is the 'rating' column, `x` the twelve columns listed
# below. Both names are consumed by the OLS cell that follows.
y = df.loc[:, 'rating']
x = df.loc[:, [
    'calories', 'protein', 'fat', 'sodium', 'fiber', 'carbo',
    'sugars', 'potass', 'vitamins', 'shelf', 'weight', 'cups',
]]
x.shape, y.shape  # notebook sanity check; produces no output in a script
# In[40]:
# Ordinary least squares fit of `y` on the columns of `x`, then print the
# fit summary.
# statsmodels' OLS does not add an intercept on its own, so a constant column
# is added explicitly; without it the regression is forced through the origin
# and the reported R-squared is the uncentered one.
mod = sm.OLS(y, sm.add_constant(x))
res = mod.fit()
print(res.summary())
# In[50]:
# One scatter plot with a fitted regression line per column: 'rating' on the
# x-axis against 'sugars', then against 'vitamins'.
for nutrient in ('sugars', 'vitamins'):
    sns.lmplot(data=df, x='rating', y=nutrient)
# In[24]:
# Pairwise Pearson correlations between the numeric columns of `df`.
# `df` also holds text columns (e.g. 'mfr', 'full name'). pandas >= 2.0 no
# longer drops those silently inside corr() and raises instead, so the frame
# is restricted to numeric columns before correlating.
corr = df.select_dtypes(include='number').corr(method='pearson')
corr  # bare expression kept from the notebook cell; shows nothing in a script
# In[ ]: