-
Notifications
You must be signed in to change notification settings - Fork 1
/
titantic_altered.py
31 lines (23 loc) · 1.01 KB
/
titantic_altered.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
import pandas as pd
import numpy as np
# Location of the Titanic CSV that this script downloads.
dataset_url = 'https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv'

# Fetch the CSV over HTTP and parse it into a DataFrame.
df = pd.read_csv(dataset_url)

# Show the dtype pandas inferred for every column.
print(df.dtypes)
# Print how often each distinct value occurs, one column at a time.
for name in df.columns:
    print(f"Value counts for the '{name}' column:")
    frequencies = df[name].value_counts()
    print(frequencies)
    print()  # blank line separating one column's report from the next
# Heuristic pass over every column: a column with at most 10 distinct values
# is reported as likely categorical; the message also states its dtype.
for name in df.columns:
    series = df[name]
    distinct = series.nunique()
    kind = series.dtype
    if distinct > 10:
        print(f"The '{name}' column is likely not categorical, as it has {distinct} unique values and a data type of {kind}.")
        continue
    print(f"The '{name}' column is likely categorical, as it has {distinct} unique values and a data type of {kind}.")
# Convert the categorical columns to one-hot encoded columns.
# Each listed column is replaced by integer 0/1 indicator columns.
# NOTE: with the default dummy_na=False, a missing value in a listed column
# produces a row whose indicators for that column are all 0.
categorical_columns = ['Survived', 'Pclass', 'Sex', 'SibSp', 'Parch', 'Embarked']
df2 = pd.get_dummies(df, columns=categorical_columns, dtype=int)

# Write the encoded frame to disk; index=False keeps the row index out of the CSV.
df2.to_csv('titanic_altered.csv', index=False)