forked from lleozhang/IMDB-movies
-
Notifications
You must be signed in to change notification settings - Fork 0
/
第一步预处理.py
35 lines (35 loc) · 1.35 KB
/
第一步预处理.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import numpy as np
import pandas as pd
import json
# Read both CSV inputs; the paths are relative to the current working directory.
movie = pd.read_csv("tmdb_5000_movies.csv")
credit = pd.read_csv("tmdb_5000_credits.csv")

# These five columns hold JSON text; decode every cell into Python objects
# so the values can be indexed into afterwards.
for json_column in (
    "genres",
    "keywords",
    "production_companies",
    "production_countries",
    "spoken_languages",
):
    movie[json_column] = movie[json_column].apply(json.loads)
# Replace each row's decoded "genres" value (a sequence of mappings that each
# have a "name" entry) with the string form of the list of those names,
# e.g. "['a', 'b']"; an empty sequence becomes "[]".
# The column is rebuilt in a single pass instead of being written cell by cell
# through .loc while it is being iterated: rows are handled positionally, so a
# repeated index label cannot overwrite another row's value.
movie["genres"] = movie["genres"].apply(
    lambda entries: str([entry["name"] for entry in entries])
)
# Replace each row's decoded "keywords" value (a sequence of mappings that each
# have a "name" entry) with the string form of the list of those names,
# e.g. "['a', 'b']"; an empty sequence becomes "[]".
# The column is rebuilt in a single pass instead of being written cell by cell
# through .loc while it is being iterated: rows are handled positionally, so a
# repeated index label cannot overwrite another row's value.
movie["keywords"] = movie["keywords"].apply(
    lambda entries: str([entry["name"] for entry in entries])
)
# Replace each row's decoded "production_companies" value (a sequence of
# mappings that each have a "name" entry) with the string form of the list of
# those names, e.g. "['a', 'b']"; an empty sequence becomes "[]".
# The column is rebuilt in a single pass instead of being written cell by cell
# through .loc while it is being iterated: rows are handled positionally, so a
# repeated index label cannot overwrite another row's value.
movie["production_companies"] = movie["production_companies"].apply(
    lambda entries: str([entry["name"] for entry in entries])
)
# Replace each row's decoded "production_countries" value (a sequence of
# mappings that each have a "name" entry) with the string form of the list of
# those names, e.g. "['a', 'b']"; an empty sequence becomes "[]".
# The column is rebuilt in a single pass instead of being written cell by cell
# through .loc while it is being iterated: rows are handled positionally, so a
# repeated index label cannot overwrite another row's value.
movie["production_countries"] = movie["production_countries"].apply(
    lambda entries: str([entry["name"] for entry in entries])
)
# Replace each row's decoded "spoken_languages" value (a sequence of mappings
# that each have a "name" entry) with the string form of the list of those
# names, e.g. "['a', 'b']"; an empty sequence becomes "[]".
# The column is rebuilt in a single pass instead of being written cell by cell
# through .loc while it is being iterated: rows are handled positionally, so a
# repeated index label cannot overwrite another row's value.
movie["spoken_languages"] = movie["spoken_languages"].apply(
    lambda entries: str([entry["name"] for entry in entries])
)