-
Notifications
You must be signed in to change notification settings - Fork 0
/
index.html
1281 lines (976 loc) · 50.9 KB
/
index.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
<!DOCTYPE html>
<!-- lang tells assistive technology and search engines that the page content is English -->
<html lang="en">
<!-- The encoding is declared before any text content so the title is decoded correctly -->
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>Social graphs and interactions - Project</title>
<!-- Third-party stylesheets: W3.CSS utility classes (w3-*), the Lato web font, Font Awesome 4.7 icons (fa-*) -->
<link rel="stylesheet" href="https://www.w3schools.com/w3css/4/w3.css">
<link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Lato">
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.7.0/css/font-awesome.min.css">
<style>
body,h1,h2,h3,h4,h5,h6 {font-family: "Lato", sans-serif;}
body, html {
height: 100%;
color: #777;
line-height: 1.8;
}
p.normal {
font-style: normal;
color:black;
}
/* Create a Parallax Effect */
.bgimg-1, .bgimg-2, .bgimg-3,.bgimg-4,.bgimg-5,.bgimg-6,.bgimg-7 {
background-attachment: fixed;
background-position: center;
background-repeat: no-repeat;
background-size: cover;
}
/* Slideshow container */
.slideshow-container {
max-width: 1000px;
position: relative;
margin: auto;
}
/* Next & previous buttons */
.prev, .next {
cursor: pointer;
position: absolute;
top: 50%;
width: auto;
padding: 16px;
margin-top: -22px;
color: black;
font-weight: bold;
font-size: 18px;
transition: 0.6s ease;
border-radius: 0 3px 3px 0;
}
/* On hover, give the slideshow arrows a slightly see-through cyan background (80% opaque) */
.prev:hover, .next:hover {
background-color: rgba(66,217,244,0.8);
}
/* Caption text */
.text {
color: #000000;
font-size: 15px;
padding: 8px 12px;
position: absolute;
bottom: 8px;
width: 100%;
text-align: center;
}
/* Number text (1/3 etc) */
.numbertext {
color: #f2f2f2;
font-size: 12px;
padding: 8px 12px;
position: absolute;
top: 0;
}
/* The dots/bullets/indicators */
.dot {
cursor: pointer;
height: 60px;
width: 60px;
margin: 0 2px;
background-color: #bbb;
border-radius: 50%;
display: inline-block;
transition: background-color 0.6s ease;
text-align: center;
text-decoration: none;
padding: 18px 5px;
}
.active, .dot:hover {
background-color: #42d9f4;
}
/* Fading animation */
.fade {
-webkit-animation-name: fade;
-webkit-animation-duration: 1.5s;
animation-name: fade;
animation-duration: 1.5s;
}
@-webkit-keyframes fade {
from {opacity: .4}
to {opacity: 1}
}
/* Standard (unprefixed) fade-in used by .fade: opacity goes from 40% to fully opaque,
   mirroring the -webkit- prefixed keyframes declared just above. */
@keyframes fade {
from {opacity: .4}
to {opacity: 1}
}
/* Position the "next button" to the right */
.next {
right: 0;
border-radius: 3px 0 0 3px;
}
/* First image (home photo of the gang): full-height banner used by the #home section */
.bgimg-1 {
/*background-image: url('https://w3schools.com/w3images/parallax1.jpg');
min-height: 100%;*/
background-image: url('http://www.thecordovatimes.com/wp-content/uploads/2017/10/HP-OOTP.jpeg');
min-height: 100%;
}
/* Second image (castle) */
.bgimg-2 {
background-image: url("https://www.techavy.com/wp-content/uploads/2017/04/CMWTqJx.jpg");
min-height: 400px;
}
/* Third image (conclusion photo) */
.bgimg-3 {
background-image: url("https://d1jo0zet24jmxt.cloudfront.net/content/9490/9b67a815b5f3eed934bfdece52348682.jpg");
min-height: 400px;
}
/* Fourth image (sentimental analysis): banner of the #sentimental section */
.bgimg-4 {
background-image: url("https://nerdist.com/wp-content/uploads/2017/01/harry-potter.jpg");
min-height: 400px;
}
/* Fifth image (community photo): banner of the #community section */
.bgimg-5 {
background-image: url("http://cdn.playbuzz.com/cdn/66a71bf2-10bc-4a55-8ac0-068219786f61/9d9b2dc4-924f-435c-90f4-6cf94944b121.jpg");
min-height: 400px;
}
/* Sixth image (spells photo): banner of the #frequency section */
.bgimg-6 {
background-image: url("https://www.space.ca/wp-content/uploads/2015/10/harry-potter1.jpg");
min-height: 400px;
}
/* Seventh image (network photo): banner of the #network section */
.bgimg-7 {
background-image: url("https://i.pinimg.com/originals/e3/5d/3a/e35d3a7a819e7a582a45eb60cc40b3b3.jpg");
min-height: 400px;
}
.w3-wide {letter-spacing: 10px;}
.w3-hover-opacity {cursor: pointer;}
/* Turn off parallax scrolling for tablets and phones */
@media only screen and (max-device-width: 1024px) {
.bgimg-1, .bgimg-2, .bgimg-3, .bgimg-4, .bgimg-5,.bgimg-6, .bgimg-7 {
background-attachment: scroll;
}
}
</style>
<body>
<!-- Navbar (sits on top of the page) -->
<div class="w3-top">
<div class="w3-bar" id="myNavbar">
<!-- Menu button: hidden on medium and large screens (w3-hide-medium / w3-hide-large).
     It calls toggleFunction(), which is defined outside this section. -->
<a class="w3-bar-item w3-button w3-hover-black w3-hide-medium w3-hide-large w3-right" href="javascript:void(0);" onclick="toggleFunction()" title="Toggle Navigation Menu">
<i class="fa fa-bars"></i>
</a>
<a href="#home" class="w3-bar-item w3-button">HOME</a>
<!-- Section links: hidden on small screens (w3-hide-small) -->
<a href="#data" class="w3-bar-item w3-button w3-hide-small"><i class="fa fa-download"></i> INTRODUCTION</a>
<a href="#network" class="w3-bar-item w3-button w3-hide-small"><i class="fa fa-universal-access"></i> NETWORK ANALYSIS</a>
<a href="#community" class="w3-bar-item w3-button w3-hide-small"><i class="fa fa-map"></i> COMMUNITY ANALYSIS</a>
<a href="#sentimental" class="w3-bar-item w3-button w3-hide-small"><i class="fa fa-book"></i> SENTIMENTAL ANALYSIS</a>
<a href="#frequency" class="w3-bar-item w3-button w3-hide-small"><i class="fa fa-th"></i> WORD FREQUENCY</a>
<!-- fa-handshake-o is the handshake icon shipped with Font Awesome 4.7 -->
<a href="#conclusion" class="w3-bar-item w3-button w3-hide-small"><i class="fa fa-handshake-o"></i> CONCLUSION</a>
</div>
<!-- Navbar on small screens -->
<!-- NOTE(review): the labels below (ABOUT / DATASET / CONCLUSION) do not match the sections
     their hrefs point to (#data / #community / #sentimental) — confirm the intended labels. -->
<div id="navDemo" class="w3-bar-block w3-white w3-hide w3-hide-large w3-hide-medium">
<a href="#data" class="w3-bar-item w3-button" onclick="toggleFunction()">ABOUT</a>
<a href="#community" class="w3-bar-item w3-button" onclick="toggleFunction()">DATASET</a>
<a href="#sentimental" class="w3-bar-item w3-button" onclick="toggleFunction()">CONCLUSION</a>
<a href="#" class="w3-bar-item w3-button">SEARCH</a>
</div>
</div>
<!-- First Parallax Image with Logo Text -->
<div class="bgimg-1 w3-display-container w3-opacity-min" id="home">
<div class="w3-display-middle" style="white-space:nowrap;">
<span class="w3-center w3-padding-large w3-black w3-xlarge w3-wide w3-animate-opacity">HARRY <span class="w3-hide-small">POTTER</span> & THE ANALYSTS</span>
</div>
</div>
<!-- Container (About Section) -->
<div class="w3-content w3-container w3-padding-64" id="data">
<h2 class="w3-center">INTRODUCTION</h2>
<h4 class="w3-center"><em>“It is our choices, Harry, that show what we truly are, far more than our abilities.” — Dumbledore, Harry Potter and the Chamber of Secrets</em></h4>
<p>Let’s jump in the magic universe of Harry Potter! We have a lot of special characters that are waiting to be analysed! Which are the key characters? Which house is it better to belong to? Would you rather be a good or a bad guy? What are the most happy and dark moments in the books and movies? Stay tuned, because we will answer all these questions!
</p>
<iframe width="854" height="480" src="https://www.youtube.com/embed/0gQwaoOFpzw" title="Embedded YouTube video" frameborder="0" gesture="media" allowfullscreen></iframe>
<p>
We chose this topic because we are big fans of Harry Potter and we are interested in using our python skills on something we are passionate about. We know the communities and the characters. We also remember crying, being scared and laughing a lot in our childhood while we were watching the movies or reading the books.
</p>
<p>
There is a big universe and a lot of data about Harry Potter. It was easy to find the books and the analysis seemed interesting. There are some words that are typical to Harry Potter such as the spells or locations. So, we could also analyse them. Creating a network in this universe made also a lot of sense to us.
</p>
<p>
The Network was created with the data from the <a href="http://harrypotter.wikia.com/wiki/Main_Page" target="_blank">wikia</a> of Harry Potter.</p>
<p>
We will create a link from node a to node b if the name of node b appears on the wikia page of node a.
</p>
<p>
<center><img src="Figures/Community/Big_graph.png" width="80%" height="80%"
alt = "The network of the main characters" /></center>
</p>
<center><p class="normal"> <b>This is the graph from which we have been working, each node is a character </b></p></center>
<h4>Notebook</h4>
<p>This is the <a href="http://nbviewer.jupyter.org/github/annasmarties/annasmarties.github.io/blob/master/ipynb/General-Notebook.ipynb">notebook</a> for the project and the github:
<a href="
https://github.com/annasmarties">
https://github.com/annasmarties/annasmarties.github.io
</a></p>
<p>These are the links for the dataset:</p>
<p>
<a href="
http://harrypotter.wikia.com/wiki/Special:Statistics">
The Harry Potter API
</a>
</p>
<p>
<a href="
https://github.com/annasmarties/annasmarties.github.io/tree/master/Dataset">
The dataset on github, including the books, subtitles and spell overview.
</a>
</p>
<div class="w3-row">
<div class="w3-col m6 w3-center w3-padding-large">
<!-- <p><b><i class="fa fa-user w3-margin-right"></i>My Name</b></p><br>
<img src="/w3images/avatar_hat.jpg" class="w3-round w3-image w3-opacity w3-hover-opacity-off" alt="Photo of Me" width="500" height="333">-->
</div>
<!-- Hide this text on small devices -->
<div class="w3-col m6 w3-hide-small w3-padding-large">
</div>
</div>
<p class="w3-large w3-center w3-padding-16">Harry Potter is really good at:</p>
<p class="w3-wide"><i class="fa fa-birthday-cake"></i>Making spells</p>
<div class="w3-light-grey">
<div class="w3-container w3-padding-small w3-dark-grey w3-center" style="width:90%">90%</div>
</div>
<p class="w3-wide"><i class="fa fa-empire"></i>Speaking british</p>
<div class="w3-light-grey">
<div class="w3-container w3-padding-small w3-dark-grey w3-center" style="width:85%">85%</div>
</div>
<p class="w3-wide"><i class="fa fa-bomb"></i>Killing</p>
<div class="w3-light-grey">
<div class="w3-container w3-padding-small w3-dark-grey w3-center" style="width:75%">75%</div>
</div>
</div>
<div class="w3-row w3-center w3-dark-grey w3-padding-16">
<div class="w3-quarter w3-section">
<span class="w3-xlarge">415</span><br>
Characters
</div>
<div class="w3-quarter w3-section">
<span class="w3-xlarge">4</span><br>
Clusters
</div>
<div class="w3-quarter w3-section">
<span class="w3-xlarge">4673</span><br>
Links
</div>
<div class="w3-quarter w3-section">
<span class="w3-xlarge">The Deathly Hallows</span><br>
Most sentimental book
</div>
</div>
<!-- Second Parallax Image with Portfolio Text -->
<div class="bgimg-7 w3-display-container w3-opacity-min" id="network">
<div class="w3-display-middle">
<span class="w3-xxlarge w3-text-white w3-wide">THE NETWORK</span>
</div>
</div>
<!-- Container (Network Section) -->
<div class="w3-content w3-container w3-padding-64" >
<h2 class="w3-center">NETWORK ANALYSIS</h2>
<h4 class="w3-center"><em>“Harry - you're a great wizard, you know.” — Hermione, Harry Potter and the Philosopher's Stone</em></h4>
<p>
In this section we are trying to find out which characters are the most central, more specifically the most important characters. For this we used different tools such as betweenness centrality, PageRank, eigenvector centrality (for in and out links) and the HITS algorithm (finds Hubs and Authorities). We thus use these different measures to be able to compare them and to see if indeed they capture the most important characters in our network. The tools are rather technical to explain, thus we have explained them in our notebook.
</p>
<h3> </h3>
<!--<p>-->
<figure>
<center><img src="Figures/Community/HP_central_methods.png" width="100%" height="100%"
alt = "The network of the main characters" /></center>
</figure>
<!--</p>-->
<p></p>
<p>
In the above graph one can see the results of the different measures, with respect to the maximum.
</p>
<p>
Harry Potter nearly always appears on top. He is just distanced by Ron on the Hubs and eigenvector centrality out. This is because Harry Potter has plenty of links linking to him, thus plenty of people know him. While proportionally Ron has more links linking to other characters (out links). Ron thus knows plenty of other characters, but he is not really well known by the others. This can be due to the fact that he is really close to Harry, this means he will know a lot of people that Harry knows, but then those people will have much more connections to Harry than to him. This happens when your best friend has 7 books and movies about him.
</p>
<p> Both of the measures, Hubs and eigenvector centrality out, focus on the out links. </p>
<p>
We notice that the top 5 characters (Harry Potter, Tom Riddle (Voldemort), Dumbledore, Ron and Hermione) in all the measures are the most important characters in the Harry Potter universe (or at least what we would have guessed).
</p>
<p>
The next 9 characters are all still important but more side characters in the Harry Potter universe. Except from Sirius Black they appear in most of the books.
</p>
<p>
Dolores Umbridge is more special. She is the headmaster of Hogwarts in book 5: “Harry Potter and the Order of the Phoenix”. Thus she is very important in book 5 but otherwise she only appears in book 7 for a minor role.
</p>
<p>
The last 5 characters of the plot represent less famous characters that only appear in one or two Harry Potter books. Some of the names were not even that familiar to us! We can thus take into consideration that betweenness centrality is a poor measure to find central nodes in this graph. In our view the best measure for this graph is PageRank that captures the first 10 characters in the plot.
</p>
<p>
Finally, we notice a big correlation between the eigenvector out centrality and the Hubs. The same holds for eigenvector in centrality and the Authorities.
</p>
</div>
<!-- Second Parallax Image with Portfolio Text -->
<div class="bgimg-5 w3-display-container w3-opacity-min" id="community">
<div class="w3-display-middle">
<span class="w3-xxlarge w3-text-white w3-wide">THE COMMUNITIES</span>
</div>
</div>
<!-- Container (About Section) -->
<div class="w3-content w3-container w3-padding-64" >
<h3 class="w3-center">COMMUNITY ANALYSIS</h3>
<h4 class="w3-center"><em>“We are only as strong as we are united, as weak as we are divided.” — Dumbledore, Harry Potter and the Goblet of Fire</em></h4>
<p>We will use the Louvain algorithm to analyse the underlying communities in the Harry Potter network. </p>
<p>
We have thus applied this algorithm to 2 different sets of communities. On the one hand we were interested in the 4 different houses of Hogwarts (Gryffindor, Slytherin, Ravenclaw and Hufflepuff), but on the other hand we were also interested in the interaction between the good and bad guys in Harry Potter. There is in Harry Potter one community of bad guys: the Death Eaters. Yes the name sounds horrible! But also several communities of good guys such as the Order of the Phoenix and the Army of Dumbledore. It seemed thus interesting to assess these two separated problems.
</p>
<p>
We do not know what exactly to expect. We could have 2 different scenarios. One scenario could be that characters interact a lot within their own House. Another scenario would be that there is a lot of interaction between the most famous characters regardless of their house or characteristics. The solution will probably be a blurry mix of these two scenarios.
</p>
<p>
Here is a plot of network with only the House members. Red nodes are Gryffindor nodes. Green nodes are Slytherin nodes. Yellow nodes are Hufflepuff nodes. Blue nodes are Ravenclaw nodes.
</p>
<p>
<center><img src="Figures/Community/Graph_house.png" width="100%" height="100%"
alt = "The network of the main characters" /></center>
</p>
<center><p class="normal"><b> The network with the different Houses visualized </b></p></center>
<p>
We notice in the center a higher density of Gryffindor nodes. This makes sense since in the Harry Potter universe, the main house, the house of Harry Potter, is Gryffindor.
</p>
<p>
We had a problem since not all of our nodes had a house or a good or bad label. We thus only kept the nodes that had such a label and the links between these nodes. We have plotted our results in a confusion matrix in order to make our results more visual.
</p>
<p>
The confusion matrix is a table to compare two different sets of non-overlapping communities.
The number in the cell at row i and column j is the number of characters that are in both communities: the community corresponding to row i and the community corresponding to column j. We will use the confusion matrix to compare the communities that are naturally present in Harry Potter (houses, Good and Bad Guys) versus the communities found by the Louvain algorithm (these have no name and thus are just numbered).
</p>
<p>
<center><img src="Figures/Community/Confusion_matrix_Houses.png" width="100%" height="100%"
alt = "Visualisation of the houses" /></center>
</p>
<center><p class="normal"> <b>Confusion matrix of the houses </b></p></center>
<p>
This confusion matrix tells us that the categories of houses are not so good at distinguishing the different communities. Indeed, most of the table entries have large values. The only community that is somewhat better captured is the Slytherin house. On the one hand, we see that in community 3 we have a vast majority of Slytherins. This is also the case in community 6. On the other hand, we distinguish communities 2 and 4, which have members of several different houses but only one from Slytherin. Why would Slytherin represent a better community? The explanation that we come up with is that this house has a great deal of bad guys, while the other houses have good guys as members.
</p>
<p>
<center><img src="Figures/Community/Confusion_matrix_Good_bad_guys.png" width="70%" height="70%"
alt = "Visualisation of the communities bad guys vs. good guys" /></center>
</p>
<center><p class="normal"> <b>Confusion Matrix of the Good and bad guys </b></p></center>
<p>
A first observation here is that there are way more good than bad guys. This is because we took several different lists for the Good Guys : First Order of the Phoenix, Dumbledore's Army and the Order of the Phoenix allies. While we only had one interesting list for the bad Guys: the Death Eaters.
</p>
<p>
We see though that the distinction is pretty good! In the first community all but one are members of Dumbledore's Army and all of them are good guys. The 3rd community captures the vast majority of the Death Eaters. We notice that people who were in the frontline to fight against the death eaters such as Severus Snape, Neville’s parents or Sirius Black end up in this community also. The two remaining communities contain other good guys but it is harder to find a deep underlying structure.
<p>
You can find a plot of these communities below. The Death Eater community is in black, while the Good guys are in blue. Each number corresponds to the community found by the Louvain algorithm (column of the confusion matrix).
</p>
<p>
<img src="Figures/Community/Graph_DE_good_guys.png" width="100%" height="100%"
alt = "Visualisation of the communities bad guys vs. good guys" />
</p>
<center><p class="normal"> <b> </b></p></center>
</div>
<!-- Second Parallax Image with Portfolio Text -->
<div class="bgimg-4 w3-display-container w3-opacity-min" id="sentimental">
<div class="w3-display-middle">
<span class="w3-xxlarge w3-text-white w3-wide">SENTIMENTAL ANALYSIS</span>
</div>
</div>
<!-- Container (SENTIMENTAL ANALYSIS Section) -->
<div class="w3-content w3-container w3-padding-64" >
<h3 class="w3-center">SENTIMENTAL ANALYSIS</h3>
<h4 class="w3-center"><em><br>"Happiness can be found, even in the darkest of times, if one only remembers to turn on the light." —Dumbledore, Harry Potter and the Prisoner of Azkaban </em></h4><br>
<p>
The aim of the sentimental analysis is to understand better the good and bad feelings in the Harry Potter world. We want to discover if the books and the movies are telling the same sentimental story and find the darkest and happiest books.
</p>
<p>
Firstly, we are interested in the book's sentiments. We start with a deep understanding of the books page by page. The sentiment of the pages will guide our analysis.
</p>
<p>
How can a page have a feeling? We have a big list of super commonly used English words and a happiness grade for each one. The grade is based on the comparison and grading on average of the words taken from Twitter, Google Books (English), music lyrics (1960 to 2007), and the New York Times (1987 to 2007). Each word thus has a score. Then, the sentiment of a page will be the average of the sentiments of the words on each page.
</p>
<p>
Using some functions in Python, we have one plot for each book with the sentiment profile of that book. A sentiment profile is a graph that has the sentiment on the y-axis and position in the text on the x-axis. The effect of changing the size of the sliding window is that it changes the amount of text we are taking into consideration. Starting with 15 words we capture the moving average of the sentiment of a few sentences. Increasing the window the moving average will say more about the whole book and can capture a more important flow. In a page there are usually around 500 words. Using one page as a window, it is possible to follow how the sentiment will change page by page.
</p>
<!-- Slide for the sentimental analysis of the book : -->
<div class="slideshow-container">
<div class="mySlides fade" name="slider0">
<!--<div class="mySlides fade"> -->
<!--<div class="numbertext">1 / 3</div>-->
<center><img src="Figures/Sentimental/Book1.png" style="width:100%"></center>
<div class="text"></div>
</div>
<div class="mySlides fade" name="slider0">
<!--<div class="numbertext">2 / 3</div>-->
<center><img src="Figures/Sentimental/Book2.png" style="width:100%"></center>
<div class="text"></div>
</div>
<div class="mySlides fade" name="slider0">
<!--<div class="numbertext">3 / 3</div>-->
<center><img src="Figures/Sentimental/Book3.png" style="width:100%"></center>
<div class="text"></div>
</div>
<div class="mySlides fade" name="slider0">
<!--<div class="numbertext">3 / 3</div>-->
<center><img src="Figures/Sentimental/Book4.png" style="width:100%"></center>
<div class="text"></div>
</div>
<div class="mySlides fade" name="slider0">
<!--<div class="numbertext">3 / 3</div>-->
<center><img src="Figures/Sentimental/Book5.png" style="width:100%"></center>
<div class="text"></div>
</div>
<div class="mySlides fade" name="slider0">
<!--<div class="numbertext">3 / 3</div>-->
<center><img src="Figures/Sentimental/Book6.png" style="width:100%"></center>
<div class="text"></div>
</div>
<div class="mySlides fade" name="slider0">
<!--<div class="numbertext">3 / 3</div>-->
<center><img src="Figures/Sentimental/Book7.png" style="width:100%"></center>
<div class="text"></div>
</div>
<a class="prev" onclick="plusSlides(-1,0)">❮</a>
<a class="next" onclick="plusSlides(1,0)">❯</a>
</div>
<script> currentSlide(1,0)</script>
<div style="text-align:center">
<span class="dot" name="dot0" onclick="currentSlide(1,0)">Book1</span>
<span class="dot" name="dot0" onclick="currentSlide(2,0)">Book2</span>
<span class="dot" name="dot0" onclick="currentSlide(3,0)">Book3</span>
<span class="dot" name="dot0" onclick="currentSlide(4,0)">Book4</span>
<span class="dot" name="dot0" onclick="currentSlide(5,0)">Book5</span>
<span class="dot" name="dot0" onclick="currentSlide(6,0)">Book6</span>
<span class="dot" name="dot0" onclick="currentSlide(7,0)">Book7</span>
</div>
<p>
The sentiment profile of the books is different: the maximum and minimum sentiment seem to be the same, but they behave differently. However, from book 5 the range is bigger, spreading into the negative emotions. This could be due to the fact that the children that were reading the first 4 books have now grown older and can enjoy scarier, creepier and sadder material.
</p>
<p>
There is no book which has a calm behavior around the average. All books, instead, have a lot of ups and downs with a jagged profile and sudden changes. This is what we expect from this series of books, where there are numerous very happy and sad events.
</p>
<p>
For instance, in book 1 Harry meets Voldemort for the first time, and this is really clear from the sentiment profile. However, there are also deeply sad moments, like in book 2 when Harry is fighting Tom Riddle, or in books 5 and 6 when Sirius Black and Dumbledore die, respectively. These moments are really clear in the sentiment profile with high and low peaks.
</p>
<p>
We now take in consideration the subtitles of the 8 movies (book 7 was filmed in 2 parts). We want to visualize the sentiment profile in the movies, find the most sentimental moments. And then compare these moments with the one found in the books. Our goal is to find the difference between books and movies.
</p>
<div class="slideshow-container">
<div class="mySlides fade" name="slider1">
<!--<div class="mySlides fade"> -->
<!--<div class="numbertext">1 / 3</div>-->
<center><img src="Figures/Sentimental/Movie1.png" style="width:100%"></center>
<div class="text"></div>
</div>
<div class="mySlides fade" name="slider1">
<!--<div class="numbertext">2 / 3</div>-->
<center><img src="Figures/Sentimental/Movie2.png" style="width:100%"></center>
<div class="text"></div>
</div>
<div class="mySlides fade" name="slider1">
<!--<div class="numbertext">3 / 3</div>-->
<center><img src="Figures/Sentimental/Movie3.png" style="width:100%"></center>
<div class="text"></div>
</div>
<div class="mySlides fade" name="slider1">
<!--<div class="numbertext">3 / 3</div>-->
<center><img src="Figures/Sentimental/Movie4.png" style="width:100%"></center>
<div class="text"></div>
</div>
<div class="mySlides fade" name="slider1">
<!--<div class="numbertext">3 / 3</div>-->
<center><img src="Figures/Sentimental/Movie5.png" style="width:100%"></center>
<div class="text"></div>
</div>
<div class="mySlides fade" name="slider1">
<!--<div class="numbertext">3 / 3</div>-->
<center><img src="Figures/Sentimental/Movie6.png" style="width:100%"></center>
<div class="text"></div>
</div>
<div class="mySlides fade" name="slider1">
<!--<div class="numbertext">3 / 3</div>-->
<center><img src="Figures/Sentimental/Movie7.png" style="width:100%"></center>
<div class="text"></div>
</div>
<div class="mySlides fade" name="slider1">
<!--<div class="numbertext">3 / 3</div>-->
<center><img src="Figures/Sentimental/Movie8.png" style="width:100%"></center>
<div class="text"></div>
</div>
<a class="prev" onclick="plusSlides(-1,1)">❮</a>
<a class="next" onclick="plusSlides(1,1)">❯</a>
</div>
<script> currentSlide(1,1)</script>
<div style="text-align:center">
<span class="dot" name="dot1" onclick="currentSlide(1,1)">Movie1</span>
<span class="dot" name="dot1" onclick="currentSlide(2,1)">Movie2</span>
<span class="dot" name="dot1" onclick="currentSlide(3,1)">Movie3</span>
<span class="dot" name="dot1" onclick="currentSlide(4,1)">Movie4</span>
<span class="dot" name="dot1" onclick="currentSlide(5,1)">Movie5</span>
<span class="dot" name="dot1" onclick="currentSlide(6,1)">Movie6</span>
<span class="dot" name="dot1" onclick="currentSlide(7,1)">Movie7</span>
<span class="dot" name="dot1" onclick="currentSlide(8,1)">Movie8</span>
</div>
<!----------------------------------------- -->
<p>
On average the saddest movies are Movie 1 and Movie 6, the happiest ones are Movie 8 (part 2 of book 7) and Movie 4. However, all the movies have negative average sentiment. But let’s take a closer look. In movie 1 “Harry Potter and the sorcerer’s stone” there is a super happy moment at 2:12:00. Now we want to double check in the movie which moment it is. This scene is when Harry, Ron, and Hermione play their way across a chessboard to pass the White players in a giant-scaled and dangerous game of Wizard Chess. In order for our heroes to pass, they have to play deadly moves as Harry, Ron, and Hermione are part of the Black pieces as a Bishop, Knight, and Rook. After Ron's sacrifice, there is a very sentimental speech between Harry and Hermione about true friendship and courage.
</p>
<iframe width="854" height="480" src="https://www.youtube.com/embed/Jx3BuC4teV0?start=150" title="Embedded YouTube video" frameborder="0" allowfullscreen></iframe>
<br>
<p>
In movie 3 “The prisoner of Azkaban” the saddest moment is when Sirius Black ‘wants’ to kill Harry and Hermione proposes to sacrifice herself to protect Harry.
</p>
<p>
Movie 4 “Harry Potter and the goblet of fire” starts with its saddest moment, when Harry has a nightmare about Voldemort killing a housekeeper.
</p>
<p>
The saddest moment of all the movies is in the last movie, “Harry Potter and the Deathly Hallows – Part 2”, at 1:02:00. The Death Eaters are conquering Hogwarts and, on top of that, Voldemort kills Snape. This moment has an average sentiment of -1, which is the lowest value in the Harry Potter movies.
</p>
<br>
<!-- Embedded YouTube clip illustrating the movie 8 moment described in the preceding paragraph.
     The iframe is placed directly in the flow: a document may contain only one <body>, so the former
     nested <body> wrapper was invalid markup. The title gives the frame an accessible name. -->
<iframe width="854" height="480" src="https://www.youtube.com/embed/8cL527bg0II?start=130" title="YouTube clip: saddest moment in movie 8" frameborder="0" allowfullscreen></iframe>
<p>
Harry Potter fans, as we are, know that there is a world of difference between the magical books and their respective film adaptations. While the Potter movies are nostalgic and possess a special charm of their own, there is a huge amount of detail that doesn't translate from the page to the screen. What is clear from the sentiment profiles is that both are built to leave the reader and the viewer in a grey area, because there are a lot of emotional ups and downs at the end of every book and movie. The most important events are of course present in both: in book 1 the second saddest moment is the chess game, which is also the saddest moment in movie 1. It’s possible to find a lot of parallels between them.
</p>
<p>
In conclusion, we found an efficient way to detect and investigate the sentiment profile in the books and movies. We double checked the most sentimental moments and the turning points of the books in the movies and vice versa. It allowed us to understand more about each movie’s structure and why these movies are so passionate.
</p>
</div>
<!-- Parallax banner (background class bgimg-6) that opens the word-frequency part of the page
     (spell counts and TF-IDF). It shows the centred "WORD FREQUENCY" heading text.
     NOTE(review): id="frequency" is presumably the target of an in-page navigation link. Confirm before renaming. -->
<div class="bgimg-6 w3-display-container w3-opacity-min" id="frequency">
<div class="w3-display-middle">
<span class="w3-xxlarge w3-text-white w3-wide">WORD FREQUENCY</span>
</div>
</div>
<!-- Container (TFIDF Section) -->
<div class="w3-content w3-container w3-padding-64">
<h3 class="w3-center">Spell analysis</h3>
<h4 class="w3-center"><em>"For in dreams we enter a world that is entirely our own. Let them swim in the deepest ocean or glide over the highest cloud." — Dumbledore, Harry Potter and the Prisoner of Azkaban</em></h4>
<p>
We will now count the different spells present in the books. One could ask how this differs from the word cloud. The fact is that most spells do not appear with high frequency in the books; however, they appear at crucial moments. Following the spell count can thus give us a rough, and maybe more refined, idea of what happened at intense moments in the books.
</p>
<p>
The frequency of each cast spell or curse was analyzed. To do this, we tokenized the books and then ran the frequency distribution function. Thereafter we matched the spell list Excel sheet against the token counts of each book. Tokenization is, in lexical analysis, the process of splitting text into individual words based on a space delimiter. This resulted in the frequency of spells and curses shown below in a bar plot.
</p>
<p>
We expected, given the nature of the story, that the frequency would increase as the universe became more complex with each year. This turned out to be the case.
</p>
<!-- Slideshow group 3: one slide per book, each showing that book's spell/curse frequency bar plot.
     The arrows call plusSlides(-1, 3) and plusSlides(1, 3) to step backwards and forwards.
     Every image carries alt text so the plots are announced to assistive technology.
     NOTE(review): slides are tagged name="slider3"; presumably the slideshow script selects the group by that name. Confirm before renaming. -->
<div class="slideshow-container">
<div class="mySlides fade" name="slider3">
<center><img src="Figures/TF/Spells_Book1.png" alt="Bar plot of spell and curse frequencies in book 1" style="width:70%"></center>
</div>
<div class="mySlides fade" name="slider3">
<center><img src="Figures/TF/Spells_Book2.png" alt="Bar plot of spell and curse frequencies in book 2" style="width:80%"></center>
</div>
<div class="mySlides fade" name="slider3">
<center><img src="Figures/TF/Spells_Book3.png" alt="Bar plot of spell and curse frequencies in book 3" style="width:80%"></center>
</div>
<div class="mySlides fade" name="slider3">
<center><img src="Figures/TF/Spells_Book4.png" alt="Bar plot of spell and curse frequencies in book 4" style="width:80%"></center>
</div>
<div class="mySlides fade" name="slider3">
<center><img src="Figures/TF/Spells_Book5.png" alt="Bar plot of spell and curse frequencies in book 5" style="width:80%"></center>
</div>
<div class="mySlides fade" name="slider3">
<center><img src="Figures/TF/Spells_Book6.png" alt="Bar plot of spell and curse frequencies in book 6" style="width:80%"></center>
</div>
<div class="mySlides fade" name="slider3">
<center><img src="Figures/TF/Spells_Book7.png" alt="Bar plot of spell and curse frequencies in book 7" style="width:80%"></center>
</div>
<a class="prev" onclick="plusSlides(-1,3)">❮</a>
<a class="next" onclick="plusSlides(1,3)">❯</a>
</div>
<script> currentSlide(1,3)</script>
<!-- Navigation labels for slideshow group 3 (spell bar plots): clicking label n calls currentSlide(n, 3).
     NOTE(review): the labels are tagged name="dot3"; presumably the slideshow script looks them up by that name. Confirm against the script before renaming. -->
<div style="text-align:center">
<span class="dot" name="dot3" onclick="currentSlide(1,3)">Book1</span>
<span class="dot" name="dot3" onclick="currentSlide(2,3)">Book2</span>
<span class="dot" name="dot3" onclick="currentSlide(3,3)">Book3</span>
<span class="dot" name="dot3" onclick="currentSlide(4,3)">Book4</span>
<span class="dot" name="dot3" onclick="currentSlide(5,3)">Book5</span>
<span class="dot" name="dot3" onclick="currentSlide(6,3)">Book6</span>
<span class="dot" name="dot3" onclick="currentSlide(7,3)">Book7</span>
</div>
<p>
In the first book only a few spells are cast, along with two curses: Mortis and Mimblewimble.
</p>
<p>
In the second book we already have more spells, but a good example is the 6th book, where a lot of spells and many curses are cast.
</p>
<p>
In book 3 we see that the most frequent spell is the expecto patronum. This is because it is the only spell that keeps away the dementors. Harry has to learn to master it because in this book there are plenty of dementors and Harry faints each time a dementor comes in contact with him.
</p>
<p>
In book 6 one can see that the horcrux curse is mentioned over 40 times. That is no surprise, since the plot of books 6 and 7 is the search for the seven horcruxes that Voldemort divided his soul into, in order to destroy them.
</p>
<p>
"Yes, I think so," said Dumbledore. "Without his
Horcruxes, Voldemort will be a mortal man with a
maimed and diminished soul. Never forget, though,
that while his soul may be damaged beyond repair,
his brain and his magical powers remain intact. It will
take uncommon skill and power to kill a wizard like
Voldemort even without his Horcruxes."
</p>
<p>
And finally, when we reach book 7, we can see that the search for the horcruxes continues, with the word now appearing over 80 times. Furthermore, we now have the largest collection of spells and curses.
</p>
<p> We also notice that the words ‘horcrux’ and ‘horcruxes’, as well as other words related to the horcruxes appear in the word cloud of books 6 and 7.
</p>
<h3 class="w3-center">TF-IDF</h3>
<p class="w3-center"><em>Term frequency - inverse document frequency </em></p>
<p>
We want to find the most representative words for each book and visualize them in a word cloud figure. In a word cloud, the words differ in size based on how representative they are of a given book.
</p>
<p>
To realize this we utilize the TFIDF criterion:
</p>
<!-- Definitions of the two TFIDF components. The list stands on its own because a <ul> may not be
     nested inside a <p>: the parser closes the paragraph early and the stray closing tag yields an empty one. -->
<ul>
<li>TF: Term frequency, in each book see how often each word appears </li>
<li>IDF: Inverse document frequency, see how important this word is compared to its appearance in other books. In other words to see if a given frequent word also appears in the other books.</li>
</ul>
<p>
The result is a TFIDF matrix; in our case the rows correspond to the books and the columns to the words.
</p>
<p>
With this matrix we produce the seven word cloud figures, one for each book.
</p>
<!-- Slideshow group 2: one slide per book, each showing that book's TFIDF word cloud with a caption
     naming its representative word. The arrows call plusSlides(-1, 2) and plusSlides(1, 2).
     Image URLs have their spaces percent-encoded (%20), since raw spaces are not valid in a URL; they resolve
     to the same files. Every image carries alt text for assistive technology.
     NOTE(review): slides are tagged name="slider2"; presumably the slideshow script selects the group by that name. Confirm before renaming. -->
<div class="slideshow-container">
<div class="mySlides fade" name="slider2">
<center><img src="Figures/TF/Wordcloud_Book%201%20-%20The%20Philosopher_s%20Stone.png" alt="Word cloud of the most representative words in book 1, The Philosopher's Stone" style="width:50%"></center>
<div class="text">Representative word: smelting</div>
</div>
<div class="mySlides fade" name="slider2">
<center><img src="Figures/TF/Wordcloud_Book%202%20-%20The%20Chamber%20of%20Secrets.png" alt="Word cloud of the most representative words in book 2, The Chamber of Secrets" style="width:50%"></center>
<div class="text">Representative word: mason</div>
</div>
<div class="mySlides fade" name="slider2">
<center><img src="Figures/TF/Wordcloud_Book%203%20-%20The%20Prisoner%20of%20Azkaban.png" alt="Word cloud of the most representative words in book 3, The Prisoner of Azkaban" style="width:50%"></center>
<div class="text">Representative word: boggart</div>
</div>
<div class="mySlides fade" name="slider2">
<center><img src="Figures/TF/Wordcloud_Book%204%20-%20The%20Goblet%20of%20Fire.png" alt="Word cloud of the most representative words in book 4, The Goblet of Fire" style="width:50%"></center>
<div class="text">Representative word: portkey</div>
</div>
<div class="mySlides fade" name="slider2">
<center><img src="Figures/TF/Wordcloud_Book%205%20-%20The%20Order%20of%20the%20Phoenix.png" alt="Word cloud of the most representative words in book 5, The Order of the Phoenix" style="width:50%"></center>
<div class="text">Representative word: thestral</div>
</div>
<div class="mySlides fade" name="slider2">
<center><img src="Figures/TF/Wordcloud_Book%206%20-%20The%20Half%20Blood%20Prince.png" alt="Word cloud of the most representative words in book 6, The Half Blood Prince" style="width:50%"></center>
<div class="text">Representative word: N/A</div>
</div>
<div class="mySlides fade" name="slider2">
<center><img src="Figures/TF/Wordcloud_Book%207%20-%20The%20Deathly%20Hallows.png" alt="Word cloud of the most representative words in book 7, The Deathly Hallows" style="width:50%"></center>
<div class="text">Representative word: cattermole</div>
</div>
<a class="prev" onclick="plusSlides(-1,2)">❮</a>
<a class="next" onclick="plusSlides(1,2)">❯</a>
</div>
<script> currentSlide(1,2)</script>
<!-- Navigation labels for slideshow group 2 (word clouds): clicking label n calls currentSlide(n, 2).
     NOTE(review): the labels are tagged name="dot2"; presumably the slideshow script looks them up by that name. Confirm against the script before renaming. -->
<div style="text-align:center">
<span class="dot" name="dot2" onclick="currentSlide(1,2)">Book1</span>
<span class="dot" name="dot2" onclick="currentSlide(2,2)">Book2</span>
<span class="dot" name="dot2" onclick="currentSlide(3,2)">Book3</span>
<span class="dot" name="dot2" onclick="currentSlide(4,2)">Book4</span>
<span class="dot" name="dot2" onclick="currentSlide(5,2)">Book5</span>
<span class="dot" name="dot2" onclick="currentSlide(6,2)">Book6</span>
<span class="dot" name="dot2" onclick="currentSlide(7,2)">Book7</span>
</div>
<h4>Book 1 </h4>
<p>
smelting : The school where Dudley goes to.<br>
flamel: "Nicolas Flamel," she whispered dramatically, "is the only known maker of the Sorcerer's Stone!"<br>
mom: Mrs. Weasley, Ron’s mother is often referred to in this book.<br>
</p>
<h4>Book 2 </h4>
<p>
mason: Mrs. Mason is the lady Harry blows up in the air at the beginning of the book, resulting in a dramatic discussion about whether he should be kicked out of Hogwarts or not.
</p>
<h4>Book 3 </h4>
<p>
boggart :A boggart is an amortal shape-shifting non-being that takes on the form of the viewer's worst fear. The boggart was a big topic of this book since Harry's worst fear was a dementor at the time.<br>
Pettigrew: Is introduced in this book and the reader is informed that he in fact has been Ron's rat.<br>
grindylow : A Grindylow is a small, horned, pale-green water demon and a Dark creature.<br>
hinkypunk :The Hinkypunk is a little one-legged creature and looks as if it is made of smoke.<br>
executioner: An executioner from the Ministry of Magic that is sent to kill Buckbeak, Buckbeak turns out to be too valuable for Harry and the crew so Hermione helps the crew to travel back in time and save Buckbeak.<br>
<br>
Of the 5 most representative words of this book, 3 are creatures from Professor Remus Lupin's Defence Against the Dark Arts class, namely the boggart, grindylow and hinkypunk.
</p>
<h4>Book 4 </h4>
<p>
portkey: Is commonly used in this book. A portkey instantly brings anyone touching it to a specific location.<br>
Bulgarian: This book features both the world cup in Quidditch and the competition around the Goblet of fire where the Bulgarian team plays a big role.<br>
Karkaroff : Professor Karkaroff is the headmaster of the Scandinavian magic school that the Bulgarian Viktor Krum attends.<br>
Hostage: In the games of the Goblet of Fire, a hostage had to be saved.<br>
leprechaun: Leprechauns are magical creatures that appear in this book.
</p>
<h4>Book 5 </h4>
<p>
bowtruckle: A magical creature studied in the Care of Magical Creatures class.<br>
Thestrals : Skeletal flying horses; they were used in this book to fly the Order's members disguised as Harry to a safe spot.<br>
umbridge: Dolores Jane Umbridge becomes headmaster of Hogwarts in this book, replacing Dumbledore.<br>
</p>
<h4>Book 6 </h4>
<p>
There is not really a single word that represents this book. Notice that ‘horcrux’ and ‘horcruxes’ appear at the top of the ‘H’ and the ‘P’ of the word cloud.
</p>
<h4>Book 7 </h4>
<p>
cattermole: Is the character that Ron is disguised as when the crew goes undercover into the ministry of magic.<br>
xenophilius: He is the father of Luna Lovegood.<br>
carrow :Siblings that are both Death Eaters.<br>
deluminator : Albus Dumbledore's device, which is used to remove the light from any light source and hold it until it is released. <br>
Peverell: the Peverell family was the original owner of the Hallows.
</p>
</div>
<!--
<script>
var slideIndex = 1;
showSlides(slideIndex);
function plusSlides(n) {
showSlides(slideIndex += n);
}
function currentSlide(n) {
showSlides(slideIndex = n);
}
function showSlides(n) {
var i;
var slides = document.getElementsByClassName("mySlides");
var dots = document.getElementsByClassName("dot");
if (n > slides.length) {slideIndex = 1}
if (n < 1) {slideIndex = slides.length}
for (i = 0; i < slides.length; i++) {
slides[i].style.display = "none";
}
for (i = 0; i < dots.length; i++) {
dots[i].className = dots[i].className.replace(" active", "");
}