forked from itratrahman/mapreduce_with_mrjobs
-
Notifications
You must be signed in to change notification settings - Fork 0
/
2AverageFriendsByAge.py
43 lines (30 loc) · 1.37 KB
/
2AverageFriendsByAge.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
####This source code demonstrates how to find the average number of friends by age from a large data set using MapReduce
##import statements
from mrjob.job import MRJob
##The MapReduce job class, which subclasses MRJob
class AverageFriendsByAge(MRJob):
    """MapReduce job that reports the mean friend count for each age.

    Each input line is expected to hold four comma-separated fields:
    ID, name, age and number of friends.
    """

    def mapper(self, _, line):
        """Emit one ``(age, friend count)`` pair per input line.

        The age is passed through exactly as it was read from the line;
        the friend count is converted to ``float`` so the reducer can do
        arithmetic on it.
        """
        # Exactly four fields are required; only the last two are used.
        _record_id, _person, age, friend_count = line.split(',')
        yield age, float(friend_count)

    def reducer(self, age, numFriends):
        """Emit ``(age, average friend count)`` for one age group.

        ``numFriends`` is walked a single time, so any iterable of
        numbers works, including a one-shot generator.
        """
        running_sum = 0
        seen = 0
        # enumerate keeps the element count in step with the running sum
        for seen, friend_count in enumerate(numFriends, 1):
            running_sum += friend_count
        yield age, running_sum / seen
##Entry point used when this file is executed as a script
if __name__ == '__main__':
    ## Launch the job defined above through its run() entry point
    AverageFriendsByAge.run()