-
Notifications
You must be signed in to change notification settings - Fork 1
/
mrtask_e.py
36 lines (29 loc) · 1.33 KB
/
mrtask_e.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
#Task 4
# (e) Calculate the average tips-to-revenue ratio of drivers for each pickup location, in sorted order.
from mrjob.job import MRJob # importing mrjob library
class AverageTips_To_RevenueRatio(MRJob):
    """MapReduce job computing the tips-to-revenue ratio per pickup location.

    For every pickup location the job sums all tips and all revenue, then
    emits ``total_tips / total_revenue``. Note that this is the ratio of the
    sums, not the mean of the per-row ratios.

    No explicit sort step is performed here; the order of the emitted keys is
    whatever the MapReduce shuffle/sort phase produces.

    Rows that cannot be parsed and locations whose summed revenue is zero are
    skipped and reported through job counters instead of aborting the job.
    """

    # Zero-based CSV column positions read by the mapper.
    # NOTE(review): presumably the NYC taxi trip CSV layout -- confirm the
    # positions against the actual input file header.
    _PICKUP_LOCATION_COLUMN = 7
    _TIPS_COLUMN = 13
    _TOTAL_REVENUE_COLUMN = 16

    # Counter group under which skipped records are reported.
    _COUNTER_GROUP = 'AverageTips_To_RevenueRatio'

    def mapper(self, _, line):
        """Parse one CSV line into ``pickup_location, (tips, total_revenue)``.

        Args:
            _: Input key supplied by the framework; unused.
            line: One raw line of the input CSV file.

        Yields:
            The pickup location string and a ``(tips, total_revenue)`` pair of
            floats. Nothing is yielded for the header line or for rows that
            are too short or contain non-numeric amounts.
        """
        # Skip the header line
        if line.startswith('VendorID'):
            return

        fields = line.split(',')
        try:
            pickup_location = fields[self._PICKUP_LOCATION_COLUMN]
            total_revenue = float(fields[self._TOTAL_REVENUE_COLUMN])
            tips = float(fields[self._TIPS_COLUMN])
        except (IndexError, ValueError):
            # A blank, truncated or non-numeric row must not kill the whole
            # job; count it so the data-quality problem stays visible.
            self.increment_counter(self._COUNTER_GROUP, 'malformed_rows', 1)
            return

        yield pickup_location, (tips, total_revenue)

    def combiner(self, pickup_location, tips_revenues):
        """Pre-aggregate the pairs of one pickup location before the shuffle.

        Args:
            pickup_location: Key emitted by the mapper.
            tips_revenues: Iterable of ``(tips, revenue)`` pairs for that key.

        Yields:
            The same key with a single ``(total_tips, total_revenue)`` pair.
        """
        yield pickup_location, self._sum_pairs(tips_revenues)

    def reducer(self, pickup_location, tips_revenues):
        """Emit the overall tips-to-revenue ratio for one pickup location.

        Args:
            pickup_location: Key emitted by the mapper/combiner.
            tips_revenues: Iterable of ``(tips, revenue)`` pairs for that key.

        Yields:
            The pickup location and ``total_tips / total_revenue``. Nothing is
            yielded when the summed revenue is zero, because the ratio is
            undefined in that case.
        """
        total_tips, total_revenue = self._sum_pairs(tips_revenues)
        if total_revenue == 0:
            # Avoid ZeroDivisionError; report the skipped key via a counter.
            self.increment_counter(
                self._COUNTER_GROUP, 'zero_revenue_locations', 1)
            return

        average_tips_to_revenue_ratio = total_tips / total_revenue
        yield pickup_location, average_tips_to_revenue_ratio

    @staticmethod
    def _sum_pairs(tips_revenues):
        """Return ``(sum of tips, sum of revenue)`` over ``(tips, revenue)`` pairs."""
        total_tips = 0
        total_revenue = 0
        for tips, revenue in tips_revenues:
            total_tips += tips
            total_revenue += revenue
        return total_tips, total_revenue
# Script entry point: start the MapReduce job only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    AverageTips_To_RevenueRatio.run()