mrjob
mrjob copied to clipboard
NameError: arguments is not defined
I want to join two files, but I get this error: NameError: name 'names' is not defined
!python job.py data.txt --database item.txt
from mrjob.job import MRJob
from mrjob.step import MRStep
class MRPeopleScores(MRJob):
    """Join per-employee salaries from the input with names from a side file.

    The main input is tab-separated with four fields per line
    (employee id, age, one unused field, salary).  The side file passed via
    ``--database`` is pipe-separated, with the employee id in the first
    field and the name in the second.
    """

    def steps(self):
        """Return the single map/reduce step that makes up this job."""
        return [
            MRStep(
                # These must name methods that actually exist on the class;
                # ``mapper_1`` / ``reducer_1`` were never defined.
                mapper=self.mapper,
                reducer_init=self.reducer_init,
                reducer=self.reducer,
            )
        ]

    def configure_args(self):
        """Register ``--database``, the side file shipped with each task."""
        super(MRPeopleScores, self).configure_args()
        self.add_file_arg('--database')

    def mapper(self, _, line):
        """Yield ``(employee_id as int, salary)`` for one tab-separated line.

        Raises:
            ValueError: if the line does not have exactly four fields or the
                employee id is not an integer.
        """
        employee_id, _age, _var, salary = line.split("\t")
        yield int(employee_id), salary

    def reducer_init(self):
        """Load the id -> name lookup table from the side file.

        Reads the path given by ``--database``, falling back to ``item.txt``
        in the working directory when the option was not supplied.

        Raises:
            ValueError: if an id in the side file is not an integer.
        """
        # The table has to exist before it can be filled; it is a dict
        # because it is indexed by employee id.
        self.names = {}
        path = self.options.database or "item.txt"
        with open(path) as f:
            for line in f:
                fields = line.rstrip("\r\n").split('|')
                if len(fields) < 2:
                    # Blank or truncated line: nothing to join on.
                    continue
                # Key by int so the lookup matches the int keys the mapper
                # emits; string keys would never match.
                self.names[int(fields[0])] = fields[1]

    def reducer(self, employee_id, salary):
        """Yield ``(employee_id, (salary, name))`` for every salary of the id.

        ``name`` is ``None`` when the id is absent from the side file, so
        unmatched input records are kept rather than aborting the job.
        """
        # ``names`` lives on the instance; a bare ``names`` is the NameError.
        name = self.names.get(employee_id)
        for salary_ in salary:
            yield employee_id, (salary_, name)
if __name__ == '__main__':
    # Executed as a script: invoke run() on the job class.
    MRPeopleScores.run()
Hello: I think you should initialize self.names = {} (a dict, since it is indexed by ID) at the start of reducer_init, and use "self.names[...]" instead of "names[...]" in the last yield. Best.