from mrjob.job import MRJob
from mrjob.step import MRStep
import re

# Matches maximal runs of `\w` characters (letters, digits, underscore).
# Anything else -- whitespace, punctuation, apostrophes, hyphens -- acts as a
# separator, so "don't" is tokenized as "don" and "t".
WORD_REGEX = re.compile(r"[\w]+")

class FreqCount(MRJob):
    """Tally word occurrences by word length in two MapReduce steps.

    Step 1 tokenizes each input line, sums the occurrences of every
    distinct (lowercased) word, and re-keys that sum by the word's length.
    Step 2 adds up those per-word sums for each length, so the final output
    is ``(word length, total occurrences of words with that length)``.
    """

    def mapper(self, _, line):
        """Emit ``(lowercased word, 1)`` for every regex match in *line*.

        The input key is ignored; only the line text is used.
        """
        for match in WORD_REGEX.finditer(line):
            token = match.group()
            yield token.lower(), 1

    def reduce_length(self, word, count):
        """Collapse one word's counts and re-key the sum by word length.

        *count* is the iterable of counts received for *word*; the word
        itself is dropped and only its length is passed on as the new key.
        """
        length = len(word)
        occurrences = sum(count)
        yield length, occurrences

    def reduce_count(self, word_len, count):
        """Sum every partial count received for a single word length."""
        total = sum(count)
        yield word_len, total

    def steps(self):
        """Return the two-step pipeline, in execution order."""
        # Step 1: tokenize, then sum per word and re-key by length.
        tokenize_and_rekey = MRStep(mapper=self.mapper,
                                    reducer=self.reduce_length)
        # Step 2: reducer-only step that merges sums sharing a length.
        merge_by_length = MRStep(reducer=self.reduce_count)
        return [tokenize_and_rekey, merge_by_length]

if __name__ == "__main__":
    # MRJob.run is a classmethod that builds the job from the command-line
    # arguments itself, so it is called on the class; instantiating FreqCount
    # first would only create a throwaway job and parse the arguments twice.
    FreqCount.run()
