18 lines
522 B
Python
Executable File
18 lines
522 B
Python
Executable File
#!/usr/bin/env python3
|
|
|
|
from pyspark import SparkContext, SparkConf
|
|
# Word-count job: print the ten most frequent whitespace-separated tokens in
# pride_and_prejudice.txt.
#
# No SparkConf is passed here; configuration is presumably supplied by the
# launcher (e.g. spark-submit) -- confirm against how this script is run.
sc = SparkContext()

try:
    # Create an RDD from pride_and_prejudice.txt where every element is a
    # line of the file.
    pride_rdd = sc.textFile('hdfs://spark-master:9000/shared/pride_and_prejudice.txt')

    # Break each line into tokens on runs of whitespace. No normalisation is
    # applied, so "The" and "the" are counted separately and punctuation
    # stays attached to the neighbouring word.
    pride_words_try = pride_rdd.flatMap(lambda line: line.split())

    # Pair every token with an initial count of 1 ...
    pride_pairs = pride_words_try.map(lambda x: (x, 1))

    # ... then sum the counts for each distinct token.
    word_counts = pride_pairs.reduceByKey(lambda x, y: x + y)

    # takeOrdered returns the smallest elements by key, so the count is
    # negated to obtain the ten (token, count) pairs with the highest counts.
    top10_words = word_counts.takeOrdered(10, key=lambda p: -p[1])

    print(top10_words)
finally:
    # Always shut the context down, even if a stage above fails, so that the
    # resources held by this application are released.
    sc.stop()
|