add spark stuff
This commit is contained in:
parent
07bd6f6315
commit
f4000c60e0
|
@ -0,0 +1,15 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
from pyspark import SparkContext, SparkConf
|
||||||
|
# Create the Spark context with default settings (no explicit SparkConf).
sc = SparkContext()

# Build an RDD from pride_and_prejudice.txt in which every element is one
# line of the file.
pride_rdd = sc.textFile('hdfs://spark-master:9000/shared/pride_and_prejudice.txt')

# Split each line on whitespace and flatten the results into a single RDD
# of words, then preview the first few.
pride_words_try = pride_rdd.flatMap(lambda text_line: text_line.split())
print(pride_words_try.take(4))

# Turn every word into a (word, 1) pair and preview the first ten pairs.
pride_pairs = pride_words_try.map(lambda word: (word, 1))
print(pride_pairs.take(10))
|
|
@ -0,0 +1,17 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
from pyspark import SparkContext, SparkConf
|
||||||
|
# Create the Spark context with default settings (no explicit SparkConf).
sc = SparkContext()

# Build an RDD from pride_and_prejudice.txt in which every element is one
# line of the file.
pride_rdd = sc.textFile('hdfs://spark-master:9000/shared/pride_and_prejudice.txt')

# Whitespace-split each line into words, pair every word with 1, and sum
# the 1s per distinct word to get (word, count) pairs.
pride_words_try = pride_rdd.flatMap(lambda text_line: text_line.split())
pride_pairs = pride_words_try.map(lambda word: (word, 1))
word_counts = pride_pairs.reduceByKey(lambda total, count: total + count)

# takeOrdered returns the smallest elements by key, so negating the count
# yields the ten most frequent words.
top10_words = word_counts.takeOrdered(10, key=lambda pair: -pair[1])
print(top10_words)
|
|
@ -0,0 +1,21 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
from pyspark import SparkContext, SparkConf
|
||||||
|
|
||||||
|
|
||||||
|
def make_plural(word: str) -> str:
    """Return *word* with a trailing "s" appended.

    This is a naive pluralisation: no irregular forms or spelling rules
    are handled, the suffix is simply concatenated.
    """
    return word + "s"
|
||||||
|
|
||||||
|
|
||||||
|
# Create the Spark context with default settings (no explicit SparkConf).
sc = SparkContext()
animal_list = ['dog', 'cat', 'rabbit', 'hare', 'deer', 'gull', 'woodpecker', 'mole']

# Distribute the list across 2 partitions.
animal_rdd = sc.parallelize(animal_list, 2)

# Pluralise each animal name with an inline lambda.
lambda_plural_rdd = animal_rdd.map(lambda x: x + "s")
print(lambda_plural_rdd.collect())

# Map each name to its length; the builtin is passed directly instead of
# being wrapped in a redundant lambda.
word_lengths = animal_rdd.map(len)
print(word_lengths.collect())
|
|
@ -0,0 +1,17 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
from pyspark import SparkContext, SparkConf
|
||||||
|
|
||||||
|
|
||||||
|
def make_plural(word: str) -> str:
    """Return *word* with a trailing "s" appended.

    This is a naive pluralisation: no irregular forms or spelling rules
    are handled, the suffix is simply concatenated.
    """
    return word + "s"
|
||||||
|
|
||||||
|
|
||||||
|
# Create the Spark context with default settings (no explicit SparkConf).
sc = SparkContext()

animal_list = [
    'dog', 'cat', 'rabbit', 'hare',
    'deer', 'gull', 'woodpecker', 'mole',
]

# Distribute the list across 2 partitions.
animal_rdd = sc.parallelize(animal_list, 2)

# Apply the named function make_plural to every element, then bring the
# results back to the driver and print them.
plural_rdd = animal_rdd.map(make_plural)
print(plural_rdd.collect())
|
Loading…
Reference in New Issue