add spark stuff
This commit is contained in:
parent
07bd6f6315
commit
f4000c60e0
|
@ -0,0 +1,15 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from pyspark import SparkContext, SparkConf
|
||||
# Exploratory first steps of a word count over "Pride and Prejudice":
# peek at the split words, then at the (word, 1) pairs built from them.
BOOK_PATH = 'hdfs://spark-master:9000/shared/pride_and_prejudice.txt'


def split_into_words(text_line):
    """Break one line of text into its whitespace-separated words."""
    return text_line.split()


def pair_with_one(word):
    """Pair a word with an initial count of 1."""
    return (word, 1)


sc = SparkContext()

# Create an RDD from the book where every element is a line of the file.
lines = sc.textFile(BOOK_PATH)

# flatMap merges the per-line word lists into a single RDD of words.
words = lines.flatMap(split_into_words)
print(words.take(4))

pairs = words.map(pair_with_one)
print(pairs.take(10))
|
|
@ -0,0 +1,17 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from pyspark import SparkContext, SparkConf
|
||||
# Word count over "Pride and Prejudice": print the ten most frequent words.
sc = SparkContext()

# Create an RDD from the book where every element is a line of the file.
book_lines = sc.textFile('hdfs://spark-master:9000/shared/pride_and_prejudice.txt')

# Split each line on whitespace, tag every word with a 1, then sum the 1s
# per distinct word.
word_counts = (
    book_lines
    .flatMap(lambda text_line: text_line.split())
    .map(lambda word: (word, 1))
    .reduceByKey(lambda left, right: left + right)
)

# takeOrdered returns the smallest elements by key, so the count is negated
# to get the largest counts first.
top10_words = word_counts.takeOrdered(10, key=lambda pair: -pair[1])
print(top10_words)
|
|
@ -0,0 +1,21 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from pyspark import SparkContext, SparkConf
|
||||
|
||||
|
||||
def make_plural(word: str) -> str:
    """Return ``word`` with the letter "s" appended.

    This is a naive pluralisation: the suffix is added unconditionally, with
    no special handling for irregular nouns or words already ending in "s".

    Args:
        word: The singular word to pluralise.

    Returns:
        ``word`` followed by "s".
    """
    return word + "s"
|
||||
|
||||
|
||||
sc = SparkContext()

# Distribute a small list of animal names as an RDD, requesting 2 slices.
animals = sc.parallelize(
    ['dog', 'cat', 'rabbit', 'hare', 'deer', 'gull', 'woodpecker', 'mole'],
    2,
)

# Pluralise every name with an inline lambda and print the collected list.
pluralised = animals.map(lambda name: name + "s")
print(pluralised.collect())

# Map every name to its character count and print the collected list.
name_lengths = animals.map(lambda name: len(name))
print(name_lengths.collect())
|
|
@ -0,0 +1,17 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from pyspark import SparkContext, SparkConf
|
||||
|
||||
|
||||
def make_plural(word: str) -> str:
    """Return ``word`` with the letter "s" appended.

    This is a naive pluralisation: the suffix is added unconditionally, with
    no special handling for irregular nouns or words already ending in "s".

    Args:
        word: The singular word to pluralise.

    Returns:
        ``word`` followed by "s".
    """
    return word + "s"
|
||||
|
||||
|
||||
sc = SparkContext()

# Distribute a small list of animal names as an RDD, requesting 2 slices.
animals = sc.parallelize(
    ['dog', 'cat', 'rabbit', 'hare', 'deer', 'gull', 'woodpecker', 'mole'],
    2,
)

# Apply the named make_plural function to every element, then bring the
# results back to the driver and print them.
print(animals.map(make_plural).collect())
|
Loading…
Reference in New Issue