DataScience/spark/lambda_lengths.py

22 lines
438 B
Python
Executable File

#!/usr/bin/env python3
from pyspark import SparkContext, SparkConf
def make_plural(word: str) -> str:
    """Return *word* with a trailing ``"s"`` appended.

    This is plain string concatenation; no irregular plural forms are
    handled (``"deer"`` becomes ``"deers"``).

    >>> make_plural("dog")
    'dogs'
    """
    return word + "s"
# Driver script: build a small RDD of animal names and print the result of
# two map() transformations over it.
sc = SparkContext()
try:
    animal_list = ['dog', 'cat', 'rabbit', 'hare', 'deer', 'gull', 'woodpecker', 'mole']
    # Distribute the list as an RDD, requesting 2 partitions.
    animal_rdd = sc.parallelize(animal_list, 2)

    # Append "s" to every element using an inline lambda.
    lambda_plural_rdd = animal_rdd.map(lambda x: x + "s")
    print(lambda_plural_rdd.collect())

    # Map every element to its character count.
    word_lengths = animal_rdd.map(lambda x: len(x))
    print(word_lengths.collect())
finally:
    # Stop the context even if one of the actions above raises, so it is
    # not left running.
    sc.stop()