Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
lines = text.rdd.map(lambda r: r[0])
counts = (
lines.flatMap(lambda x: x.split(" ")).map(lambda x: (x, 1)).reduceByKey(add)
)
counts.saveAsTextFile(str(counters))
output = counts.collect()
for (word, count) in output:
print("%s: %i" % (word, count))
return get_spark_session().createDataFrame(counts)
class WordCountSparkInline(PySparkInlineTask):
    """Inline PySpark task that counts space-separated words in ``text``.

    ``run`` writes the per-word totals as text to ``counters``, prints each
    ``word: count`` pair to stdout, and assigns a DataFrame built from the
    totals to ``counters_auto_save``.
    """

    # Input, declared as a csv parameter typed as a Spark DataFrame.
    text = parameter.csv[spark.DataFrame]
    # Text output; run() saves the totals RDD to str(self.counters).
    counters = output.txt.data
    # DataFrame output; assigned on the last line of run().
    # NOTE(review): presumably persisted by the framework on assignment - confirm.
    counters_auto_save = output[spark.DataFrame]

    def run(self):
        """Count words in the first field of every row of ``self.text``."""
        from operator import add
        from dbnd_spark.spark import get_spark_session

        # Only the first field of each row is treated as the line of text.
        first_fields = self.text.rdd.map(lambda row: row[0])
        tokens = first_fields.flatMap(lambda line: line.split(" "))
        token_ones = tokens.map(lambda token: (token, 1))
        totals = token_ones.reduceByKey(add)

        totals.saveAsTextFile(str(self.counters))

        # Pull the (word, count) pairs to the driver to echo them.
        for token, total in totals.collect():
            print("%s: %i" % (token, total))

        self.counters_auto_save = get_spark_session().createDataFrame(totals)