How to use the databand.output function in databand

To help you get started, we’ve selected a few databand examples, based on popular ways databand.output is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

Source (GitHub): databand-ai / dbnd / examples / src / dbnd_examples / dbnd_spark / word_count_inline.py (view on GitHub)
lines = text.rdd.map(lambda r: r[0])
    counts = (
        lines.flatMap(lambda x: x.split(" ")).map(lambda x: (x, 1)).reduceByKey(add)
    )
    counts.saveAsTextFile(str(counters))
    output = counts.collect()
    for (word, count) in output:
        print("%s: %i" % (word, count))

    return get_spark_session().createDataFrame(counts)


class WordCountSparkInline(PySparkInlineTask):
    """Inline PySpark task that counts space-separated words in ``text``.

    ``run`` takes the first field of every row of ``text``, splits it on
    single spaces, counts each resulting token, writes the counts RDD as
    text to the ``counters`` location, prints every ``word: count`` pair,
    and assigns a DataFrame built from the counts to ``counters_auto_save``.
    """

    # Input, declared as ``parameter.csv`` typed as a Spark DataFrame.
    text = parameter.csv[spark.DataFrame]
    # Output declared as ``output.txt.data``; ``run`` passes its string form
    # to ``saveAsTextFile`` explicitly.
    counters = output.txt.data
    # Output typed as a Spark DataFrame; ``run`` only assigns to it.
    # NOTE(review): presumably the framework persists the assigned value --
    # confirm against the dbnd documentation.
    counters_auto_save = output[spark.DataFrame]

    def run(self):
        """Count the words, save and print the counts, and set the DataFrame output."""
        from operator import add
        from dbnd_spark.spark import get_spark_session

        # Only the first field of each row is used as the line of text.
        first_fields = self.text.rdd.map(lambda row: row[0])
        tokens = first_fields.flatMap(lambda line: line.split(" "))
        word_counts = tokens.map(lambda token: (token, 1)).reduceByKey(add)

        word_counts.saveAsTextFile(str(self.counters))

        # Bring the (word, count) pairs to the driver and print each one.
        for word, count in word_counts.collect():
            print("%s: %i" % (word, count))

        self.counters_auto_save = get_spark_session().createDataFrame(word_counts)