Livy is an open-source REST interface for interacting with Spark.
pylivy is a Python client for Livy, enabling easy remote code execution on a Spark cluster.
from livy import LivySession
# Base URL of the Livy server to connect to (example host shown here).
LIVY_URL = 'http://spark.example.com:8998'

# Use the session as a context manager so the statements below run inside
# the scope of a single Livy session.
with LivySession(LIVY_URL) as session:
    # Run some code on the remote cluster.
    # NOTE(review): `df` is not defined in this snippet; it is assumed to
    # already exist in the remote session -- confirm before copying.
    session.run("filtered = df.filter(df.name == 'Bob')")
    # Retrieve the result, binding the remote `filtered` value to a local name.
    local_df = session.read('filtered')