Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
k.yarn_api.cluster_metrics() and other diagnostic methods;
- that the ApplicationMaster crashed - check the application logs, k.logs();
- that the cluster is otherwise unhealthy - check the RM and NN logs
(use k.yarn_api.system_logs() to find these on a one-node system
""")
master_rpchost = self.client.masterRPCHost()
gateway = JavaGateway(GatewayClient(
address=master_rpchost, port=master_rpcport), auto_convert=True)
self.master = gateway.entry_point
rfiles = [triple_slash(f) if f.startswith('hdfs://') else
'/'.join(['hdfs://', self.hdfs_home, '.knitDeps',
os.path.basename(f)])
for f in files]
logger.debug("Resource files: %s" % rfiles)
jfiles = ListConverter().convert(rfiles, gateway._gateway_client)
jenv = MapConverter().convert(envvars, gateway._gateway_client)
self.master.init(jfiles, jenv, cmd, num_containers,
virtual_cores, memory)
return self.app_id
def __init__(self, service_listener=None, connection_listener=None,
             gateway_parameters=None, callback_server_parameters=None):
    """Set up initial bridge state; no Java gateway is connected yet.

    :param service_listener: optional listener notified of service events
    :param connection_listener: optional listener notified of connection events
    :param gateway_parameters: py4j gateway configuration (or None)
    :param callback_server_parameters: py4j callback-server configuration (or None)
    """
    # Gateway and connection handles are created lazily, elsewhere.
    self._gateway = None
    self._connection = None
    self._consumer = None
    # One lock guards overall state; each endpoint registry has its own lock.
    self._lock = RLock()
    self._imported_endpoints = {}
    self._imported_endpoints_lock = RLock()
    self._exported_endpoints = {}
    self._exported_endpoints_lock = RLock()
    # Converters used to marshal Python containers into Java collections.
    self._map_converter = MapConverter()
    self._list_converter = ListConverter()
    # Caller-supplied listeners and py4j configuration.
    self._service_listener = service_listener
    self._connection_listener = connection_listener
    self._gateway_parameters = gateway_parameters
    self._callback_server_parameters = callback_server_parameters
def __init__(self, listener=None):
    """Set up initial state; the Java gateway is attached later.

    :param listener: optional listener notified of endpoint events
    """
    # Gateway handle is created lazily, elsewhere.
    self._gateway = None
    self._consumer = None
    # General-purpose lock plus a dedicated lock for the endpoint registry.
    self._lock = RLock()
    self._endpoints_lock = RLock()
    self._endpoints = {}
    # Converters used to marshal Python containers into Java collections.
    self._map_converter = MapConverter()
    self._list_converter = ListConverter()
    self._listener = listener
def _py2java(sc, obj):
    """ Convert Python object into Java """
    # Spark-specific wrappers are unwrapped to their underlying Java handles.
    if isinstance(obj, RDD):
        return _to_java_object_rdd(obj)
    if isinstance(obj, DataFrame):
        return obj._jdf
    if isinstance(obj, SparkContext):
        return obj._jsc
    if isinstance(obj, list):
        # Convert each element recursively, then wrap as a Java list.
        elements = [_py2java(sc, item) for item in obj]
        return ListConverter().convert(elements, sc._gateway._gateway_client)
    # Java objects and primitives py4j auto-converts are passed through as-is.
    # (`long`/`unicode` keep this Python-2 compatible, as in the rest of the file.)
    if isinstance(obj, (JavaObject, int, long, float, bool, bytes, unicode)):
        return obj
    # Anything else: pickle on the Python side, deserialize on the JVM side.
    data = bytearray(PickleSerializer().dumps(obj))
    return sc._jvm.SerDe.loads(data)
def __init__(self, service_listener=None, connection_listener=None):
    """Set up initial bridge state; no Java gateway is connected yet.

    :param service_listener: optional listener notified of service events
    :param connection_listener: optional listener notified of connection events
    """
    # Gateway and connection handles are created lazily, elsewhere.
    self._gateway = None
    self._lock = RLock()
    self._consumer = None
    self._imported_endpoints = {}
    self._imported_endpoints_lock = RLock()
    self._exported_endpoints_lock = RLock()
    self._exported_endpoints = {}
    # Converters used to marshal Python containers into Java collections.
    self._map_converter = MapConverter()
    self._list_converter = ListConverter()
    self._service_listener = service_listener
    # BUG FIX: the original assigned None here, silently discarding the
    # connection_listener argument (the sibling constructor stores it).
    self._connection_listener = connection_listener
    self._connection = None
def _py2java(gateway, obj):
    """ Convert Python object into Java """
    # Spark-specific wrappers are unwrapped to their underlying Java handles.
    if isinstance(obj, RDD):
        return _to_java_object_rdd(obj)
    if isinstance(obj, DataFrame):
        return obj._jdf
    if isinstance(obj, SparkContext):
        return obj._jsc
    if isinstance(obj, (list, tuple)):
        # Convert each element recursively, then wrap as a Java list.
        elements = [_py2java(gateway, item) for item in obj]
        return ListConverter().convert(elements, gateway._gateway_client)
    if isinstance(obj, dict):
        # Convert values recursively (keys pass through), then wrap as a Java map.
        converted = {key: _py2java(gateway, value)
                     for key, value in obj.items()}
        return MapConverter().convert(converted, gateway._gateway_client)
    # JavaValue wrappers expose their Java handle via .value.
    if isinstance(obj, JavaValue):
        return obj.value
    # Java objects and primitives py4j auto-converts are passed through as-is.
    # (`long`/`unicode` keep this Python-2 compatible, as in the rest of the file.)
    if isinstance(obj, (JavaObject, int, long, float, bool, bytes, unicode)):
        return obj
    # Anything else: pickle on the Python side, deserialize via BigDL's SerDe.
    data = bytearray(PickleSerializer().dumps(obj))
    return gateway.jvm.org.apache.spark.bigdl.api.python.BigDLSerDe.loads(data)
def to_java_list(ls, gateway_client):
    """
    Convert a python list into java list
    :param ls: python list to be converted (``None`` is treated as empty)
    :param gateway_client: gateway client object
    :return: java list
    """
    # Normalize None to an empty list before handing off to py4j.
    source = ls if ls is not None else []
    converter = java_collections.ListConverter()
    return converter.convert(source, gateway_client._gateway_client)
@property
def _jrdd(self):
    """Lazily build and cache the Java ``PythonRDD`` backing this RDD.

    Serializes this RDD's computation (``self.func`` plus its serializers)
    with cloudpickle, broadcasts it when large, and constructs the JVM-side
    ``PythonRDD`` together with the current broadcast variables, environment
    and Python includes. The result is cached in ``self._jrdd_val``.

    :return: the cached Java RDD handle (``JavaRDD``)
    """
    if self._jrdd_val:
        return self._jrdd_val
    if self._bypass_serializer:
        self._jrdd_deserializer = NoOpSerializer()
    command = (self.func, self._prev_jrdd_deserializer,
               self._jrdd_deserializer)
    # the serialized command will be compressed by broadcast
    ser = CloudPickleSerializer()
    pickled_command = ser.dumps(command)
    # BUG FIX: compare the *length* of the pickled payload against the 1 MB
    # threshold. The original compared the bytes object itself to an int,
    # which in Python 2 is always True (broadcasting every command) and in
    # Python 3 raises TypeError.
    if len(pickled_command) > (1 << 20):  # 1M
        broadcast = self.ctx.broadcast(pickled_command)
        pickled_command = ser.dumps(broadcast)
    broadcast_vars = ListConverter().convert(
        [x._jbroadcast for x in self.ctx._pickled_broadcast_vars],
        self.ctx._gateway._gateway_client)
    self.ctx._pickled_broadcast_vars.clear()
    env = MapConverter().convert(self.ctx.environment,
                                 self.ctx._gateway._gateway_client)
    includes = ListConverter().convert(self.ctx._python_includes,
                                       self.ctx._gateway._gateway_client)
    python_rdd = self.ctx._jvm.PythonRDD(self._prev_jrdd.rdd(),
                                         bytearray(pickled_command),
                                         env, includes, self.preservesPartitioning,
                                         self.ctx.pythonExec,
                                         broadcast_vars, self.ctx._javaAccumulator)
    self._jrdd_val = python_rdd.asJavaRDD()
    return self._jrdd_val