Troubleshoot pyspark Py4JNetworkError: Answer from Java side is empty
The pyspark library communicates with a Spark driver. If the driver dies, we get some odd error messages that do not directly indicate the root cause.
The errors are odd because of how pyspark integrates with Spark. In short, when you create a Spark session, a separate Spark (Java) process is started, and Python then sends it commands to execute.
Software | Version |
---|---|
python | 3.9 |
pyspark | 3.3.2 |
Spark | 3.3.2 |
It can happen that the driver dies while the Python process keeps running: we can still issue pyspark commands, but each of them will then fail with an error.
The driver can die for several reasons, but the most common one is being killed by an Out Of Memory (OOM) error. Make sure you have not requested more driver memory (`spark.driver.memory`)
than is available on the system, and that you are not pulling too much data into the driver (for example, by collecting a large DataFrame).
Then, restart the python app and try again.
Error Stacktraces
The exact error depends on which function we have called, so I will provide a few examples.
- For `.show()`: `Py4JError: An error occurred while calling o495.showString`
- For `.count()`: `Py4JError: An error occurred while calling o363.count`
- For `.write.parquet()`: `Py4JError: An error occurred while calling o364.parquet`
In any case, if you then try to execute another command, it will fail with `ConnectionRefusedError: [Errno 111] Connection refused`.
This indicates that the driver is no longer active.
ERROR:root:Exception while sending command. (86 + 32) / 928]
Traceback (most recent call last):
File "venv/lib/python3.9/site-packages/py4j/clientserver.py", line 516, in send_command
raise Py4JNetworkError("Answer from Java side is empty")
py4j.protocol.Py4JNetworkError: Answer from Java side is empty
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "venv/lib/python3.9/site-packages/py4j/java_gateway.py", line 1038, in send_command
response = connection.send_command(command)
File "venv/lib/python3.9/site-packages/py4j/clientserver.py", line 539, in send_command
raise Py4JNetworkError(
py4j.protocol.Py4JNetworkError: Error while sending or receiving
ERROR:root:Exception while sending command.
Traceback (most recent call last):
File "/home/marti/adsb_data/venv/lib/python3.9/site-packages/py4j/clientserver.py", line 516, in send_command
raise Py4JNetworkError("Answer from Java side is empty")
py4j.protocol.Py4JNetworkError: Answer from Java side is empty
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "venv/lib/python3.9/site-packages/py4j/java_gateway.py", line 1038, in send_command
response = connection.send_command(command)
File "/home/marti/adsb_data/venv/lib/python3.9/site-packages/py4j/clientserver.py", line 539, in send_command
raise Py4JNetworkError(
py4j.protocol.Py4JNetworkError: Error while sending or receiving
---------------------------------------------------------------------------
Py4JError Traceback (most recent call last)
Input In [12], in <cell line: 2>()
1 flight_time = compute_flight_time(gulfstream)
----> 2 flight_time.show()
File ~/venv/lib/python3.9/site-packages/pyspark/sql/dataframe.py:606, in DataFrame.show(self, n, truncate, vertical)
603 raise TypeError("Parameter 'vertical' must be a bool")
605 if isinstance(truncate, bool) and truncate:
--> 606 print(self._jdf.showString(n, 20, vertical))
607 else:
608 try:
File ~/venv/lib/python3.9/site-packages/py4j/java_gateway.py:1321, in JavaMember.__call__(self, *args)
1315 command = proto.CALL_COMMAND_NAME +\
1316 self.command_header +\
1317 args_command +\
1318 proto.END_COMMAND_PART
1320 answer = self.gateway_client.send_command(command)
-> 1321 return_value = get_return_value(
1322 answer, self.gateway_client, self.target_id, self.name)
1324 for temp_arg in temp_args:
1325 temp_arg._detach()
File ~/venv/lib/python3.9/site-packages/pyspark/sql/utils.py:190, in capture_sql_exception.<locals>.deco(*a, **kw)
188 def deco(*a: Any, **kw: Any) -> Any:
189 try:
--> 190 return f(*a, **kw)
191 except Py4JJavaError as e:
192 converted = convert_exception(e.java_exception)
File ~/adsb_data/venv/lib/python3.9/site-packages/py4j/protocol.py:334, in get_return_value(answer, gateway_client, target_id, name)
330 raise Py4JError(
331 "An error occurred while calling {0}{1}{2}. Trace:\n{3}\n".
332 format(target_id, ".", name, value))
333 else:
--> 334 raise Py4JError(
335 "An error occurred while calling {0}{1}{2}".
336 format(target_id, ".", name))
337 else:
338 type = answer[1]
Py4JError: An error occurred while calling o495.showString
---------------------------------------------------------------------------
ConnectionRefusedError Traceback (most recent call last)
Input In [13], in <cell line: 3>()
----> 3 gulfstream.where("r = 'N268VT' and alt_baro > 100").count() * 5 / 60 /60
File ~/venv/lib/python3.9/site-packages/pyspark/sql/dataframe.py:2077, in DataFrame.filter(self, condition)
2052 """Filters rows using the given condition.
2053
2054 :func:`where` is an alias for :func:`filter`.
(...)
2074 [Row(age=2, name='Alice')]
2075 """
2076 if isinstance(condition, str):
-> 2077 jdf = self._jdf.filter(condition)
2078 elif isinstance(condition, Column):
2079 jdf = self._jdf.filter(condition._jc)
File ~/venv/lib/python3.9/site-packages/py4j/java_gateway.py:1320, in JavaMember.__call__(self, *args)
1313 args_command, temp_args = self._build_args(*args)
1315 command = proto.CALL_COMMAND_NAME +\
1316 self.command_header +\
1317 args_command +\
1318 proto.END_COMMAND_PART
-> 1320 answer = self.gateway_client.send_command(command)
1321 return_value = get_return_value(
1322 answer, self.gateway_client, self.target_id, self.name)
1324 for temp_arg in temp_args:
File ~/venv/lib/python3.9/site-packages/py4j/java_gateway.py:1036, in GatewayClient.send_command(self, command, retry, binary)
1015 def send_command(self, command, retry=True, binary=False):
1016 """Sends a command to the JVM. This method is not intended to be
1017 called directly by Py4J users. It is usually called by
1018 :class:`JavaMember` instances.
(...)
1034 if `binary` is `True`.
1035 """
-> 1036 connection = self._get_connection()
1037 try:
1038 response = connection.send_command(command)
File ~/venv/lib/python3.9/site-packages/py4j/clientserver.py:284, in JavaClient._get_connection(self)
281 pass
283 if connection is None or connection.socket is None:
--> 284 connection = self._create_new_connection()
285 return connection
File ~/venv/lib/python3.9/site-packages/py4j/clientserver.py:291, in JavaClient._create_new_connection(self)
287 def _create_new_connection(self):
288 connection = ClientServerConnection(
289 self.java_parameters, self.python_parameters,
290 self.gateway_property, self)
--> 291 connection.connect_to_java_server()
292 self.set_thread_connection(connection)
293 return connection
File ~/venv/lib/python3.9/site-packages/py4j/clientserver.py:438, in ClientServerConnection.connect_to_java_server(self)
435 if self.ssl_context:
436 self.socket = self.ssl_context.wrap_socket(
437 self.socket, server_hostname=self.java_address)
--> 438 self.socket.connect((self.java_address, self.java_port))
439 self.stream = self.socket.makefile("rb")
440 self.is_connected = True
ConnectionRefusedError: [Errno 111] Connection refused