I have used h2o successfully on one dataset, and I am now trying to run the exact same code on another dataset, but I keep getting an 'Unexpected HTTP error'.
The code sample is as follows:
# Minimal reproduction: load a CSV into H2O and run AutoML on it.
import h2o
# Per the h2o docs, init() connects to a local H2O server, starting one if needed.
h2o.init()
# Load the training data from CSV into an H2O frame.
train_data = h2o.import_file("pathtofile.csv")
# Predictors are all columns of the frame except the target column "Class".
x = train_data.columns
y = "Class"
x.remove(y)
# Convert the target to a factor (categorical) column —
# presumably so that AutoML treats the task as classification.
train_data[y] = train_data[y].asfactor()
from h2o.automl import H2OAutoML
# AutoML run with max_models=10, a fixed seed, and max_runtime_secs=57600 (16 hours).
aml = H2OAutoML(max_models=10, seed=1, max_runtime_secs=57600)
# This is the call that fails with H2OConnectionError while polling the job status.
aml.train(x=x, y=y, training_frame=train_data)
The error at this point is:
---------------------------------------------------------------------------
H2OConnectionError Traceback (most recent call last)
<ipython-input-14-435d6f31b64e> in <module>()
1 from h2o.automl import H2OAutoML
2 aml = H2OAutoML(max_models=10, seed=1, max_runtime_secs=57600)
----> 3 aml.train(x=x, y=y, training_frame=train_data)
/opt/anaconda3/envs/ege/lib/python2.7/site-packages/h2o/automl/autoh2o.pyc in train(self, x, y, training_frame, fold_column, weights_column, validation_frame, leaderboard_frame, blending_frame)
443 poll_updates = ft.partial(self._poll_training_updates, verbosity=self._verbosity, state={})
444 try:
--> 445 self._job.poll(poll_updates=poll_updates)
446 finally:
447 poll_updates(self._job, 1)
/opt/anaconda3/envs/ege/lib/python2.7/site-packages/h2o/job.pyc in poll(self, poll_updates)
55 pb = ProgressBar(title=self._job_type + " progress", hidden=hidden)
56 if poll_updates:
---> 57 pb.execute(self._refresh_job_status, print_verbose_info=ft.partial(poll_updates, self))
58 else:
59 pb.execute(self._refresh_job_status)
/opt/anaconda3/envs/ege/lib/python2.7/site-packages/h2o/utils/progressbar.pyc in execute(self, progress_fn, print_verbose_info)
169 # Query the progress level, but only if it's time already
170 if self._next_poll_time <= now:
--> 171 res = progress_fn() # may raise StopIteration
172 assert_is_type(res, (numeric, numeric), numeric)
173 if not isinstance(res, tuple):
/opt/anaconda3/envs/ege/lib/python2.7/site-packages/h2o/job.pyc in _refresh_job_status(self)
92 def _refresh_job_status(self):
93 if self._poll_count <= 0: raise StopIteration("")
---> 94 jobs = h2o.api("GET /3/Jobs/%s" % self.job_key)
95 self.job = jobs["jobs"][0] if "jobs" in jobs else jobs["job"][0]
96 self.status = self.job["status"]
/opt/anaconda3/envs/ege/lib/python2.7/site-packages/h2o/h2o.pyc in api(endpoint, data, json, filename, save_to)
102 # type checks are performed in H2OConnection class
103 _check_connection()
--> 104 return h2oconn.request(endpoint, data=data, json=json, filename=filename, save_to=save_to)
105
106
/opt/anaconda3/envs/ege/lib/python2.7/site-packages/h2o/backend/connection.pyc in request(self, endpoint, data, json, filename, save_to)
439 else:
440 self._log_end_exception(e)
--> 441 raise H2OConnectionError("Unexpected HTTP error: %s" % e)
442 except requests.exceptions.Timeout as e:
443 self._log_end_exception(e)
H2OConnectionError: Unexpected HTTP error: ('Connection aborted.', error(104, 'Connection reset by peer'))
I have tried calling h2o.cluster().shutdown()
and killing the process, but I keep getting the above error.
It turns out that one column in the dataset contained names with non-ASCII characters such as 'Ö' and 'Ş' (apparently not stored as valid UTF-8). After deleting this column, it started working again. In my opinion, this should be fixed by H2O in a later release.