Search code examples
python, h2o

H2O AutoML keeps getting Unexpected HTTP error


I have used H2O on one dataset, and I am now running the exact same code on another dataset, but I keep getting an 'Unexpected HTTP error'.

The code sample is as follows:

# Minimal reproduction: load a CSV into H2O and train AutoML on the "Class" column.
import h2o
# Start or connect to an H2O cluster (no arguments given, so presumably a local one).
h2o.init()
# Load the training data from the CSV file into an H2O frame.
train_data = h2o.import_file("pathtofile.csv")
# Predictors are all column names except the target column.
x = train_data.columns
y = "Class"
x.remove(y)
# Convert the target to a factor — presumably so it is treated as categorical
# (classification) rather than numeric; confirm against the H2O docs.
train_data[y] = train_data[y].asfactor()
from h2o.automl import H2OAutoML
# Limits passed to AutoML: 10 models, 57600 seconds (16 hours), fixed seed.
aml = H2OAutoML(max_models=10, seed=1,  max_runtime_secs=57600)
# This is the call that raises H2OConnectionError in the traceback below.
aml.train(x=x, y=y, training_frame=train_data)

The error at this point is:

---------------------------------------------------------------------------
H2OConnectionError                        Traceback (most recent call last)
<ipython-input-14-435d6f31b64e> in <module>()
      1 from h2o.automl import H2OAutoML
      2 aml = H2OAutoML(max_models=10, seed=1,  max_runtime_secs=57600)
----> 3 aml.train(x=x, y=y, training_frame=train_data)

/opt/anaconda3/envs/ege/lib/python2.7/site-packages/h2o/automl/autoh2o.pyc in train(self, x, y, training_frame, fold_column, weights_column, validation_frame, leaderboard_frame, blending_frame)
    443         poll_updates = ft.partial(self._poll_training_updates, verbosity=self._verbosity, state={})
    444         try:
--> 445             self._job.poll(poll_updates=poll_updates)
    446         finally:
    447             poll_updates(self._job, 1)

/opt/anaconda3/envs/ege/lib/python2.7/site-packages/h2o/job.pyc in poll(self, poll_updates)
     55             pb = ProgressBar(title=self._job_type + " progress", hidden=hidden)
     56             if poll_updates:
---> 57                 pb.execute(self._refresh_job_status, print_verbose_info=ft.partial(poll_updates, self))
     58             else:
     59                 pb.execute(self._refresh_job_status)

/opt/anaconda3/envs/ege/lib/python2.7/site-packages/h2o/utils/progressbar.pyc in execute(self, progress_fn, print_verbose_info)
    169                 # Query the progress level, but only if it's time already
    170                 if self._next_poll_time <= now:
--> 171                     res = progress_fn()  # may raise StopIteration
    172                     assert_is_type(res, (numeric, numeric), numeric)
    173                     if not isinstance(res, tuple):

/opt/anaconda3/envs/ege/lib/python2.7/site-packages/h2o/job.pyc in _refresh_job_status(self)
     92     def _refresh_job_status(self):
     93         if self._poll_count <= 0: raise StopIteration("")
---> 94         jobs = h2o.api("GET /3/Jobs/%s" % self.job_key)
     95         self.job = jobs["jobs"][0] if "jobs" in jobs else jobs["job"][0]
     96         self.status = self.job["status"]

/opt/anaconda3/envs/ege/lib/python2.7/site-packages/h2o/h2o.pyc in api(endpoint, data, json, filename, save_to)
    102     # type checks are performed in H2OConnection class
    103     _check_connection()
--> 104     return h2oconn.request(endpoint, data=data, json=json, filename=filename, save_to=save_to)
    105 
    106 

/opt/anaconda3/envs/ege/lib/python2.7/site-packages/h2o/backend/connection.pyc in request(self, endpoint, data, json, filename, save_to)
    439             else:
    440                 self._log_end_exception(e)
--> 441                 raise H2OConnectionError("Unexpected HTTP error: %s" % e)
    442         except requests.exceptions.Timeout as e:
    443             self._log_end_exception(e)

H2OConnectionError: Unexpected HTTP error: ('Connection aborted.', error(104, 'Connection reset by peer'))

I have tried h2o.cluster().shutdown() and killing the process but I keep getting the above error.


Solution

  • It turns out that one column in the dataset contained names with non-UTF-8 characters such as 'Ö' and 'Ş'. After deleting this column, it started working again. In my opinion, this should be fixed by H2O in a later release.