Search code examples
erlang elixir actor erl

A little Erlang actor demo


I am a rookie in Erlang, and I tried to write the code for a spider:

-module(http).
-compile([export_all]).

%% Starts ssl and inets, then spawns the two helper processes and gives
%% them names: `m` runs loop/0 and `fetch` runs x/0. Returns ok.
init() ->
  ssl:start(),
  inets:start(),
  Reporter = spawn(fun() -> loop() end),
  register(m, Reporter),
  Fetcher = spawn(fun() -> x() end),
  register(fetch, Fetcher),
  ok.

%% Prints every id in the hard-coded list and sends a {go, Gid} request
%% for it to the `fetch` process, then sends done to `m`.
%% Returns the atom done.
start() ->
  L1 = [114689,114688,114691,114690], % detail page id
  Dispatch = fun(Gid) ->
    io:format("~p ~n", [Gid]),
    fetch ! {go, Gid}
  end,
  %% Only the side effects matter here, so no result list is built.
  lists:foreach(Dispatch, L1),
  m ! done,
  done.

%% Entry point: ignores its argument, runs init/0 and then start/0,
%% and returns whatever start/0 returns.
main(_Args) ->
  init(),
  start().

%% Receive loop of the `m` process. Prints a banner on every iteration,
%% then waits for one message: {no_res, Gid} and {have_res, Gid} are
%% printed and the loop continues; done prints a final line and ends
%% the loop.
loop() ->
  io:fwrite("this is in loop!!"),
  receive
    done ->
      io:format("wowowow", []);
    {have_res, Gid} ->
      io:format("~p have res ! ~n", [Gid]),
      loop();
    {no_res, Gid} ->
      io:format("~p no res ! ~n", [Gid]),
      loop()
  end.

%% Receive loop of the `fetch` process. Each {go, Gid} request is handed
%% to http_post/1, after which the process waits for the next request, so
%% every queued id is handled instead of only the first one. Any other
%% message ends the loop and the function returns ready.
x() ->
  receive
    {go, Gid} ->
      http_post(Gid),
      x();
    _ ->
      ready
  end.

%% Posts a one-element JSON array carrying Gid to URL and tells the
%% process registered as `m` whether the response body was empty
%% ({no_res, Gid}) or not ({have_res, Gid}). Request failures are only
%% printed; nothing is sent to `m` in that case.
http_post(Gid) ->
  URL = "https://example.com", % url demo
  Type = "application/json",
  ReqArr = ["[{\"id\": \"", integer_to_list(Gid), "\"}]"],
  ReqBody = string:join(ReqArr, ""),

  case httpc:request(post, {URL, [], Type, ReqBody}, [], []) of
    %% Match the empty body directly instead of walking it with length/1.
    {ok, {_, _, []}} ->
      io:format("Y: ~p ~n", [Gid]),
      m ! {no_res, Gid};
    {ok, {_, _, _ResBody}} ->
      io:format("N: ~p ~n", [Gid]),
      m ! {have_res, Gid};
    {error, Reason} ->
      io:format("error cause ~p~n", [Reason]);
    _ ->
      io:format("error cause ~n", [])
  end.

Now, when I execute the code, the process terminates at once. Log:

enter image description here

I have two questions:

  1. How do I solve this problem?
  2. If I have tens of thousands of ids in L1, how do I handle that? Spawn dozens of actors? If so, how do I decide which actor receives which id?

Solution

  • 1) Instead of wrapping an anonymous function around loop():

    register(m, spawn(fun() -> loop() end)),
    

    you can call spawn/3:

    register(m, spawn(?MODULE, loop, []) ),
    

    Same here:

    register(fetch, spawn(fun() -> x() end)),
    

    Correction: calling spawn/3 in an escript doesn't work unless you precompile the script with:

    escript -c myscript.erl
    

    2) An escript creates a process that executes a main/1 function that you define. Your main/1 function looks like this:

    %% Runs init/0 and then start/0. Neither of them waits in a receive,
    %% so main/1 returns as soon as start/0 does.
    main(_) ->
      init(),
      start().
    

    The init() function doesn't loop, so it ends after all the functions it calls return, i.e. ssl:start(), inets:start(), register(). Your start() function doesn't loop either, so after start() returns, main() returns, and because the process executing the main() function has nothing left to do, it ends.

    3)

    How I solve this problem ?

    Http post requests take an eternity in terms of computer processing speeds and there is waiting involved, so you can speed up your code by executing multiple post requests at the same time rather than executing them sequentially. In erlang, the way you execute things at the same time is to spawn additional processes. In your case, that would mean spawning a new process for each post request.

    Your main process can be an infinite loop that sits in a receive waiting for messages, something like this:

    %% Entry point: ignores its argument, performs the setup in init/0 and
    %% then runs loop/0 in the calling process.
    main(_Args) ->
      init(),
      loop().
    

    where init() looks like this:

    %% Starts ssl and inets and registers the calling process under the
    %% name m. That is the name http_post/1 sends its {no_res, Gid} and
    %% {have_res, Gid} results to, so the process that goes on to run
    %% loop/0 has to own it. Returns ok.
    init() ->
      ssl:start(),
      inets:start(),
      register(m, self()),
      ok.
    

    Then you can create a user interface function like start() that spawns the post requests:

    %% Prints every id in the hard-coded list and spawns one process per
    %% id that runs http_post/1 for it.
    %% A fun is spawned rather than spawn(?MODULE, http_post, [Gid])
    %% because spawn/3 does not work in an escript unless the script has
    %% been precompiled.
    start() ->
      L1 = [114689,114688,114691,114690], % detail page id

      lists:foreach(fun(Gid) ->
        io:format("~p ~n", [Gid]),
        spawn(fun() -> http_post(Gid) end)
      end, L1).
    

    ---Response to comment---

    Here's an example:

    %First line cannot have erlang code.
    %% Entry point: ignores its argument, runs init/0 and then start/0,
    %% and returns whatever start/0 returns.
    main(_Args) ->
        init(),
        start().
    
    %% Starts ssl and then inets; the value returned is the result of
    %% inets:start/0.
    init() ->
        ssl:start(),
        InetsResult = inets:start(),
        InetsResult.
    
    %% Spawns one worker per id, each running http_post/2 with this
    %% process as the reply target, then waits for one reply from every
    %% worker, in spawn order, and prints it. Returns ok.
    start() ->
        L1 = [1, 2, 3, 4],
        Self = self(),

        Pids = lists:map(fun(Gid) ->
            Pid = spawn(fun() -> http_post(Gid, Self) end),
            io:format("Spawned process with Gid=~w, Pid=~w~n", [Gid, Pid]),
            Pid
        end, L1),

        io:format("Pids = ~w~n", [Pids]),

        lists:foreach(
            fun(Pid) ->
                %% Pid is already bound here, so each pattern below only
                %% matches a message sent by that particular worker.
                receive
                    {no_res, {Gid, Pid} } ->
                        io:format("no response! (Gid=~w, Pid=~w)~n", [Gid, Pid]);
                    {have_res, {Gid, Pid, Reply}} ->
                        io:format("got response: ~p~n(Gid=~w, Pid=~w)~n~n",
                                  [Reply, Gid, Pid]);
                    %% http_post/2 reports failures as {Gid, WorkerPid, Error}:
                    %% the id comes first and the pid second. Matching them
                    %% the other way round never succeeds and leaves this
                    %% receive waiting forever.
                    {Gid, Pid, Error} ->
                        io:format("Error:~n(Gid=~w, Pid=~w)~n~p~n", [Gid, Pid, Error])
                end
            end, Pids).
    
    %% Posts a one-element JSON array carrying Gid to URL and sends the
    %% outcome to Pid:
    %%   {no_res, {Gid, self()}}            - the response body was empty
    %%   {have_res, {Gid, self(), ResBody}} - the response body was not empty
    %%   {Gid, self(), Result}              - httpc:request/4 returned anything else
    %% The return value is the message that was sent.
    http_post(Gid, Pid) ->
      URL = "http://localhost:8000/cgi-bin/read_json.py", % url demo
      Type = "application/json",
      ReqBody = string:join(["[{\"id\": \"", integer_to_list(Gid), "\"}]"], ""),
      Request = {URL, [], Type, ReqBody},

      case httpc:request(post, Request, [], []) of
        {ok, {_, _, []}} ->
          io:format("Y: ~p ~n", [Gid]),
          Pid ! {no_res, {Gid, self()} };
        {ok, {_, _, ResBody}} ->
          io:format("N: ~p ~n", [Gid]),
          Pid ! {have_res, {Gid, self(), ResBody} };
        %% {error, Reason} and any other result are forwarded unchanged.
        Other ->
          Pid ! {Gid, self(), Other}
      end.
    

    If I have tens of thousands of id in L1 , how solve?

    The same way. One hundred thousand processes is not considered a lot of processes in erlang.