I'm trying to port this C# class to Delphi: http://lab.abhinayrathore.com/imdb/imdb_asp_csharp.htm
I have already written the helper functions (e.g. MatchAll) and the code to fetch the HTML of a web page.
My problem is with this line:
ArrayList imdbUrls = matchAll(@"<a href=""(http://www.imdb.com/title/tt\d{7}/)"".*?>.*?</a>", html);
Here is my version of MatchAll
// Returns one capture group from every match of aExpression found in aHtml.
//   aExpression - regular expression, evaluated with the roMultiLine option.
//   aHtml       - text to search.
//   i           - index of the capture group to collect from each match
//                 (0, the default, is the whole match).
// Each collected value is trimmed of leading/trailing whitespace. The result
// is an empty array when nothing matches.
function MatchAll(const aExpression, aHtml: string; i: Integer = 0): TStringDynArray;
var
  Matches: TMatchCollection;
  Index: Integer;
begin
  Matches := TRegEx.Matches(aHtml, aExpression, [roMultiLine]);
  // Size the result once up front instead of reallocating for every match.
  SetLength(Result, Matches.Count);
  for Index := 0 to Matches.Count - 1 do
    Result[Index] := Matches[Index].Groups[i].Value.Trim;
end;
And my version of GetUrlData
// Returns, as a decimal string, AFrom plus a pseudo-random offset drawn
// with Random(ATo - AFrom); i.e. a value in the half-open range [AFrom, ATo).
function RandomNext(const AFrom, ATo: Integer): string;
var
  Offset: Integer;
begin
  Offset := Random(ATo - AFrom);
  Result := IntToStr(AFrom + Offset);
end;
// Downloads the resource at Url with an HTTP GET and returns the response
// body as a string. The request carries a randomised X-Forwarded-For header
// and a randomised Firefox-style User-Agent.
// Any exception raised by TIdHTTP.Get propagates to the caller; the client
// object is released in every case.
function GetUrlData(const Url: string): string;
var
  Client: TIdHTTP;
begin
  Client := TIdHTTP.Create(nil);
  try
    // Random IP address
    Client.Request.CustomHeaders.AddValue('X-Forwarded-For',
      Format('%d.%d.%d.%d', [Random(255), Random(255), Random(255), Random(255)]));
    // Random User-Agent. Assign it through Request.UserAgent rather than
    // CustomHeaders: Indy always emits its own User-Agent header from this
    // property, so a custom header would not replace the default value.
    Client.Request.UserAgent :=
      'Mozilla/' + RandomNext(3, 5) + '.0 (Windows NT ' + RandomNext(3, 5) + '.' + RandomNext(0, 2) +
      '; rv:2.0.1) Gecko/20100101 Firefox/' +
      RandomNext(3, 5) + '.' + RandomNext(0, 5) + '.' + RandomNext(0, 5);
    Result := Client.Get(Url);
  finally
    // Release the client even when Get raises (e.g. on an HTTP error status).
    Client.Free;
  end;
end;
Given this URL : http://www.google.com/search?q=imdb+13%20sins
How do I extract the IMDb URLs?
Jens Borrisholt
This did the trick: www\.imdb\.com/title/tt\d{7}