Search code examples
.netvb.netstack-overflow

Quick (hopefully) one regarding StackOverflowException


I am writing a .NET web spider. While it works great on one of my sites (about 20 pages), it bombs with a System.StackOverflowException on another site I manage (about 500 pages).

I am developing this on a 64-bit Windows 7 i3 laptop with 8 GB of RAM, a 128 GB HyperX SSD and no swap file.

My question is: am I getting this exception because I have no swap file?

CPU usage (of the VS2010 debug process) only gets to around 34%, with only 74-75 MB of RAM in use.

If this is the case, how can I ensure that it won't happen?

This is with no recursion.

Code:

Imports System.Reflection
Imports System.Net
Imports Superstar.Html.Linq

''' <summary>
''' Downloads a URL as a string, as a byte array, or as a parsed HDocument.
''' Each download is started with WebClient's event-based asynchronous API and
''' the calling thread then waits (polling) until the completion event fires.
''' </summary>
''' <remarks>
''' NOTE(review): WebClient raises its completion events through the captured
''' SynchronizationContext. If these methods are called on a UI thread the
''' completion event may never be delivered while this thread is waiting -
''' confirm that callers only use this class from a worker thread.
''' </remarks>
Public Class Downloader
Implements IDisposable

''' <summary>
''' Gets the string produced by the last call to DownloadString.
''' It is String.Empty when that download failed or was cancelled.
''' </summary>
''' <value></value>
''' <returns></returns>
''' <remarks></remarks>
Public ReadOnly Property ReturnString As String
    Get
        Return _StrReturn
    End Get
End Property
Private Property _StrReturn As String

''' <summary>
''' Gets the byte array produced by the last call to DownloadFile.
''' It is Nothing when that download failed or was cancelled.
''' </summary>
''' <value></value>
''' <returns></returns>
''' <remarks></remarks>
Public ReadOnly Property ReturnBytes As Byte()
    Get
        Return _FSReturn
    End Get
End Property
Private Property _FSReturn As Byte()


Private Property _UserAgent As String = "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.2.13) Gecko/20101203 Firefox/3.6.13"

' True once the completion handler of the current download has run
' (whether the download succeeded, failed or was cancelled).
Private Property DataReceived As Boolean = False

' True only when the current download completed without error or cancellation.
Private Property DownloadSucceeded As Boolean = False

' Pause between two polls of DataReceived, in milliseconds.
Private Const PollIntervalMs As Integer = 10

''' <summary>
''' Downloads the resource at <paramref name="_Path"/> as a string and stores it
''' in ReturnString. The calling thread is blocked until the download has
''' completed, failed or been cancelled.
''' </summary>
''' <param name="_Path">Absolute URI of the resource to download.</param>
''' <param name="_Worker">Optional worker that receives a progress tick on every poll.</param>
''' <remarks></remarks>
Public Sub DownloadString(ByVal _Path As String, Optional ByVal _Worker As ComponentModel.BackgroundWorker = Nothing)
    SetAllowUnsafeHeaderParsing20()
    Using wc As New Net.WebClient()
        DataReceived = False
        DownloadSucceeded = False
        wc.Headers.Add("user-agent", _UserAgent)
        ' Subscribe before starting, so the completion event cannot be missed.
        AddHandler wc.DownloadStringCompleted, AddressOf StringDownloaded
        wc.DownloadStringAsync(New System.Uri(_Path))
        WaitForCompletion(_Worker)
    End Using
End Sub

''' <summary>
''' Downloads the resource at <paramref name="_Path"/> as raw bytes and stores
''' them in ReturnBytes. The calling thread is blocked until the download has
''' completed, failed or been cancelled.
''' </summary>
''' <param name="_Path">Absolute URI of the resource to download.</param>
''' <param name="_Worker">Optional worker that receives a progress tick on every poll.</param>
''' <remarks></remarks>
Public Sub DownloadFile(ByVal _Path As String, Optional ByVal _Worker As ComponentModel.BackgroundWorker = Nothing)
    SetAllowUnsafeHeaderParsing20()
    Using wc As New Net.WebClient()
        DataReceived = False
        DownloadSucceeded = False
        wc.Headers.Add("user-agent", _UserAgent)
        ' Subscribe before starting, so the completion event cannot be missed.
        AddHandler wc.DownloadDataCompleted, AddressOf FileStreamDownload
        wc.DownloadDataAsync(New System.Uri(_Path))
        WaitForCompletion(_Worker)
    End Using
End Sub

''' <summary>
''' Downloads a parsable HDocument, for using HtmlToLinq.
''' The page is first fetched through DownloadString and parsed; when that
''' download fails or anything throws, the document is loaded directly with
''' HDocument.Load instead.
''' </summary>
''' <param name="_Path">Absolute URI of the page to download.</param>
''' <param name="_Worker">Optional worker passed through to DownloadString.</param>
''' <returns>The parsed document, or Nothing when a StackOverflowException was caught.</returns>
''' <remarks></remarks>
Public Function DownloadHDoc(ByVal _Path As String, Optional ByVal _Worker As ComponentModel.BackgroundWorker = Nothing) As HDocument
    Try
        DownloadString(_Path, _Worker)
        If DownloadSucceeded Then
            Return HDocument.Parse(_StrReturn)
        End If
        ' The download failed or was cancelled: use the direct-load fallback below.
    Catch soex As StackOverflowException
        ' NOTE(review): since .NET 2.0 a StackOverflowException raised by the
        ' runtime terminates the process and cannot be caught; this handler only
        ' sees instances thrown explicitly by code.
        'put some logging in here, with the path attempted
        Return Nothing
    Catch ex As Exception
        ' Deliberately not rethrown: the direct-load fallback below is the recovery path.
    End Try
    SetAllowUnsafeHeaderParsing20()
    Return HDocument.Load(_Path)
End Function

#Region "Internals"

''' <summary>
''' Blocks until the completion handler of the current download has run,
''' sending a progress tick to <paramref name="_Worker"/> on every poll.
''' </summary>
''' <param name="_Worker">Worker to notify, or Nothing to wait silently.</param>
Private Sub WaitForCompletion(ByVal _Worker As ComponentModel.BackgroundWorker)
    Dim _ct As Long = 0
    Do While Not DataReceived
        If _Worker IsNot Nothing Then
            _ct += 1
            ReportProgress(_ct, _Worker)
        End If
        ' Yield the CPU between polls instead of spinning flat out.
        System.Threading.Thread.Sleep(PollIntervalMs)
    Loop
End Sub

''' <summary>
''' Best-effort switch of System.Net's internal "useUnsafeHeaderParsing" setting
''' to True via reflection. Does nothing when the internal members are not found.
''' </summary>
Private Sub SetAllowUnsafeHeaderParsing20()
    Dim aNetAssembly As System.Reflection.Assembly = Assembly.GetAssembly(GetType(System.Net.Configuration.SettingsSection))
    If aNetAssembly Is Nothing Then Return

    Dim aSettingsType As Type = aNetAssembly.GetType("System.Net.Configuration.SettingsSectionInternal")
    If aSettingsType Is Nothing Then Return

    Dim args As Object() = Nothing
    Dim anInstance As Object = aSettingsType.InvokeMember("Section", BindingFlags.Static Or BindingFlags.GetProperty Or BindingFlags.NonPublic, Nothing, Nothing, args)
    If anInstance Is Nothing Then Return

    Dim aUseUnsafeHeaderParsing As FieldInfo = aSettingsType.GetField("useUnsafeHeaderParsing", BindingFlags.NonPublic Or BindingFlags.Instance)
    If aUseUnsafeHeaderParsing Is Nothing Then Return

    aUseUnsafeHeaderParsing.SetValue(anInstance, True)
End Sub

''' <summary>
''' Completion handler for DownloadFile: stores the bytes (or Nothing on
''' failure) and then releases the waiting thread.
''' </summary>
Private Sub FileStreamDownload(ByVal sender As Object, ByVal e As DownloadDataCompletedEventArgs)
    DownloadSucceeded = (e.Cancelled = False AndAlso e.Error Is Nothing)
    If DownloadSucceeded Then
        _FSReturn = DirectCast(e.Result, Byte())
    Else
        _FSReturn = Nothing
    End If
    ' Signal last, and in every case, so the waiter always wakes up and never
    ' observes a result that has not been stored yet.
    DataReceived = True
End Sub

''' <summary>
''' Completion handler for DownloadString: stores the text (or String.Empty on
''' failure) and then releases the waiting thread.
''' </summary>
Private Sub StringDownloaded(ByVal sender As Object, ByVal e As DownloadStringCompletedEventArgs)
    DownloadSucceeded = (e.Cancelled = False AndAlso e.Error Is Nothing)
    If DownloadSucceeded Then
        _StrReturn = DirectCast(e.Result, String)
    Else
        _StrReturn = String.Empty
    End If
    ' Signal last, and in every case, so the waiter always wakes up and never
    ' observes a result that has not been stored yet.
    DataReceived = True
End Sub

#End Region

#Region "IDisposable Support"
Private disposedValue As Boolean ' To detect redundant calls

' IDisposable
''' <summary>
''' Clears the buffered download results. The class holds no disposable
''' members, so <paramref name="disposing"/> makes no difference here.
''' </summary>
Protected Overridable Sub Dispose(disposing As Boolean)
    If Not Me.disposedValue Then
        _StrReturn = String.Empty
        _FSReturn = Nothing
    End If
    Me.disposedValue = True
End Sub

''' <summary>
''' Releases the buffered download results.
''' </summary>
Public Sub Dispose() Implements IDisposable.Dispose
    Dispose(True)
    GC.SuppressFinalize(Me)
End Sub
#End Region

End Class

And here is the code that calls this, where the stack overflow is happening:

''' <summary>
''' Downloads every page in LinkList and extracts its SEO-relevant parts
''' (title, meta keywords/description, body, headings, lists, emphasis, links
''' and images) into one Typing.SEO entry per page.
''' </summary>
''' <param name="_Worker">Optional worker that receives percentage progress after each successfully processed page.</param>
''' <returns>One entry per page that could be downloaded and parsed; failed pages are skipped.</returns>
''' <remarks>
''' LinkList is only read. Removing items from it while indexing through it
''' skipped every other link and made the second half of the iterations throw.
''' </remarks>
Private Function PopulateSEOList(Optional ByVal _Worker As ComponentModel.BackgroundWorker = Nothing) As List(Of Typing.SEO)
    Dim _L = LinkList, _Ct As Long = 0
    Dim _NL As New List(Of Typing.SEO)
    Dim _EL As Typing.SEO.Elements = Nothing
    Dim _Doc As HDocument = Nothing, _Keywords As String = String.Empty, _Description As String = String.Empty, _Content As HElement = Nothing
    ' The total is fixed up front: it is both the loop bound and the progress denominator.
    Dim _Total As Integer = _L.Count
    For i As Integer = 0 To _Total - 1
        ' Reset the per-page state so nothing leaks over from the previous page.
        _Doc = Nothing
        _EL = Nothing
        _Content = Nothing
        _Keywords = String.Empty
        _Description = String.Empty
        Try
            _Ct += 1
            Using _HDoc As New Downloader
                With _HDoc
                    _Doc = .DownloadHDoc(_L(i).SiteUrl)
                End With
            End Using
            If _Doc Is Nothing Then
                ' DownloadHDoc returns Nothing when it gave up on the page.
                System.Diagnostics.Debug.WriteLine(String.Format("PopulateSEOList: link #{0} returned no document", i))
                Continue For
            End If
            ' The four extractions are independent and each writes its own variable.
            Tasks.Parallel.Invoke(Sub()
                                      'Keywords
                                      For Each Item In _Doc.Descendants("meta")
                                          If Item.Attribute("name") = "keywords" Then
                                              _Keywords = Item.Attribute("content").Value
                                              'Exit For
                                          End If
                                      Next
                                  End Sub,
                                  Sub()
                                      'Description
                                      For Each Item In _Doc.Descendants("meta")
                                          If Item.Attribute("name") = "description" Then
                                              _Description = Item.Attribute("content").Value
                                              'Exit For
                                          End If
                                      Next
                                  End Sub,
                                  Sub()
                                      If _Doc.Descendants("body") IsNot Nothing Then
                                          _Content = _Doc.Descendants("body").FirstOrDefault
                                      End If
                                  End Sub,
                                  Sub()
                                      _EL = New Typing.SEO.Elements() With {
                                        .H1 = If(_Doc.Descendants("h1") IsNot Nothing, (From n In _Doc.Descendants("h1").AsParallel()
                                                 Select n.Value).ToList(), Nothing),
                                        .H2 = If(_Doc.Descendants("h2") IsNot Nothing, (From n In _Doc.Descendants("h2").AsParallel()
                                                 Select n.Value).ToList(), Nothing),
                                        .H3 = If(_Doc.Descendants("h3") IsNot Nothing, (From n In _Doc.Descendants("h3").AsParallel()
                                                 Select n.Value).ToList(), Nothing),
                                        .H4 = If(_Doc.Descendants("h4") IsNot Nothing, (From n In _Doc.Descendants("h4").AsParallel()
                                                 Select n.Value).ToList(), Nothing),
                                        .H5 = If(_Doc.Descendants("h5") IsNot Nothing, (From n In _Doc.Descendants("h5").AsParallel()
                                                 Select n.Value).ToList(), Nothing),
                                        .H6 = If(_Doc.Descendants("h6") IsNot Nothing, (From n In _Doc.Descendants("h6").AsParallel()
                                                 Select n.Value).ToList(), Nothing),
                                        .UL = If(_Doc.Descendants("ul") IsNot Nothing, (From n In _Doc.Descendants("ul").AsParallel()
                                                 Select n.Value).ToList(), Nothing),
                                        .OL = If(_Doc.Descendants("ol") IsNot Nothing, (From n In _Doc.Descendants("ol").AsParallel()
                                                 Select n.Value).ToList(), Nothing),
                                        .STRONG = If(_Doc.Descendants("strong") IsNot Nothing OrElse _Doc.Descendants("b") IsNot Nothing,
                                                     (From n In _Doc.Descendants("strong").AsParallel()
                                                     Select n.Value).Union(From n In _Doc.Descendants("b").AsParallel()
                                                     Select n.Value).ToList(), Nothing),
                                        .BLOCKQUOTE = If(_Doc.Descendants("blockquote") IsNot Nothing, (From n In _Doc.Descendants("blockquote").AsParallel()
                                                 Select n.Value).ToList(), Nothing),
                                        .EM = If(_Doc.Descendants("em") IsNot Nothing OrElse _Doc.Descendants("i") IsNot Nothing,
                                                 (From n In _Doc.Descendants("em").AsParallel()
                                                 Select n.Value).Union(From n In _Doc.Descendants("i").AsParallel()
                                                 Select n.Value).ToList(), Nothing),
                                        .A = If(_Doc.Descendants("a") IsNot Nothing, (From n In _Doc.Descendants("a").AsParallel()
                                            Select New Typing.SEO.Elements.Links() With {
                                                .Content = n.Value,
                                                .Title = If(n.Attribute("title") IsNot Nothing,
                                                            n.Attribute("title").Value,
                                                            Nothing),
                                                .Target = If(n.Attribute("target") IsNot Nothing,
                                                            n.Attribute("target").Value,
                                                            Nothing),
                                                .Rel = If(n.Attribute("rel") IsNot Nothing,
                                                            n.Attribute("rel").Value,
                                                            Nothing),
                                                .Href = If(n.Attribute("href") IsNot Nothing,
                                                            n.Attribute("href").Value,
                                                            Nothing)
                                            }).ToList(), Nothing),
                                        .IMG = If(_Doc.Descendants("img") IsNot Nothing,
                                                  (From n In _Doc.Descendants("img").AsParallel()
                                                   Select New Typing.SEO.Elements.Images() With {
                                                       .Alt = If(n.Attribute("alt") IsNot Nothing,
                                                            n.Attribute("alt").Value,
                                                            Nothing),
                                                       .Source = If(n.Attribute("src") IsNot Nothing,
                                                            n.Attribute("src").Value,
                                                            Nothing),
                                                       .Title = If(n.Attribute("title") IsNot Nothing,
                                                            n.Attribute("title").Value,
                                                            Nothing)
                                                   }).ToList(),
                                                Nothing)
                                      }
                                  End Sub)
            _NL.Add(New Typing.SEO() With {
                    .Link = _L(i).SiteUrl,
                    .Title = _Doc.Descendants("title").First().Value,
                    .Keywords = _Keywords,
                    .Description = _Description,
                    .Content = _Content,
                    .ContentElements = _EL
                })
            ' _Total is never zero here, because the loop body only runs when there are links.
            ReportProgress((_Ct / _Total) * 100, _Worker)
        Catch ex As Exception
            ' A page that cannot be processed is skipped, but no longer silently.
            System.Diagnostics.Debug.WriteLine(String.Format("PopulateSEOList: link #{0} failed: {1}", i, ex))
        End Try
    Next
    Return _NL
End Function

Solution

  • As you probably know, this error is most likely caused by a bug that makes a recursive algorithm recurse without ever terminating. Although you say you don't use recursion, recursion is probably happening inadvertently, for example inside an event handler or a library call.

    The easiest way to figure out what's causing it is to attach the debugger, configure Visual Studio to break on exceptions, and trigger the error in your application.

    When the error occurs and the debugger breaks, have a look at the call stack - hopefully you'll see what the problem is there.