Search code examples
vb.netstringparsingstreamreader

Streamread section of text file to string


Using VB.NET, I am trying to create a string of spatial coordinates for use as a parameter when querying a web service. The coordinates need to be extracted from a text file (a standard output from a GIS), which could be of any length.

The web service needs to see the string of coordinate pairs in a 'well known text' format separated by a comma. E.g. -2.780299 53.373266, -2.780606 53.372617, -2.782207 53.370392, -2.784552 53.362061

(superficially similar to a question asked here )

The text file has the structure shown below. To reiterate, I need a string containing only the coordinates (pairs separated by commas) that appear after "161" and before "Pen". The number 161 is not constant from file to file, though the word "Region" is.

            Version   300
            Charset "WindowsLatin1"
            Delimiter ","
            CoordSys Earth Projection 1, 104
            Columns 1
              id Char(10)
            Data

            Region  1
              161
            -2.780299 53.373266
            -2.780312 53.373235
            -2.780535 53.372905
            -2.780606 53.372617
            -2.780844 53.372234
            -2.781227 53.371664
            -2.781239 53.371646
            -2.781409 53.371407
            -2.781779 53.3709
            -2.782101 53.370549
            -2.782204 53.370402
            -2.782207 53.370392
            -2.781142 53.37014
            -2.780263 53.369894
            -2.780314 53.369698
            -2.780378 53.36953
            -2.780453 53.369362
            -2.780532 53.369223
            -2.780611 53.369101
            -2.780826 53.368803
            -2.781468 53.367978
            -2.781768 53.367608
            -2.78184 53.367497
            -2.781863 53.367458
            -2.781806 53.367446
            -2.781751 53.367435
            -2.781712 53.367428
            -2.781733 53.367398
            -2.781753 53.367369
            -2.781847 53.36723
            -2.781949 53.367077
            -2.781866 53.367046
            -2.781492 53.366907
            -2.781182 53.36682
            -2.781269 53.366388
            -2.780878 53.366173
            -2.781063 53.365894
            -2.781693 53.365255
            -2.782277 53.364606
            -2.782594 53.364311
            -2.782652 53.364272
            -2.782676 53.364242
            -2.782687 53.364205
            -2.78268 53.364158
            -2.782643 53.364085
            -2.782643 53.364035
            -2.782943 53.363738
            -2.783346 53.363266
            -2.783736 53.362905
            -2.784254 53.362383
            -2.784552 53.362061
            -2.784617 53.361959
            -2.784113 53.361843
            -2.783923 53.361764
            -2.783968 53.361533
            -2.783978 53.361504
            -2.784358 53.361226
            -2.784823 53.360723
            -2.784833 53.360712
            -2.785201 53.360193
            -2.785831 53.359301
            -2.785832 53.359301
            -2.786182 53.358787
            -2.786484 53.358288
            -2.786318 53.35821
            -2.787152 53.356913
            -2.787255 53.356614
            -2.787199 53.356333
            -2.787559 53.356315
            -2.790294 53.356237
            -2.791854 53.35619
            -2.792974 53.356147
            -2.794152 53.356092
            -2.794698 53.35605
            -2.79521 53.356012
            -2.795333 53.356006
            -2.795732 53.355982
            -2.79586 53.355973
            -2.796257 53.355942
            -2.797842 53.355898
            -2.797931 53.355936
            -2.798299 53.355921
            -2.798399 53.355926
            -2.798727 53.355937
            -2.799055 53.355943
            -2.799382 53.355944
            -2.799711 53.355942
            -2.799919 53.355939
            -2.800248 53.355929
            -2.800575 53.355915
            -2.800901 53.355898
            -2.802135 53.355819
            -2.803302 53.355749
            -2.804436 53.355658
            -2.805646 53.355515
            -2.807964 53.355207
            -2.809126 53.355058
            -2.809593 53.354684
            -2.810372 53.354184
            -2.81147 53.353449
            -2.81147 53.353444
            -2.811472 53.353428
            -2.811474 53.353413
            -2.811477 53.353397
            -2.811481 53.353381
            -2.811487 53.353365
            -2.811492 53.35335
            -2.811506 53.353325
            -2.811538 53.35328
            -2.811578 53.353237
            -2.811617 53.353193
            -2.811622 53.353188
            -2.811663 53.353145
            -2.811704 53.353101
            -2.811749 53.353069
            -2.811852 53.352994
            -2.811875 53.352979
            -2.811983 53.352906
            -2.812094 53.352834
            -2.812209 53.352764
            -2.812324 53.352695
            -2.812443 53.352629
            -2.813163 53.352174
            -2.81362 53.351928
            -2.813782 53.352417
            -2.814005 53.353349
            -2.814147 53.354287
            -2.814206 53.355228
            -2.814183 53.356169
            -2.814077 53.357109
            -2.813889 53.358043
            -2.813619 53.358971
            -2.813268 53.359889
            -2.812837 53.360795
            -2.812327 53.361686
            -2.81174 53.36256
            -2.811077 53.363414
            -2.81034 53.364247
            -2.809531 53.365055
            -2.808652 53.365837
            -2.807706 53.366591
            -2.806694 53.367313
            -2.805621 53.368004
            -2.804489 53.36866
            -2.803301 53.369279
            -2.802059 53.369861
            -2.800769 53.370403
            -2.799433 53.370904
            -2.798055 53.371362
            -2.796638 53.371777
            -2.795187 53.372147
            -2.793705 53.372471
            -2.792197 53.372749
            -2.790666 53.372979
            -2.789118 53.37316
            -2.787556 53.373294
            -2.785984 53.373378
            -2.784407 53.373413
            -2.782829 53.373399
            -2.781254 53.373335
            -2.780299 53.373266
                Pen (2,2,16711680) 
                Brush (5,16711680)
                Center -2.797227 53.362684

As always, any help or direction would be appreciated. The existing code, which works with the old SOAP-based services, looks like the following. However, it uses a custom library that I will no longer be able to access and don't know how to replace. It also makes use of a spatial reference system (eastings and northings) that is no longer recognised by the new services.

I may be able to convert the spatial reference system within the program, so even if it is only possible to get this working without the NBN library, that would be a big step in the right direction.

            ''' <summary>
            ''' Reads the first "Region" section found in the file at <paramref name="strMIF"/>
            ''' and returns its vertices as the boundary ring of an NBN polygon.
            ''' </summary>
            ''' <param name="strMIF">Path of the text file to read.</param>
            ''' <returns>
            ''' A polygon whose boundary ring holds the coordinates listed after the first
            ''' "Region" line, or an empty polygon when the file has no "Region" line.
            ''' </returns>
            ''' <exception cref="System.IO.InvalidDataException">
            ''' The file ends before the announced number of coordinate lines has been read,
            ''' or a coordinate line does not contain two values.
            ''' </exception>
            ''' <remarks>
            ''' Only the first ring after "Region" is read. The line following "Region" is
            ''' taken as the number of coordinate lines that follow it.
            ''' NOTE(review): the polygon is always tagged osgb_BNG; confirm that this matches
            ''' the coordinate system of the input file.
            ''' </remarks>
            Function GetPolyFromMIF(ByVal strMIF As String) As NBN.Polygon

                Dim poly As NBN.Polygon = New NBN.Polygon
                Dim invariant As System.Globalization.CultureInfo = System.Globalization.CultureInfo.InvariantCulture
                Dim separators() As Char = {" "c, ControlChars.Tab}

                ' Using guarantees the file handle is released even when parsing throws.
                Using sr As StreamReader = New StreamReader(strMIF)

                    ' Skip the header until the first "Region" line. Lines are trimmed so
                    ' that indented files are recognised as well.
                    Dim sLine As String = sr.ReadLine
                    Do While sLine IsNot Nothing AndAlso Not sLine.TrimStart().StartsWith("Region", StringComparison.Ordinal)
                        sLine = sr.ReadLine
                    Loop

                    If sLine Is Nothing Then
                        ' No region in the file: return the empty polygon.
                        Return poly
                    End If

                    Dim sCount As String = sr.ReadLine
                    If sCount Is Nothing Then
                        Throw New System.IO.InvalidDataException("The file ends after the Region line; a coordinate count was expected.")
                    End If
                    Dim iCoords As Integer = Integer.Parse(sCount.Trim(), invariant)

                    ' VB array bounds are inclusive upper bounds, so iCoords - 1 yields
                    ' exactly iCoords elements (no trailing Nothing entry in the ring).
                    Dim coords(iCoords - 1) As NBN.Coordinate

                    For iCoord As Integer = 0 To iCoords - 1
                        Dim sCoordLine As String = sr.ReadLine
                        If sCoordLine Is Nothing Then
                            Throw New System.IO.InvalidDataException(String.Format("Expected {0} coordinate lines but the file ended after {1}.", iCoords, iCoord))
                        End If

                        ' RemoveEmptyEntries tolerates leading indentation and repeated separators.
                        Dim sCoords() As String = sCoordLine.Split(separators, StringSplitOptions.RemoveEmptyEntries)
                        If sCoords.Length < 2 Then
                            Throw New System.IO.InvalidDataException(String.Format("Coordinate line {0} does not contain two values: ""{1}"".", iCoord + 1, sCoordLine))
                        End If

                        ' The file always uses "." as the decimal separator, so parse with
                        ' the invariant culture rather than the machine's current culture.
                        Dim dblEasting As Double = Double.Parse(sCoords(0), invariant)
                        Dim dblNorthing As Double = Double.Parse(sCoords(1), invariant)

                        coords(iCoord) = New NBN.Coordinate
                        coords(iCoord).x = dblEasting
                        coords(iCoord).y = dblNorthing
                    Next

                    poly.srs = NBN.SpatialReferenceSystem.osgb_BNG
                    Dim boundary As NBN.PolygonBoundary = New NBN.PolygonBoundary
                    boundary.Ring = coords
                    poly.Boundary = boundary
                End Using

                Return poly
            End Function

Solution

  • I would process the lines with a regex. Load each line into your parser line by line but without detecting anything but matching to a regex pattern. By taking this approach you can process any size of file and only retrieve those values from the file that you are interested in.

    If you want me to post an explanation of what the Regex does, let me know and I can do that too. If you want to figure out the regex yourself, type regex cheat sheet into google and pick any result that you might like to learn more about regex matching.

    My code is based upon the following assumptions. Each of the coordinates is on a separate line, as was implied by the file format that you posted. I also assumed that you need to deliver the coordinates to your system in sets of four. You mentioned that the well-known format was two coordinates followed by a comma and two more coordinates. The code I wrote gives you a list of instances of a class that I have included; each instance holds the four coordinates that make up a set in its properties, plus a property that returns them in the format you wanted. A coordinate set that is not valid cannot be added to the list. There might be a bug in my code in that it might miss the last coordinate; I never tested it that far.

    ''' <summary>
    ''' Reads c:\temp\coords.txt line by line, groups every four lines accepted by
    ''' IsMatch into a CoordinateSet, and reports how many complete sets were found.
    ''' </summary>
    ''' <param name="sender">The control that raised the event (unused).</param>
    ''' <param name="e">Event data (unused).</param>
    Private Sub Button1_Click(sender As Object, e As EventArgs) Handles Button1.Click
        Dim lst As New List(Of CoordinateSet)
        Using sr As New System.IO.StreamReader("c:\temp\coords.txt")
            Dim coord As New CoordinateSet
            Dim line As String = sr.ReadLine
            While line IsNot Nothing
                Dim trimmed As String = line.Trim
                If IsMatch(trimmed) Then
                    ' Fill the first slot that is still empty.
                    If coord.One Is Nothing Then
                        coord.One = trimmed
                    ElseIf coord.Two Is Nothing Then
                        coord.Two = trimmed
                    ElseIf coord.Three Is Nothing Then
                        coord.Three = trimmed
                    ElseIf coord.Four Is Nothing Then
                        coord.Four = trimmed
                    End If

                    ' Store the set as soon as it is complete. Deferring this check to
                    ' the next loop iteration would lose a set that is completed by
                    ' the very last line of the file.
                    If coord.CoordSet IsNot Nothing Then
                        lst.Add(coord)
                        coord = New CoordinateSet
                    End If
                End If
                line = sr.ReadLine
            End While
        End Using
        MessageBox.Show(String.Format("I found {0} coordinate pairs.", lst.Count))
    End Sub
    
    ''' <summary>
    ''' Tells whether <paramref name="line"/> consists of exactly two decimal numbers
    ''' (each with an optional leading minus sign and a single decimal point)
    ''' separated by one space, e.g. "-2.780299 53.373266".
    ''' </summary>
    ''' <param name="line">The already-trimmed line to test; Nothing yields False.</param>
    ''' <returns>True when the whole line is a coordinate pair; otherwise False.</returns>
    Private Function IsMatch(line As String) As Boolean
        If line Is Nothing Then
            Return False
        End If
        ' Anchored at both ends so trailing text is rejected, and a single "\." so
        ' that malformed numbers such as "1..5" are rejected. The shared
        ' Regex.IsMatch overload reuses a cached pattern instead of building a new
        ' Regex object on every call.
        Return System.Text.RegularExpressions.Regex.IsMatch(line, "^-?[0-9]+\.[0-9]+ -?[0-9]+\.[0-9]+$")
    End Function
    
    ''' <summary>
    ''' Holds four string values and exposes them as one formatted string once all
    ''' four have been supplied.
    ''' </summary>
    ''' <remarks>
    ''' NOTE(review): the Button1_Click handler in this file stores a whole matched
    ''' line ("x y") in each property, in which case CoordSet places a comma only
    ''' between the second and third line. Confirm that this is the intended output.
    ''' </remarks>
    Public Class CoordinateSet
        Implements ICloneable

        ''' <summary>First value of the set.</summary>
        Public Property One As String

        ''' <summary>Second value of the set.</summary>
        Public Property Two As String

        ''' <summary>Third value of the set.</summary>
        Public Property Three As String

        ''' <summary>Fourth value of the set.</summary>
        Public Property Four As String

        ''' <summary>
        ''' The four values formatted as "One Two, Three Four", or Nothing while any
        ''' of them is still Nothing or empty.
        ''' </summary>
        Public ReadOnly Property CoordSet As String
            Get
                Dim incomplete As Boolean = String.IsNullOrEmpty(One) OrElse
                                            String.IsNullOrEmpty(Two) OrElse
                                            String.IsNullOrEmpty(Three) OrElse
                                            String.IsNullOrEmpty(Four)
                If incomplete Then
                    Return Nothing
                End If

                Return String.Format("{0} {1}, {2} {3}", One, Two, Three, Four)
            End Get
        End Property

        ''' <summary>Creates a shallow copy of this set.</summary>
        ''' <returns>A new CoordinateSet carrying the same four values.</returns>
        Public Function Clone() As Object Implements ICloneable.Clone
            Return Me.MemberwiseClone()
        End Function
    End Class