Torna al Thread

''' <summary>
''' Extracts every HTML table found in <paramref name="SourceHTML"/> into a DataSet,
''' producing one DataTable per matched table element.
''' </summary>
''' <param name="SourceHTML">
''' HTML markup to scan. Nothing or an empty string yields an empty DataSet.
''' </param>
''' <returns>
''' A DataSet holding one DataTable per matched table, in the order the tables appear.
''' </returns>
''' <remarks>
''' Parsing is regex based, not a real HTML parser:
''' - the lazy table pattern ends at the first closing table tag, so nested tables
'''   are not reconstructed correctly;
''' - cell values are the raw inner markup of each td element (no tag stripping or
'''   entity decoding is performed);
''' - when the table contains any th cell, every th becomes a column name and the
'''   first tr of the table is treated as the header row and skipped;
''' - otherwise columns are named "Column 1", "Column 2", ... as cells are encountered.
''' Rows with fewer cells than columns leave the remaining columns unset; rows with
''' more cells than columns cause extra default-named columns to be appended.
''' </remarks>
Private Function ConvertHTMLTablesToDataSet(ByVal SourceHTML As String) As DataSet
    Const TableExpression As String = "<table[^>]*>(.*?)</table>"
    ' \b stops <thead> from being mistaken for a <th> cell.
    Const HeaderExpression As String = "<th\b[^>]*>(.*?)</th>"
    Const HeaderProbeExpression As String = "<th\b"
    Const RowExpression As String = "<tr[^>]*>(.*?)</tr>"
    Const ColumnExpression As String = "<td[^>]*>(.*?)</td>"
    Const MatchOptions As RegexOptions = RegexOptions.Multiline Or RegexOptions.Singleline Or RegexOptions.IgnoreCase

    Dim ds As New DataSet

    ' Nothing to parse: hand back the empty DataSet instead of letting Regex throw.
    If String.IsNullOrEmpty(SourceHTML) Then
        Return ds
    End If

    ' Loop through each table element
    For Each Table As Match In Regex.Matches(SourceHTML, TableExpression, MatchOptions)
        Dim dt As New DataTable

        ' Header detection uses the same case-insensitive matching as the cell patterns,
        ' so <TH> and <th> are treated alike.
        Dim HeadersExist As Boolean = Regex.IsMatch(Table.Value, HeaderProbeExpression, MatchOptions)

        If HeadersExist Then
            ' Name the columns after the header cells
            For Each Header As Match In Regex.Matches(Table.Value, HeaderExpression, MatchOptions)
                dt.Columns.Add(Header.Groups(1).Value)
            Next
        End If
        ' Without headers no columns are created up front; they are added on demand
        ' while the rows are read (the first row therefore defines "Column 1".."Column n").

        Dim iCurrentRow As Integer = 0

        ' Loop through each row element
        For Each Row As Match In Regex.Matches(Table.Value, RowExpression, MatchOptions)
            ' Only read the row if it isn't the header row
            If Not (iCurrentRow = 0 AndAlso HeadersExist) Then
                Dim Columns As MatchCollection = Regex.Matches(Row.Value, ColumnExpression, MatchOptions)

                ' A row may be wider than the header / the first row; grow the table
                ' rather than failing on an out-of-range column index.
                EnsureColumnCount(dt, Columns.Count)

                Dim dr As DataRow = dt.NewRow
                Dim iCurrentColumn As Integer = 0

                For Each Column As Match In Columns
                    dr(iCurrentColumn) = Column.Groups(1).Value
                    iCurrentColumn += 1
                Next

                dt.Rows.Add(dr)
            End If

            iCurrentRow += 1
        Next

        ' Add the DataTable to the DataSet
        ds.Tables.Add(dt)
    Next

    Return ds
End Function

''' <summary>
''' Appends default-named columns to <paramref name="dt"/> until it has at least
''' <paramref name="requiredCount"/> columns.
''' </summary>
Private Sub EnsureColumnCount(ByVal dt As DataTable, ByVal requiredCount As Integer)
    While dt.Columns.Count < requiredCount
        Dim columnName As String = "Column " & (dt.Columns.Count + 1)

        If dt.Columns.Contains(columnName) Then
            ' A header already uses this name; let the DataTable pick its own default name.
            dt.Columns.Add()
        Else
            dt.Columns.Add(columnName)
        End If
    End While
End Sub
Copyright © dotNetHell.it 2002-2024
Running on Windows Server 2008 R2 Standard, SQL Server 2012 & ASP.NET 3.5