使用 RegularExpressions 取得網頁的錨點

摘要:使用 RegularExpressions 取得網頁的錨點

這陣子在做HTML Editor時
使用者希望可以輸入超連結後,同時還可挑選這超連結的錨點
實作出來後,和大家分享一下  ^^

.aspx檔

<%@ Page Language="C#" AutoEventWireup="true" CodeFile="getHtmlAnchor.aspx.cs" Inherits="getHtmlAnchor" %>
<%-- Page that takes a URL from the user and lists anchor names found in that page.
     Code-behind lives in getHtmlAnchor.aspx.cs (class getHtmlAnchor). --%>

<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">

<html xmlns="http://www.w3.org/1999/xhtml">
<head runat="server">
    <title>未命名頁面</title>
</head>
<body>
    <form id="form1" runat="server">
    <%-- TextBox1: the URL typed by the user; read by Button1_Click. --%>
    <div>網址:    
        <asp:TextBox ID="TextBox1" runat="server" Width="500px"></asp:TextBox>
        <br />
        <br />
        <%-- Button1 posts back and invokes Button1_Click in the code-behind;
             Label1 receives the status message set by that handler. --%>
        <asp:Button ID="Button1" runat="server" onclick="Button1_Click" Text="取得錨點" />
        <asp:Label ID="Label1" runat="server" Text=""></asp:Label>
        <br />
        <%-- ListBox1 is cleared and refilled with the anchor names on every click. --%>
        <asp:ListBox ID="ListBox1" runat="server" Rows="10" Width="500px"></asp:ListBox>
    </div>
    </form>
</body>
</html>

.cs檔 

using System;
using System.Collections;
using System.Configuration;
using System.Data;
using System.Web;
using System.Web.Security;
using System.Web.UI;
using System.Web.UI.HtmlControls;
using System.Web.UI.WebControls;
using System.Web.UI.WebControls.WebParts;
using System.Net;
using System.IO;
using System.Text.RegularExpressions;


/// <summary>
/// Code-behind for getHtmlAnchor.aspx: downloads the page at the URL typed into
/// TextBox1 and lists the names of its anchors (&lt;a id="..."&gt; / &lt;a name="..."&gt;)
/// in ListBox1.
/// </summary>
public partial class getHtmlAnchor : System.Web.UI.Page
{
    /// <summary>
    /// Matches the start of an &lt;a&gt; tag up to and including its first id/name attribute.
    /// - <c>&lt;a\b</c> restricts the match to anchor tags (not &lt;abbr&gt;, &lt;area&gt;, ...).
    /// - The <c>\s</c> right before <c>id|name</c> prevents matching attributes that merely
    ///   end in "id"/"name" (for example data-id).
    /// - The <c>quote</c> backreference requires the closing quote to be the same character
    ///   as the opening one.
    /// Built once and cached instead of being re-parsed on every click.
    /// </summary>
    private static readonly Regex AnchorPattern = new Regex(
        "<a\\b[^>]*?\\s(?:id|name)\\s*=\\s*(?<quote>[\"'])(?<anchor>.*?)\\k<quote>",
        RegexOptions.IgnoreCase | RegexOptions.Compiled);

    /// <summary>Page load handler; intentionally does nothing.</summary>
    protected void Page_Load(object sender, EventArgs e)
    {

    }

    /// <summary>
    /// Click handler for Button1: clears the anchor list, downloads the page at the
    /// entered URL, reports success or failure in Label1 and, on success, adds every
    /// anchor name found in the page to ListBox1.
    /// </summary>
    /// <param name="sender">The control that raised the event.</param>
    /// <param name="e">Event data (unused).</param>
    protected void Button1_Click(object sender, EventArgs e)
    {
        // Start from an empty anchor list on every click.
        ListBox1.Items.Clear();

        string html;
        if (!TryDownloadHtml(TextBox1.Text, out html))
        {
            Label1.Text = "網址輸入不正確,請重新輸入";
            return;
        }

        Label1.Text = "網址輸入正確";

        // Both <a id="top"/> and <a name="top"/> count as anchors.
        foreach (Match match in AnchorPattern.Matches(html))
        {
            string anchor = match.Groups["anchor"].Value;

            // An empty id/name cannot be used as a link target, so skip it.
            if (anchor.Length > 0)
            {
                ListBox1.Items.Add(anchor);
            }
        }
    }

    /// <summary>
    /// Downloads the body of an http/https URL as text.
    /// </summary>
    /// <param name="address">The URL as typed by the user; surrounding whitespace is ignored.</param>
    /// <param name="html">Receives the response body on success; null on failure.</param>
    /// <returns>
    /// true when the URL is a well-formed absolute http/https address and its content was
    /// read completely; false when the URL is malformed, uses another scheme, or the
    /// request or the read failed.
    /// </returns>
    private static bool TryDownloadHtml(string address, out string html)
    {
        html = null;

        Uri target;
        if (address == null || !Uri.TryCreate(address.Trim(), UriKind.Absolute, out target))
        {
            return false;
        }

        // Only HTTP(S) is supported; reject file://, ftp:// etc. explicitly rather than
        // relying on a failed cast to HttpWebRequest.
        if (target.Scheme != Uri.UriSchemeHttp && target.Scheme != Uri.UriSchemeHttps)
        {
            return false;
        }

        // NOTE(review): the server fetches whatever URL the user supplies, which includes
        // hosts only reachable from the server's network. Consider restricting the allowed
        // hosts if this page is exposed to untrusted users.
        try
        {
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(target);

            // Dispose the response, its stream and the reader so the underlying
            // connection is released even when reading fails part-way.
            using (WebResponse response = request.GetResponse())
            using (Stream body = response.GetResponseStream())
            using (StreamReader reader = new StreamReader(body))
            {
                html = reader.ReadToEnd();
                return true;
            }
        }
        catch (WebException)
        {
            // DNS failure, connection refused, timeout, non-success HTTP status, ...
            return false;
        }
        catch (IOException)
        {
            // The connection dropped while the body was being read.
            return false;
        }
    }

}

執行結果

原始碼下載