⑴ 正則表達式如何過濾HTML標簽中的屬性值
去掉html標簽: str.replace(/<\/?[a-zA-Z]+[^><]*>/g,"")
去掉標簽裡面的屬性: str.replace(/<([a-zA-Z]+)\s*[^><]*>/g,"<$1>")
我親自測試通過,操作語言為javascript。樓主還有問題的話Hi我
⑵ 怎麼過濾html標簽
過濾html標簽代碼如下:
/// <summary>
/// Reduces an HTML fragment to plain text. Script, iframe and frameset
/// elements are removed together with their content, "javascript:"-style
/// href values and on-event attributes inside tags are neutralised, every
/// remaining tag is stripped, and finally all space characters are removed.
/// </summary>
/// <param name="html">HTML text to filter. Null or empty is returned unchanged.</param>
/// <returns>The filtered text with no tags and no space characters.</returns>
public string checkStr(string html)
{
    // Regex.Replace throws on null input; there is nothing to filter anyway.
    if (string.IsNullOrEmpty(html))
    {
        return html;
    }

    const System.Text.RegularExpressions.RegexOptions options =
        System.Text.RegularExpressions.RegexOptions.IgnoreCase;

    // Remove <script>...</script> including its content. The quantifier is
    // lazy so that each element is removed on its own; a greedy match would
    // swallow everything between the first opening and the last closing tag.
    html = System.Text.RegularExpressions.Regex.Replace(
        html, @"<script[\s\S]+?</script *>", "", options);

    // Remove href=...script: (e.g. javascript:) prefixes. The lookbehind keeps
    // the match inside a tag, so ordinary text containing "href" is untouched.
    html = System.Text.RegularExpressions.Regex.Replace(
        html, @"(?<=<[^<>]*)\shref *= *[^<>]*?script *:", "", options);

    // Rename on... event attributes inside a tag so they can no longer fire.
    // Only "on" followed by word characters and "=" is matched, not arbitrary
    // text between " on" and the last "=" in the document.
    html = System.Text.RegularExpressions.Regex.Replace(
        html, @"(?<=<[^<>]*)\son\w+\s*=", " _disibledevent=", options);

    // Remove <iframe>...</iframe> including its content (lazy, see above).
    html = System.Text.RegularExpressions.Regex.Replace(
        html, @"<iframe[\s\S]+?</iframe *>", "", options);

    // Remove <frameset>...</frameset> including its content (lazy, see above).
    html = System.Text.RegularExpressions.Regex.Replace(
        html, @"<frameset[\s\S]+?</frameset *>", "", options);

    // Strip every remaining tag. This also covers <img>, <p>, </p>, <strong>
    // and </strong>, so no separate passes are needed for them.
    html = System.Text.RegularExpressions.Regex.Replace(
        html, @"<[^>]*>", "", options);

    // NOTE(review): this removes every space character from the text. The
    // literal may originally have been "&nbsp;" before the snippet was
    // HTML-decoded - confirm the intended behaviour with the caller.
    html = html.Replace(" ", "");

    return html;
}
⑶ vb用代碼或系統控制項 去除HTML語法
搜索字元串
==========================
用遞歸搜索嘛
給一個自己寫的函數你參考 與你的功能無關 但主要用與處理HTML源碼
'*************************************************************************
'** Function : FindStr
'** Purpose  : Cuts a piece out of vSourceStr relative to a start marker
'**            (vsStr) and an end marker (veStr). Written for picking
'**            fragments out of HTML source, e.g. "head<b>content</b>tail".
'** Inputs   : vSourceStr (String)      - text to search
'**            vFunType (Integer)       - selects the piece to return:
'**               0  text between the first vsStr and the next veStr
'**               1  result of 0 wrapped as vsStr & ... & veStr
'**               2  result of 0 prefixed with vsStr
'**               3  result of 0 suffixed with veStr
'**               4  text before the first vsStr, followed by result 1
'**               5  text before the first vsStr, followed by result 2
'**               6  everything from the first vsStr (inclusive) to the end
'**               7  everything after the first vsStr to the end
'**               8  every vsStr...veStr content, each prefixed with "$"
'**                  (meant to be fed to Split)
'**               9  everything after the LAST vsStr
'**            vsStr (String, optional) - start marker
'**            veStr (String, optional) - end marker
'** Returns  : (String) the selected piece. "" when the marker the mode
'**            searches for is missing or vFunType is not 0-9. Modes 1-3
'**            still emit their marker(s) around an empty content.
'** Author   : 秋色烽火
'** Date     : 2007-11-20 22:02:05
'*************************************************************************
Public Function FindStr(ByVal vSourceStr As String, ByVal vFunType As Integer, Optional ByVal vsStr As String, Optional ByVal veStr As String) As String
    Dim sourceStr As String
    Dim remaining As String
    Dim nextPart As String
    Dim sStr As String
    Dim eStr As String
    Dim opStr As String
    Dim S As Long
    Dim E As Long

    sourceStr = vSourceStr
    sStr = vsStr
    eStr = veStr

    Select Case vFunType
        Case 0 'content only
            S = InStr(sourceStr, sStr)
            If S <> 0 Then
                sourceStr = Mid$(sourceStr, S + Len(sStr))
                E = InStr(sourceStr, eStr)
                If E <> 0 Then
                    FindStr = Mid$(sourceStr, 1, E - 1)
                Else
                    FindStr = ""
                End If
            End If

        Case 1 '<b>content</b>
            FindStr = sStr & FindStr(sourceStr, 0, sStr, eStr) & eStr

        Case 2 '<b>content
            FindStr = sStr & FindStr(sourceStr, 0, sStr, eStr)

        Case 3 'content</b>
            FindStr = FindStr(sourceStr, 0, sStr, eStr) & eStr

        Case 4 'head<b>content</b>
            E = InStr(sourceStr, sStr)
            If E <> 0 Then
                FindStr = Mid$(sourceStr, 1, E - 1) & FindStr(sourceStr, 1, sStr, eStr)
            Else
                FindStr = ""
            End If

        Case 5 'head<b>content
            E = InStr(sourceStr, sStr)
            If E <> 0 Then
                FindStr = Mid$(sourceStr, 1, E - 1) & FindStr(sourceStr, 2, sStr, eStr)
            Else
                FindStr = ""
            End If

        Case 6 '<b>content</b>tail
            S = InStr(sourceStr, sStr)
            If S <> 0 Then
                FindStr = Mid$(sourceStr, S)
            Else
                FindStr = ""
            End If

        Case 7 'content</b>tail
            S = InStr(sourceStr, sStr)
            If S <> 0 Then
                FindStr = Mid$(sourceStr, S + Len(sStr))
            Else
                FindStr = ""
            End If

        Case 8 'all matches, each prefixed with "$"
            remaining = FindStr(sourceStr, 7, sStr, eStr)
            Do While remaining <> ""
                E = InStr(remaining, eStr)
                'No closing marker left: stop, otherwise the loop would
                'spin forever on the same text.
                If E = 0 Then Exit Do
                opStr = opStr & "$" & Mid$(remaining, 1, E - 1)
                nextPart = FindStr(Mid$(remaining, E + Len(eStr)), 7, sStr, eStr)
                'Both markers empty consume nothing; stop instead of looping.
                If Len(nextPart) >= Len(remaining) Then Exit Do
                remaining = nextPart
            Loop
            FindStr = opStr

        Case 9 'match from right to left: text after the last sStr
            If Len(sStr) = 0 Then
                FindStr = ""
            Else
                S = InStrRev(sourceStr, sStr)
                If S <> 0 Then
                    FindStr = Mid$(sourceStr, S + Len(sStr))
                Else
                    FindStr = ""
                End If
            End If
    End Select
End Function
⑷ VB 字元串操作,去掉html代碼
工程>引用>Microsoft VBScript Regular Expressions 5.5
'Create a new form and add a command button named Command1.
'Demo: builds a sample string of <a> links and shows it with the anchor
'tags removed, leaving only the link texts.
Private Sub Command1_Click()
    Dim sampleHtml As String
    Dim anchorStripper As RegExp

    'NOTE(review): in the first anchor the onmousedown value is not followed
    'by a closing quote before href; the pattern below still matches it.
    sampleHtml = "<a onmousedown=" & """" & "return tongji(this.innerHTML,this.href,'artist_0')" & "href=" & """" & "http://movie.gougou.com/Sections/movies?search=%b7%b6%a1%a4%b5%cf%c8%fb%b6%fb&searchby=2&page=1" & """" & " target='_blank'>范·迪塞爾</a> <a onmousedown=" & """" & "return tongji(this.innerHTML,this.href,'artist_1')" & """" & " href=" & """" & "http://movie.gougou.com/Sections/movies?" & _
    "search=%b1%a3%c2%de%a1%a4%ce%d6%bf%cb&searchby=2&page=1 " & """" & "target='_blank'>保羅·沃克</a> <a onmousedown=" & """" & "return tongji(this.innerHTML,this.href,'artist_2')" & """" & " href=" & """" & "http://movie.gougou.com/Sections/movies?search=%c3%d7%d0%aa%b6%fb%a1%a4%c2%de%b5%c2%c0%ef%b8%f1%d7%c8&searchby=2&page=1" & """" & " target='_blank'>米歇爾" & _
    "·羅德里格茲</a> <a onmousedown=" & """" & "return tongji(this.innerHTML,this.href,'artist_3')" & """" & " href=" & """" & "http://movie.gougou.com/Sections/movies?search=%c7%c7%b5%a4%c4%c8%a1%a4%b2%bc%c2%b3%cb%b9%cc%d8&searchby=2&page=1" & """" & " target='_blank'>喬丹娜·布魯斯特</a> <a onmousedown=" & """" & "return tongji(this.innerHTML,this.href,'artist_4')" & """" & _
    "href=" & """" & "http://movie.gougou.com/Sections/movies?search=%41%6c%6f%6e%73%6f&searchby=2&page=1" & """" & " target='_blank'>Alonso</a> "

    Set anchorStripper = New RegExp
    With anchorStripper
        .Global = True
        .IgnoreCase = True
        'Opening <a ... onmousedown ... blank ...> tags, or closing </a> tags.
        .Pattern = "<a.*?onmousedown.+?blank.+?>|</a>"
        MsgBox .Replace(sampleHtml, "")
    End With
    Set anchorStripper = Nothing
End Sub
⑸ 用vb讀取一個html文件,去掉標簽只顯示其中文本內容
用webbrower控制項
請看下例
'聲明:該程序由csdn論壇獲得
'Collection that the handlers below enumerate to find browser windows.
Private dwinfolder As New ShellWindows
'Browser window whose navigation events this form listens to.
Private WithEvents eventie As WebBrowser_V1
'Hooks eventie to the first window whose URL equals the entry selected in
'list1, then disables the button and the list and clears text1.
Private Sub command1_click()
    Dim objie As Object

    For Each objie In dwinfolder
        If objie.locationurl = list1.list(list1.listindex) Then
            Set eventie = objie
            command1.enabled = False
            list1.enabled = False
            text1.text = ""
            Exit For
        End If
    Next objie
End Sub
'Appends each URL reported by the hooked browser to text1 on a new line.
Private Sub eventie_navigatecomplete(ByVal url As String)
    text1.text = text1.text & vbCrLf & url
End Sub
在運行前,點擊菜單 projects | references 項,在 available references 列表中選擇 microsoft internet controls 項,將 internet 對象引用加入到工程中
'Fills list1 with the URL of every enumerated window whose executable
'name contains "iexplore.exe", and sets the button caption.
Private Sub form_load()
    Dim objie As Object

    For Each objie In dwinfolder
        'Case-insensitive search for the Internet Explorer executable name.
        If InStr(1, objie.fullname, "iexplore.exe", vbTextCompare) <> 0 Then
            list1.additem objie.locationurl
        End If
    Next objie

    command1.caption = "正文"
End Sub
'Drops the reference to the window collection when the form closes.
Private Sub form_unload(cancel As Integer)
    Set dwinfolder = Nothing
End Sub
'Shows in text1 the plain text (innerText) of the BODY element of the
'window whose URL equals the entry selected in list1.
Private Sub list1_click()
    Dim objdoc As Object
    Dim objie As Object
    Dim i As Long

    For Each objie In dwinfolder
        If objie.locationurl = list1.list(list1.listindex) Then
            Set objdoc = objie.document
            'The all collection is 0-based, so start at 0.
            For i = 0 To objdoc.all.length - 1
                'tagName is reported in upper case for HTML documents, so a
                'binary comparison with "body" would never match.
                If StrComp(objdoc.all(i).tagname, "body", vbTextCompare) = 0 Then
                    text1.text = objdoc.all(i).innertext
                    Exit For
                End If
            Next i
            Exit For
        End If
    Next objie
End Sub
⑹ 如何過濾HTML標簽,或者讀取數據時,去處HTML標簽
如果你把html標簽除掉了問題會更大。
如果你不需要所見即所得的編輯器,那麼可以直接使用textarea,再把用戶輸入的html標簽過濾掉就行了。
⑺ 字元串中如何過濾HTML標簽字元
下面是asp中的方法,你可以改造成.net的
'Removes double quotes, spaces, ampersands and every <...> tag from
'strToFilter and returns the remaining text.
'  strToFilter - text to filter; Null yields an empty string.
'A "<" with no ">" after it is left in place.
Function FilterHTML(strToFilter)
    Dim strTemp
    Dim n, m 'positions of the current "<" and of its closing ">"

    'Replace would raise an error on Null input.
    If IsNull(strToFilter) Then
        FilterHTML = ""
        Exit Function
    End If

    strTemp = strToFilter
    strTemp = Replace(strTemp, """", "")
    'NOTE(review): the next three literals look identical here; they may
    'originally have been different entities or space characters - confirm.
    strTemp = Replace(strTemp, " ", "")
    strTemp = Replace(strTemp, " ", "")
    strTemp = Replace(strTemp, " ", "")
    strTemp = Replace(strTemp, "&", "")

    n = InStr(strTemp, "<") 'first "<"
    Do While n > 0
        'Look for ">" only AFTER the "<"; a stray ">" earlier in the text
        'must not stop the filtering of later tags.
        m = InStr(n, strTemp, ">")
        If m = 0 Then Exit Do 'unterminated tag: nothing more to remove
        'Join the text left of "<" with the text right of ">".
        strTemp = Left(strTemp, n - 1) & Mid(strTemp, m + 1)
        'Everything before position n is already tag-free.
        n = InStr(n, strTemp, "<")
    Loop

    FilterHTML = strTemp
End Function
⑻ vb過濾html圖片標簽的正則表達式
<img.*?>