⑴ 正则表达式如何过滤HTML标签中的属性值
去掉html标签: str.replace(/<\/?[a-zA-Z]+[^><]*>/g,"")
去掉标签里面的属性: str.replace(/<([a-zA-Z]+)\s*[^><]*>/g,"<$1>")
我亲自测试通过,操作语言为 JavaScript。楼主还有问题的话可以 Hi 我
⑵ 怎么过滤html标签
过滤html标签代码如下:
/// <summary>
/// Reduces an HTML fragment to plain text. Script, iframe and frameset
/// elements are removed together with their content, javascript-style href
/// values and inline on* handlers are neutralised, and finally every
/// remaining tag is stripped.
/// </summary>
/// <param name="html">HTML fragment to clean. Null or empty input is returned unchanged.</param>
/// <returns>The cleaned text with all tags and all space characters removed.</returns>
public string checkStr(string html)
{
    if (string.IsNullOrEmpty(html))
    {
        return html;
    }

    const System.Text.RegularExpressions.RegexOptions options =
        System.Text.RegularExpressions.RegexOptions.IgnoreCase;

    // Elements whose content must disappear too. The quantifier is lazy so
    // that each element is matched on its own; a greedy match would run from
    // the first opening tag to the LAST closing tag and delete the legitimate
    // text in between.
    string[] blockPatterns =
    {
        @"<script[\s\S]+?</script *>",
        @"<iframe[\s\S]+?</iframe *>",
        @"<frameset[\s\S]+?</frameset *>"
    };
    foreach (string pattern in blockPatterns)
    {
        html = System.Text.RegularExpressions.Regex.Replace(html, pattern, "", options);
    }

    // href=...script: (e.g. javascript:) - the match is confined to a single
    // tag by [^>] so it cannot span unrelated markup.
    html = System.Text.RegularExpressions.Regex.Replace(
        html, @" href *= *[^>]*?script *:", "", options);

    // Inline event handlers (onclick=, onload=, ...). Only the attribute name
    // is matched so the replacement cannot swallow text up to a later '='.
    html = System.Text.RegularExpressions.Regex.Replace(
        html, @" on\w+ *=", " _disibledevent=", options);

    // Strip every remaining tag. This also covers <img>, <p>, </p> and
    // <strong>, which therefore need no dedicated handling.
    html = System.Text.RegularExpressions.Regex.Replace(html, @"<[^>]*>", "", options);

    // NOTE(review): this removes every space character from the text. The
    // literal may originally have been the "&nbsp;" entity before the snippet
    // was published - confirm the intended behaviour.
    html = html.Replace(" ", "");
    return html;
}
⑶ vb用代码或系统控件 去除HTML语法
搜索字符串
==========================
用递归搜索嘛
给一个自己写的函数你参考 与你的功能无关 但主要用于处理HTML源码
'*************************************************************************
'** Function : FindStr
'** Purpose  : Returns a piece of vSourceStr located relative to a start
'**            marker (vsStr) and an end marker (veStr). Written for
'**            picking fragments out of HTML source.
'** Inputs   : ByVal vSourceStr (String)        - text to search
'**            ByVal vFunType (Integer)         - selects what is returned:
'**                0  content between the markers
'**                1  start marker + content + end marker
'**                2  start marker + content
'**                3  content + end marker
'**                4  text before the start marker + result of type 1
'**                5  text before the start marker + result of type 2
'**                6  everything from the start marker onwards
'**                7  everything after the start marker
'**                8  every content piece, each prefixed with "$"
'**                9  text after the LAST occurrence of the start marker
'**            Optional ByVal vsStr (String)    - start marker
'**            Optional ByVal veStr (String)    - end marker
'** Returns  : (String) the selected fragment. Types 0 and 4-9 return ""
'**            when the marker they search for is missing; types 1-3 still
'**            return the marker text around an empty content. Any other
'**            vFunType returns "".
'** Author   : 秋色烽火
'** Date     : 2007-11-20 22:02:05
'*************************************************************************
Public Function FindStr(ByVal vSourceStr As String, ByVal vFunType As Integer, Optional ByVal vsStr As String, Optional ByVal veStr As String) As String
    Dim sourceStr As String, remainder As String, nextChunk As String
    Dim reversedSource As String
    Dim sStr As String, eStr As String, opStr As String
    Dim S As Long, E As Long

    ' Layout used in the Case comments: "head<b>content</b>tail"
    sourceStr = vSourceStr
    sStr = vsStr
    eStr = veStr

    Select Case vFunType
        Case 0 ' content
            FindStr = ""
            S = InStr(sourceStr, sStr)
            If S <> 0 Then
                sourceStr = Mid$(sourceStr, S + Len(sStr))
                E = InStr(sourceStr, eStr)
                If E <> 0 Then
                    FindStr = Mid$(sourceStr, 1, E - 1)
                End If
            End If

        Case 1 ' <b>content</b>
            sourceStr = FindStr(sourceStr, 0, sStr, eStr)
            FindStr = sStr & sourceStr & eStr

        Case 2 ' <b>content
            sourceStr = FindStr(sourceStr, 0, sStr, eStr)
            FindStr = sStr & sourceStr

        Case 3 ' content</b>
            sourceStr = FindStr(sourceStr, 0, sStr, eStr)
            FindStr = sourceStr & eStr

        Case 4 ' head<b>content</b>
            E = InStr(sourceStr, sStr)
            If E <> 0 Then
                FindStr = Mid$(sourceStr, 1, E - 1) & FindStr(sourceStr, 1, sStr, eStr)
            Else
                FindStr = ""
            End If

        Case 5 ' head<b>content
            E = InStr(sourceStr, sStr)
            If E <> 0 Then
                FindStr = Mid$(sourceStr, 1, E - 1) & FindStr(sourceStr, 2, sStr, eStr)
            Else
                FindStr = ""
            End If

        Case 6 ' <b>content</b>tail
            S = InStr(sourceStr, sStr)
            If S <> 0 Then
                FindStr = Mid$(sourceStr, S)
            Else
                FindStr = ""
            End If

        Case 7 ' content</b>tail
            S = InStr(sourceStr, sStr)
            If S <> 0 Then
                FindStr = Mid$(sourceStr, S + Len(sStr))
            Else
                FindStr = ""
            End If

        Case 8 ' all matches, each prefixed with "$" (intended for Split)
            remainder = FindStr(sourceStr, 7, sStr, eStr)
            Do While remainder <> ""
                E = InStr(remainder, eStr)
                ' No closing marker left: stop. Without this exit the loop
                ' variable never changes and the loop never terminates.
                If E = 0 Then Exit Do
                opStr = opStr & "$" & Mid$(remainder, 1, E - 1)
                nextChunk = FindStr(Mid$(remainder, E + Len(eStr)), 7, sStr, eStr)
                ' Both markers empty: nothing is consumed, so stop as well.
                If Len(nextChunk) >= Len(remainder) Then Exit Do
                remainder = nextChunk
            Loop
            FindStr = opStr

        Case 9 ' match from right to left: text after the last start marker
            ' Searching the reversed text for the reversed marker finds the
            ' last occurrence; S - 1 is the number of characters after it.
            reversedSource = StrReverse(sourceStr)
            S = InStr(reversedSource, StrReverse(sStr))
            If S <> 0 Then
                FindStr = Right$(sourceStr, S - 1)
            Else
                FindStr = ""
            End If
    End Select
End Function
⑷ VB 字符串操作,去掉html代码
工程>引用>Microsoft VBScript Regular Expressions 5.5
'新建窗体,添加command1
' Demo: builds a sample string of <a> links and shows it in a message box
' with the opening <a ...> tags and the closing </a> tags removed.
Private Sub Command1_Click()
    Dim sampleHtml As String
    Dim stripped As String
    Dim tagMatcher As RegExp

    sampleHtml = "<a onmousedown=" & """" & "return tongji(this.innerHTML,this.href,'artist_0')" & "href=" & """" & "http://movie.gougou.com/Sections/movies?search=%b7%b6%a1%a4%b5%cf%c8%fb%b6%fb&searchby=2&page=1" & """" & " target='_blank'>范·迪塞尔</a> <a onmousedown=" & """" & "return tongji(this.innerHTML,this.href,'artist_1')" & """" & " href=" & """" & "http://movie.gougou.com/Sections/movies?" & _
    "search=%b1%a3%c2%de%a1%a4%ce%d6%bf%cb&searchby=2&page=1 " & """" & "target='_blank'>保罗·沃克</a> <a onmousedown=" & """" & "return tongji(this.innerHTML,this.href,'artist_2')" & """" & " href=" & """" & "http://movie.gougou.com/Sections/movies?search=%c3%d7%d0%aa%b6%fb%a1%a4%c2%de%b5%c2%c0%ef%b8%f1%d7%c8&searchby=2&page=1" & """" & " target='_blank'>米歇尔" & _
    "·罗德里格兹</a> <a onmousedown=" & """" & "return tongji(this.innerHTML,this.href,'artist_3')" & """" & " href=" & """" & "http://movie.gougou.com/Sections/movies?search=%c7%c7%b5%a4%c4%c8%a1%a4%b2%bc%c2%b3%cb%b9%cc%d8&searchby=2&page=1" & """" & " target='_blank'>乔丹娜·布鲁斯特</a> <a onmousedown=" & """" & "return tongji(this.innerHTML,this.href,'artist_4')" & """" & _
    "href=" & """" & "http://movie.gougou.com/Sections/movies?search=%41%6c%6f%6e%73%6f&searchby=2&page=1" & """" & " target='_blank'>Alonso</a> "

    ' Case-insensitive, replace every occurrence rather than only the first.
    Set tagMatcher = New RegExp
    With tagMatcher
        .Global = True
        .IgnoreCase = True
        .Pattern = "<a.*?onmousedown.+?blank.+?>|</a>"
        stripped = .Replace(sampleHtml, "")
    End With
    Set tagMatcher = Nothing

    MsgBox stripped
End Sub
⑸ 用vb读取一个html文件,去掉标签只显示其中文本内容
用WebBrowser控件
请看下例
' Disclaimer: this program was obtained from the CSDN forum.
' Module-level state shared by the form's event handlers:
'   dwinfolder - the ShellWindows collection the handlers enumerate
'   eventie    - the browser window selected by command1_click; declared
'                WithEvents so eventie_navigatecomplete can handle its event
Dim dwinfolder As New ShellWindows
Dim WithEvents eventie As WebBrowser_V1
' Attaches eventie to the first shell window whose URL equals the entry
' selected in list1, then disables command1 and list1 and clears text1.
Private Sub command1_click()
    Dim selectedUrl As String
    Dim shellWindow As Object

    selectedUrl = list1.List(list1.ListIndex)

    For Each shellWindow In dwinfolder
        If shellWindow.LocationURL = selectedUrl Then
            Set eventie = shellWindow
            command1.Enabled = False
            list1.Enabled = False
            text1.Text = ""
            Exit For
        End If
    Next shellWindow
End Sub
' Appends each URL reported by the attached browser to text1 on a new line.
Private Sub eventie_navigatecomplete(ByVal url As String)
    Dim lineBreak As String

    lineBreak = Chr(13) + Chr(10)
    text1.Text = text1.Text + lineBreak + url
End Sub
在运行前,点击菜单 Project | References 项,在 Available References 列表中选择 Microsoft Internet Controls 项,将 Internet 对象引用加入到工程中
' Fills list1 with the URL of every shell window whose executable name
' contains "iexplore.exe" (compared without regard to case) and sets the
' caption of command1.
Private Sub form_load()
    Dim shellWindow As Object
    Dim isInternetExplorer As Boolean

    For Each shellWindow In dwinfolder
        isInternetExplorer = (InStr(1, shellWindow.FullName, "iexplore.exe", vbTextCompare) <> 0)
        If isInternetExplorer Then
            list1.AddItem shellWindow.LocationURL
        End If
    Next shellWindow

    command1.Caption = "正文"
End Sub
' Drops the reference to the shell-window collection when the form unloads.
Private Sub form_unload(Cancel As Integer)
    Set dwinfolder = Nothing
End Sub
' Shows the plain text of the page selected in list1: finds the shell
' window with that URL, locates the BODY element of its document and copies
' the element's innerText into text1.
Private Sub list1_click()
    Dim objdoc As Object
    Dim objie As Object
    Dim i As Long

    For Each objie In dwinfolder
        If objie.LocationURL = list1.List(list1.ListIndex) Then
            Set objdoc = objie.Document
            ' The all collection is zero-based, so the scan starts at 0.
            For i = 0 To objdoc.All.Length - 1
                ' Tag names of HTML elements are reported in upper case
                ' ("BODY"), so a case-sensitive compare against "body" would
                ' never match; compare without regard to case instead.
                If StrComp(objdoc.All(i).tagName, "body", vbTextCompare) = 0 Then
                    text1.Text = objdoc.All(i).innerText
                    Exit For
                End If
            Next i
            Exit For
        End If
    Next objie
End Sub
⑹ 如何过滤HTML标签,或者读取数据时,去处HTML标签
如果你把html标签除掉了问题会更大。
如果你不需要所见即所得的编辑器,那么可以直接使用textarea。再把用户输入的html标签过滤掉就行了。
⑺ 字符串中如何过滤HTML标签字符
下面是asp中的方法,你可以改造成.net的
' Removes HTML markup from a string.
'   strToFilter - the text to clean; Null yields "".
' Returns the text with double quotes, spaces, "&nbsp;" entities and "&"
' characters removed and every "<...>" span deleted. A "<" that has no
' later ">" is left in place.
Function FilterHTML(strToFilter)
    Dim strTemp
    Dim n, m ' positions of the current "<" and of its matching ">"

    If IsNull(strToFilter) Then
        FilterHTML = ""
        Exit Function
    End If

    strTemp = strToFilter
    strTemp = Replace(strTemp, """", "")
    ' Remove the entity before "&" is stripped below; otherwise "&nbsp;"
    ' would be left behind as the literal text "nbsp;".
    ' NOTE(review): the published snippet repeated the space replacement
    ' three times; those literals were probably HTML entities that were
    ' decoded when the code was posted - confirm which ones were intended.
    strTemp = Replace(strTemp, "&nbsp;", "")
    strTemp = Replace(strTemp, " ", "")
    strTemp = Replace(strTemp, "&", "")

    n = InStr(strTemp, "<") ' first "<"
    Do While n > 0
        ' Look for ">" only AFTER the "<". Searching from the start of the
        ' string lets a stray ">" earlier in the text end the loop with all
        ' tags still in place.
        m = InStr(n, strTemp, ">")
        If m = 0 Then Exit Do ' unterminated "<": nothing more to strip
        ' Keep what is left of "<" and right of ">".
        strTemp = Left(strTemp, n - 1) & Mid(strTemp, m + 1)
        n = InStr(strTemp, "<")
    Loop

    FilterHTML = strTemp
End Function
⑻ vb过滤html图片标签的正则表达式
<img.*?>