python BeautifulSoup库的常用操作
BeautifulSoup库
0、所有方法都有的
from bs4 import BeautifulSoup
# Shared sample document for demo01-demo04, which receive it through their
# ``html_doc`` parameter; the later demos define their own markup inline.
# The closing </body> and </html> tags are absent, so the parser has to
# complete the document itself.
html_doc = """
<html><head><title>The Dormouse's story</title></head>
<body>
<p class="title"><b>The Dormouse's story</b></p>
<p class="story">Once upon a time there were three little sisters; and their names were
<a href="http://example.com/elsie" class="sister" id="link1">Elsie</a>,
<a href="http://example.com/lacie" class="sister" id="link2">Lacie</a> and
<a href="http://example.com/tillie" class="sister" id="link3">Tillie</a>;
and they lived at the bottom of a well.</p>
<p class="story">...</p>
"""


# 1. Basic usage
def demo01(html_doc):
    """Basic usage: parse a document and print it in tidy form.

    Args:
        html_doc: HTML markup to parse.
    """
    # Parsing with the lxml backend fills in tags missing from the input.
    parsed = BeautifulSoup(html_doc, "lxml")
    # prettify() renders the repaired tree as consistently formatted markup.
    pretty_markup = parsed.prettify()
    print(pretty_markup)
    # Text of the <title> element of the parsed tree.
    title_text = parsed.title.string
    print(title_text)


# 2. Node selectors
def demo02(html_doc):
    """Node selectors: reach tags through attribute access on the soup.

    Args:
        html_doc: HTML markup to parse.
    """
    tree = BeautifulSoup(html_doc, 'lxml')

    title_tag = tree.title
    # The <title> element itself: <title>The Dormouse's story</title>
    print(title_tag)
    # Selected nodes are bs4.element.Tag objects.
    print(type(title_tag))
    # Text inside the tag: The Dormouse's story
    print(title_tag.string)

    # The <head> element together with the <title> nested in it.
    print(tree.head)

    first_paragraph = tree.p
    # Only the first <p> is returned: <p class="title">...</p>
    print(first_paragraph)
    print(type(first_paragraph))

    # Likewise only the first <a> (id="link1") is returned.
    print(tree.a)


# 3. Extracting node information
def demo03(html_doc):
    """Extract the name, attributes and text of the first <a> tag.

    Args:
        html_doc: HTML markup to parse; expected to contain an <a> tag
            that has a ``class`` attribute.
    """
    soup = BeautifulSoup(html_doc, "lxml")
    # Attribute access returns the first <a> in the document.
    tag = soup.a
    # 1. Tag name -> a
    print(tag.name)
    # 2. Attribute values; ``class`` comes back as a list, e.g. ['sister'].
    print("class值為: ", tag.attrs["class"])
    # .get() instead of ["href"]: an anchor without an href attribute prints
    # None here rather than aborting the demo with a KeyError.
    print("href值為: ", tag.attrs.get("href"))
    # 3. Text content -> Elsie
    print(tag.string)


# 4. Getting child node information
def demo04(html_doc):
    """Child node information: drill down from <head> to the title text.

    Args:
        html_doc: HTML markup to parse.
    """
    # 1. Start from the <head> element.
    head_tag = BeautifulSoup(html_doc, 'lxml').head
    print(head_tag)
    # 2. Step into the <title> nested inside <head>.
    title_tag = head_tag.title
    print(title_tag)
    # 3. Finally the text held by that <title>: The Dormouse's story
    print(title_tag.string)


# 5. Relational selection
1、获取子节点--contents
def demo05():
    """Relational selection, downwards: direct children via ``contents``.

    A wanted node cannot always be reached in one step; often a node is
    selected first and its children, parents or siblings are then picked
    relative to it.
    """
    # Here the first <p> is closed on the same line as its content.
    single_line_doc = """
<html><head><title>The Dormouse's story</title></head>
<body>
<p class="title" name="Dormouse"><b>The Dormouse's story</b></p>
<p class="story">...</p>
"""
    # Same markup, except that the first </p> sits on a line of its own.
    wrapped_doc = """
<html><head><title>The Dormouse's story</title></head>
<body>
<p class="title" name="Dormouse"><b>The Dormouse's story</b>
</p>
<p class="story">...</p>
"""
    # ``contents`` lists the children of the first <p>. Recorded outputs:
    #   first document  -> [<b>The Dormouse's story</b>]
    #   second document -> [<b>The Dormouse's story</b>, '\n']
    # The line break before </p> shows up as an extra text node.
    for markup in (single_line_doc, wrapped_doc):
        print(BeautifulSoup(markup, "lxml").p.contents)


# 2. Child nodes -- children
def demo06():
    """Relational selection, downwards: direct children via ``children``."""
    # The sister links carry their href again; the scraped copy of this
    # sample had replaced it with a run of duplicated rel attributes.
    html_doc = """
<html><head><title>The Dormouse's story</title></head>
<body>
<p class="story">
Once upon a time there were three little sisters; and their names were
<a href="http://example.com/elsie" class="sister" id="link1">Elsie</a>,
<a href="http://example.com/lacie" class="sister" id="link2">Lacie</a> and
<a href="http://example.com/tillie" class="sister" id="link3">Tillie</a>;
and they lived at the bottom of a well.
</p>
<p class="story">...</p>
"""
    soup = BeautifulSoup(html_doc, "lxml")
    # ``children`` is an iterator: printing it shows the iterator object,
    # not the nodes.
    print(soup.p.children)
    # Materialised, it holds the text runs and the three <a> tags of the
    # first <p>, in document order.
    print(list(soup.p.children))
    for item in soup.p.children:
        print(item)


# 3. Descendant nodes -- descendants
def demo07():
    """Relational selection, downwards: all descendants via ``descendants``.

    Besides the direct children this also yields what is nested inside
    them (children and grandchildren).
    """
    # The sister links carry their href again; the scraped copy of this
    # sample had replaced it with a run of duplicated rel attributes.
    html_doc = """
<html><head><title>The Dormouse's story</title></head>
<body>
<p class="story">Once upon a time there were three little sisters; and their names were
<a href="http://example.com/elsie" class="sister" id="link1"><span>Elsie</span>Elsie</a>,
<a href="http://example.com/lacie" class="sister" id="link2">Lacie</a> and
<a href="http://example.com/tillie" class="sister" id="link3">Tillie</a>;
and they lived at the bottom of a well.</p>
<p class="story">...</p>
"""
    soup = BeautifulSoup(html_doc, "lxml")
    # Printing the attribute itself shows a generator object.
    print(soup.p.descendants)
    # As a list, every tag is followed by its own contents: the first <a>,
    # then its <span>, then the two 'Elsie' strings, and so on.
    print(list(soup.p.descendants))


# 4. Parent node -- parent, ancestor nodes -- parents
def demo08():
    """Relational selection, upwards: ``parent`` and ``parents``.

    ``parent`` gives the direct parent of a node, ``parents`` iterates over
    all of its ancestors.
    """
    # The sister links carry their href again; the scraped copy of this
    # sample had replaced it with a run of duplicated rel attributes.
    html_doc = """
<html><head><title>The Dormouse's story</title></head>
<body>
<p class="story">
Once upon a time there were three little sisters; and their names were
<a href="http://example.com/elsie" class="sister" id="link1">Elsie</a>
<p>
<a href="http://example.com/lacie" class="sister" id="link2">Lacie</a>
</p>
</p>
<p class="story">...</p>
"""
    soup = BeautifulSoup(html_doc, "lxml")
    # Parent of the first <p>: the whole <body>, with the <p> children and
    # the <a> grandchildren included.
    print(soup.p.parent)
    # Parent of the first <a>: the <p> enclosing it, text included.
    print(soup.a.parent)
    print("=======================")
    # ``parents`` itself prints as a generator object ...
    print(soup.a.parents)
    # ... so enumerate it to see each ancestor in turn.
    for i, parent in enumerate(soup.a.parents):
        print(i, parent)


# 5. Sibling nodes
def demo09():
    """Relational selection, sideways: sibling nodes.

    Attributes shown:
        1. next_sibling
        2. next_siblings
        3. previous_sibling
        4. previous_siblings
    """
    # The three sister links carry their href again; the scraped copy of
    # this sample had replaced it with a run of duplicated rel attributes.
    # The "a" and "b" anchors are kept exactly as found.
    html_doc = """
<html><head><title>The Dormouse's story</title></head>
<body>
<p class="story">Once upon a time there were three little sisters; and their names were
<a href="http://example.com/elsie" class="sister" id="link1">Elsie</a>hello
<a href="http://example.com/lacie" class="sister" id="link2">Lacie</a> and
<a href="http://example.com/tillie" class="sister" id="link3">Tillie</a>
<a rel="external nofollow" class="sister" id="link3">a</a>
<a rel="external nofollow" class="sister" id="link3">b</a>
and they lived at the bottom of a well.</p>
<p class="story">...</p>
"""
    soup = BeautifulSoup(html_doc, "lxml")
    # 1. next_sibling: the node right after the first <a>, here the text
    #    beginning with "hello".
    print(soup.a.next_sibling)
    # 2. next_siblings: prints as a generator object; wrap it in list() to
    #    see the nodes.
    print(soup.a.next_siblings)
    # 3. previous_sibling: the text in front of the first <a>
    #    ("Once upon a time ... their names were").
    print(soup.a.previous_sibling)
    # 4. previous_siblings: again a generator object.
    print(soup.a.previous_siblings)


# 6. Method selectors
1、find_all()
def demo10():
    """Method selector ``find_all()``: return all matches as a list.

    ``find_all(name, attrs, recursive, string, limit)``:
        name: tag name to look for.
        attrs: dict of attribute filters.
        recursive: whether to search through all descendants; defaults
            to True.
        string: text to look for.
        limit: maximum number of results.
    """
    # link1 carries the extra class "hi" so that the class_="hi" query below
    # has something to match; without it that query returns an empty list.
    # The sister links also carry their href again instead of the run of
    # duplicated rel attributes found in the scraped copy.
    html_doc = """
<html><head><title>The Dormouse's story</title></head>
<body>
<p class="title" name="Dormouse"><b>The Dormouse's story</b></p>
<p class="story">Once upon a time there were three little sisters; and their names were
<a href="http://example.com/elsie" class="sister hi" id="link1">Elsie</a>,
<a href="http://example.com/lacie" class="sister" id="link2">Lacie</a> and
<a href="http://example.com/tillie" class="sister" id="link3">Tillie</a>;
and they lived at the bottom of a well.</p>
<p class="story">...</p>
"""
    soup = BeautifulSoup(html_doc, "lxml")
    # 1. Basic use: every <a> tag (link1, link2, link3).
    print(soup.find_all("a"))
    # 2. Attribute search with a filter dict: elements of class "sister".
    print(soup.find_all(attrs={"class": "sister"}))
    # Same query through the class_ keyword.
    print(soup.find_all(class_="sister"))
    # A class filter matches any single class of a tag, so "hi" finds link1
    # even though its class attribute is "sister hi".
    print(soup.find_all(class_="hi"))
    # 3. Text search: strings equal to "Elsie".
    print(soup.find_all(string="Elsie"))


# 2. find()
def demo11():
    """Method selector ``find()``: return a single element.

    Where ``find_all()`` returns every match, ``find()`` returns only the
    first one.
    """
    # The sister links carry their href again; the scraped copy of this
    # sample had replaced it with a run of duplicated rel attributes.
    html_doc = """
<html><head><title>The Dormouse's story</title></head>
<body>
<p class="title" name="Dormouse"><b>The Dormouse's story</b></p>
<p class="story">Once upon a time there were three little sisters; and their names were
<a href="http://example.com/elsie" class="sister" id="link1"><span>Elsie</span></a>,
<a href="http://example.com/lacie" class="sister" id="link2"><span>Lacie</span></a> and
<a href="http://example.com/tillie" class="sister" id="link3"><span>Tillie</span></a>;
and they lived at the bottom of a well.</p>
<p class="story">...</p>
"""
    soup = BeautifulSoup(html_doc, "lxml")
    # Prints the first <a> only (id="link1", wrapping <span>Elsie</span>).
    print(soup.find("a"))


# 3. Other method selectors
'''
其他方法选择器
find_parents(): 返回所有的祖先节点
find_parent(): 返回当前节点的父节点
find_next_siblings(): 返回当前节点后面的所有兄弟节点
find_previous_siblings(): 返回当前节点前面的所有兄弟节点
find_next_sibling(): 返回当前节点后面的相邻的那个兄弟节点
find_previous_sibling(): 返回当前节点前面的相邻的那个兄弟节点
'''
7、CSS选择器--select()
def demo12():
    """CSS selectors: query the tree with ``select()``."""
    panel_markup = """
<div class="panel">
<div class="panel-heading">
<h4>Hello World</h4>
</div>
<div class="panel-body">
<ul class="list" id="list-1">
<li class="element">Foo</li>
<li class="element">Bar</li>
<li class="element">Jay</li>
</ul>
<ul class="list list-samll" id="list-2">
<li class="element">Foo</li>
<li class="element">Bar</li>
<li class="element">Jay</li>
</ul>
</div>
</div>
</div>
"""
    tree = BeautifulSoup(panel_markup, "lxml")
    # Queries, run in this order:
    #   ".panel-heading" -> the node with class panel-heading
    #   "ul li"          -> the <li> nodes below a <ul> (Foo, Bar, Jay twice)
    #   "#list-2 li"     -> the <li> nodes below the element with id list-2
    #   "ul"             -> both <ul> nodes
    for css in (".panel-heading", "ul li", "#list-2 li", "ul"):
        print(tree.select(css))
    # Every selected element is a bs4.element.Tag.
    first_ul = tree.select('ul')[0]
    print(type(first_ul))


# Notes on select():
# 1、查询所有的子孙节点
在 select(css)中的 css 有多个节点时,节点元素之间用空格分开,就是查找子孙节点,
例如 soup.select("div p")是查找所有<div>节点下面的所有子孙<p>节点。
# 2、只查直接的子节点,不查孙节点
节点元素之间用" > "分开(注意>的前后至少包含一个空格),就是查找直接子节点:
例如 soup.select("div > p")是查找所有<div>节点下面的所有直接子节点<p>,不包含孙节点。
# 3、查找某个节点同级别的某类节点
用" ~ "连接两个节点表示查找前一个节点后面的所有同级别的兄弟节点(注意~号前后至少有一个空格),
例如 soup.select("div ~ p")查找<div>后面的所有同级别的<p>兄弟节点。
# 4、查找同级别某个节点后的第一个某类节点
用" + "连接两个节点表示查找前一个节点后面的第一个同级别的兄弟节点(注意+号前后至少有一个空格):
例如 soup.select("div + p")查找<div>后面的第一个同级别的<p>兄弟节点。
8、嵌套选择--select()
def demo13():
    """Nested selection: call ``select()`` on the result of a ``select()``."""
    panel_markup = """
<div class="panel">
<div class="panel-heading">
<h4>Hello World</h4>
</div>
<div class="panel-body">
<ul class="list" id="list-1">
<li class="element">Foo</li>
<li class="element">Bar</li>
<li class="element">Jay</li>
</ul>
<ul class="list list-samll" id="list-2">
<li class="element">Foo</li>
<li class="element">Bar</li>
<li class="element">Jay</li>
</ul>
</div>
</div>
</div>
"""
    tree = BeautifulSoup(panel_markup, 'lxml')
    # One line per <ul>, each listing that <ul>'s own <li> nodes:
    #   [<li class="element">Foo</li>, <li class="element">Bar</li>, <li class="element">Jay</li>]
    #   [<li class="element">Foo</li>, <li class="element">Bar</li>, <li class="element">Jay</li>]
    for list_node in tree.select('ul'):
        items = list_node.select('li')
        print(items)


# 9. Getting attributes
def demo14():
    """Getting attributes: two equivalent ways to read a tag attribute."""
    panel_markup = """
<div class="panel">
<div class="panel-heading">
<h4>Hello World</h4>
</div>
<div class="panel-body">
<ul class="list" id="list-1">
<li class="element">Foo</li>
<li class="element">Bar</li>
<li class="element">Jay</li>
</ul>
<ul class="list list-samll" id="list-2">
<li class="element">Foo</li>
<li class="element">Bar</li>
<li class="element">Jay</li>
</ul>
</div>
</div>
</div>
"""
    tree = BeautifulSoup(panel_markup, 'lxml')
    for list_node in tree.select('ul'):
        # Way 1: index the tag directly.
        print(list_node['id'])
        # Way 2: go through the tag's attrs mapping.
        print(list_node.attrs['id'])


# That concludes this walkthrough of common BeautifulSoup operations; see the
# site's other related articles for more material on BeautifulSoup.
相关文章
Python中__new__与__init__方法的区别详解
这篇文章主要介绍了Python中__new__与__init__方法的区别,是Python学习中的基础知识,需要的朋友可以参考下 2015-05-05
python opencv实现旋转矩形框裁减功能
这篇文章主要为大家详细介绍了python opencv实现旋转矩形框裁减功能,具有一定的参考价值,感兴趣的小伙伴们可以参考一下 2018-07-07
pycharm使用Translation插件实现翻译功能
PyCharm是一款很流行的Python编辑器,经常遇到在PyCharm中把中文翻译成英文的需求,下面这篇文章主要给大家介绍了关于pycharm使用Translation插件实现翻译功能的相关资料,需要的朋友可以参考下 2023-05-05
浅谈python新式类和旧式类区别
python的新式类是2.2版本引进来的,我们可以将之前的类叫做经典类或者旧式类。这篇文章主要介绍了浅谈python新式类和旧式类区别,具有一定的参考价值,感兴趣的小伙伴们可以参考一下 2019-04-04
Python实现随机森林回归与各自变量重要性分析与排序
这篇文章主要为大家详细介绍了在Python环境中,实现随机森林(Random Forest,RF)回归与各自变量重要性分析与排序的过程,感兴趣的小伙伴可以了解一下 2023-02-02

