1.抓取網站第一個<h3></h3>標籤
2.使用 for 迴圈抓取所有的 <h3></h3> 標籤
目標網址: https://www.dotblogs.com.tw/YiruAtStudio
![](s3://crabby-images/23c6e/23c6eb994ffc2c4b4a5847c7af6822c6d436f98c)
1.抓取網站第一個<h3></h3>標籤
import requests
from bs4 import BeautifulSoup
def main() -> None:
    """Fetch the blog index page and print its first ``<h3>`` heading.

    Prints the first ``<h3>`` tag twice (once via ``find`` and once via
    attribute access), then the heading's text, then the text of the
    ``<a>`` link inside it.

    Raises:
        requests.RequestException: if the request fails, times out, or the
            server answers with an HTTP error status.
    """
    # A timeout keeps the script from hanging forever on a stalled connection.
    resp = requests.get(
        'https://www.dotblogs.com.tw/YiruAtStudio',
        timeout=10,
    )
    # Fail loudly on 4xx/5xx instead of silently parsing an error page.
    resp.raise_for_status()
    soup = BeautifulSoup(resp.text, 'html.parser')

    # Get the first blog post (h3).
    print(soup.find('h3'))
    print(soup.h3)  # equivalent to the previous line

    first_h3 = soup.h3
    if first_h3 is None:
        # No <h3> on the page: the two prints above already showed None,
        # and there is nothing further to inspect.
        return

    # Get the first blog post's main title (text only).
    print(first_h3.text)

    link = first_h3.a
    if link is None:
        # Heading without a link: skip the link-based variants instead of
        # raising AttributeError.
        return
    # Same text as the previous line when the heading contains only the link.
    print(link.text)
    # .string is None when the link has more than one child node.
    print(link.string)


if __name__ == '__main__':
    main()
![](s3://crabby-images/1d997/1d997653af5ebdf181f12bbfb6807792d2411a51)
2.使用 for 迴圈抓取所有的 <h3></h3> 標籤
#把所有的h3都讀出來
import requests
from bs4 import BeautifulSoup
def main() -> None:
    """Fetch the blog index page and print the link text of every ``<h3>``.

    Headings that do not contain an ``<a>`` link are skipped.

    Raises:
        requests.RequestException: if the request fails, times out, or the
            server answers with an HTTP error status.
    """
    # A timeout keeps the script from hanging forever on a stalled connection.
    resp = requests.get(
        'https://www.dotblogs.com.tw/YiruAtStudio',
        timeout=10,
    )
    # Fail loudly on 4xx/5xx instead of silently parsing an error page.
    resp.raise_for_status()
    soup = BeautifulSoup(resp.text, 'html.parser')

    # Get every blog post's main title, selecting by tag name.
    for title in soup.find_all('h3'):
        link = title.a
        if link is None:
            # An <h3> without a link would raise AttributeError on .text
            # and abort the whole loop; skip it instead.
            continue
        print(link.text)


if __name__ == '__main__':
    main()
![](s3://crabby-images/6e5c4/6e5c44c8256913e2bab217a5cb6485d1a430cc5c)
Yiru@Studio - 關於我 - 意如