# -*- coding: utf-8 -*-
# @Time   : 2021/11/20 17:28
# @Author : Harken
# AJAX GET request for page 1 of the Douban movie chart, saved to a local file.
import urllib.request

url = 'https://movie.douban.com/j/chart/top_list?type=5&interval_id=100%3A90&action=&start=0&limit=20'
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.45 Safari/537.36'
}

# 1. Build the request object (browser User-Agent so the server accepts us).
request = urllib.request.Request(url=url, headers=headers)

# 2. Fetch the response data.
response = urllib.request.urlopen(request)
content = response.read().decode('utf-8')

# 3. Save the data locally.
# open() defaults to the platform encoding (gbk on Chinese Windows); the
# payload contains Chinese text, so encoding='utf-8' must be given explicitly.
# `with` also guarantees the file handle is closed (the original leaked it).
with open('douban.json', 'w', encoding='utf-8') as fp:
    fp.write(content)
# AJAX GET requests for the first ten pages of the Douban movie chart, saved to disk.
# https://movie.douban.com/j/chart/top_list?type=5&interval_id=100%3A90&action=&start=0&limit=20   (page 1)
# https://movie.douban.com/j/chart/top_list?type=5&interval_id=100%3A90&action=&start=20&limit=20  (page 2)
# https://movie.douban.com/j/chart/top_list?type=5&interval_id=100%3A90&action=&start=40&limit=20  (page 3)
# Pattern in the start/limit query parameters:
#   page  : 1   2   3   4
#   start : 0  20  40  60
#   start = (page - 1) * 20
import urllib.parse
import urllib.request


def create_request(page):
    """Build the customized Request for one page of the Douban top-list API.

    Args:
        page: 1-based page number; each page holds 20 entries.

    Returns:
        urllib.request.Request with the paging query string and a browser
        User-Agent header attached.
    """
    base_url = 'https://movie.douban.com/j/chart/top_list?type=5&interval_id=100%3A90&action=&'
    params = {
        'start': (page - 1) * 20,  # offset: page 1 -> 0, page 2 -> 20, ...
        'limit': 20,
    }
    url = base_url + urllib.parse.urlencode(params)
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.45 Safari/537.36'
    }
    return urllib.request.Request(url=url, headers=headers)
def get_content(request):
    """Send *request* and return the response body decoded as UTF-8 text.

    Args:
        request: a urllib.request.Request (as built by create_request).

    Returns:
        str: the raw JSON payload returned by the server.
    """
    response = urllib.request.urlopen(request)
    content = response.read().decode('utf-8')
    return content
def down_load(page, content):
    """Write one page's JSON payload to 'douban_1<page>.json'.

    Bug fixes vs. the original: the caller passes (page, content) but the
    signature only took page; fp.write() was called without an argument; the
    'with' statement was missing its colon; encoding was misspelled 'uft-8'.

    Args:
        page: page number, used to build the output filename.
        content: the JSON text to persist.
    """
    # Explicit utf-8: the payload contains Chinese text and the platform
    # default encoding may be gbk.
    with open('douban_1' + str(page) + '.json', 'w', encoding='utf-8') as fp:
        fp.write(content)
# Written following an enterprise-style development workflow:
#   1. build the request object
#   2. fetch the response data
#   3. save the data locally
# Program entry point:
if __name__ == '__main__':
    # Prompt for the page range; input() returns str, so convert to int.
    start_page = int(input('请输入起始的页码'))
    end_page = int(input('请输入结束的页码'))
    for page in range(start_page, end_page + 1):  # inclusive range of pages
        # Each page gets its own customized request object.
        request = create_request(page)
        # Fetch the page's JSON payload.
        content = get_content(request)
        # Persist it to douban_1<page>.json.
        down_load(page, content)