    async def _scrape_details_with_progress(self, page: Page):
        start_time = time.time()
        with tqdm(total=len(self.products), desc="Detail parsing", unit="item", dynamic_ncols=True) as pbar:
            for item in self.products:
                await self.scrape_details(page, item)
                pbar.update(1)
                self._update_remaining(pbar, start_time)
    async def _download_images_with_progress(self):
        start_time = time.time()
        async with aiohttp.ClientSession() as session:
            with tqdm(total=len(self.products), desc="Image download", unit="img", dynamic_ncols=True) as pbar:
                for item in self.products:
                    await self.download_image(session, item)
                    pbar.update(1)
                    self._update_remaining(pbar, start_time)
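
    # A minimal sketch of a concurrent alternative (not wired into run_scrape):
    # the loop above downloads one image at a time, so network latency dominates.
    # This assumes download_image is safe to run concurrently and that the
    # server tolerates a few parallel requests; `limit` is an illustrative
    # parameter, not part of the original design.
    async def _download_images_concurrent(self, limit: int = 8):
        sem = asyncio.Semaphore(limit)  # cap the number of in-flight requests
        async with aiohttp.ClientSession() as session:
            async def worker(item):
                async with sem:
                    await self.download_image(session, item)
            # Schedule every download at once; the semaphore throttles them.
            await asyncio.gather(*(worker(item) for item in self.products))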
    @staticmethod
    def _update_remaining(pbar, start_time: float):
        elapsed = time.time() - start_time
        rate = pbar.n / elapsed if elapsed > 0 else 0
        remaining = (pbar.total - pbar.n) / rate if rate > 0 else 0
        pbar.set_postfix_str(f"remaining {remaining:,.1f}s")
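
    # Worked example for the estimate above: 40 of 100 items in 20 s gives
    # rate = 40 / 20 = 2.0 items/s, so remaining = (100 - 40) / 2.0 = 30.0 s.
    # Note that tqdm's default bar_format already renders a comparable
    # elapsed<remaining estimate; the postfix repeats it as raw seconds.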
    def _export_csv(self):
        if not self.products:
            return
        with open(CSV_FILE, "w", newline="", encoding="utf-8-sig") as f:
            writer = csv.DictWriter(f, fieldnames=self.products[0].keys())
            writer.writeheader()
            writer.writerows(self.products)
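
    # Hedged sketch, not called anywhere: _export_csv takes its fieldnames from
    # the first product, so a later dict with an extra key would make DictWriter
    # raise ValueError. If detail scraping can add fields unevenly, building the
    # header from the union of all keys avoids that. The method name is
    # illustrative, not part of the original class.
    def _export_csv_all_fields(self):
        if not self.products:
            return
        # Union of keys across all rows, preserving first-seen order.
        fieldnames = list(dict.fromkeys(k for item in self.products for k in item))
        with open(CSV_FILE, "w", newline="", encoding="utf-8-sig") as f:
            writer = csv.DictWriter(f, fieldnames=fieldnames, restval="")
            writer.writeheader()
            writer.writerows(self.products)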
    def _export_json(self):
        with open(JSON_FILE, "w", encoding="utf-8") as f:
            json.dump(self.products, f, ensure_ascii=False, indent=2)
# ---------------------------- Script entry point ----------------------------
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="ETMOC brand scraper")
    parser.add_argument("--pages", type=str, default="1",
                        help="Number of pages to scrape (default: 1); pass 'all' for every page")
    args = parser.parse_args()

    # Parse the --pages argument
    if args.pages.lower() == "all":
        pages_limit = 0  # 0 means scrape all pages
    else:
        try:
            pages_limit = int(args.pages)
        except ValueError:
            pages_limit = 1  # fall back to a single page on invalid input

    scraper = ETMOCScraper(pages_limit=pages_limit)
    asyncio.run(scraper.run_scrape())
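
# Example invocations (assuming this file is saved as etmoc_scraper.py; the
# filename is illustrative):
#   python etmoc_scraper.py              # scrape the first page only
#   python etmoc_scraper.py --pages 5    # scrape the first five pages
#   python etmoc_scraper.py --pages all  # scrape every page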