From c37ec0ba1944dc4066a603021b7cea514cc5c237 Mon Sep 17 00:00:00 2001
From: rootphantomer <root@ph4nt0mer.xyz>
Date: Wed, 17 Jul 2024 17:10:49 +0800
Subject: [PATCH] =?UTF-8?q?style(auto=5Fcomment=5Fplus,jdspider):=20?=
 =?UTF-8?q?=E4=BC=98=E5=8C=96=E4=BB=A3=E7=A0=81=E6=A0=BC=E5=BC=8F=E9=97=AE?=
 =?UTF-8?q?=E9=A2=98?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

优化代码格式，使其更符合 Python 最新的代码格式标准
---
 auto_comment_plus.py | 26 +++++++++++++++-----------
 jdspider.py          | 41 +++++++++++++++++++++++------------------
 2 files changed, 38 insertions(+), 29 deletions(-)

diff --git a/auto_comment_plus.py b/auto_comment_plus.py
index 6495419..46ad1b0 100644
--- a/auto_comment_plus.py
+++ b/auto_comment_plus.py
@@ -5,12 +5,12 @@
 
 import argparse
 import copy
-import urllib
 import logging
 import os
 import random
 import sys
 import time
+import urllib
 
 import jieba  # just for linting
 import jieba.analyse
@@ -20,7 +20,6 @@
 
 import jdspider
 
-
 # constants
 CONFIG_PATH = "./config.yml"
 USER_CONFIG_PATH = "./config.user.yml"
@@ -90,6 +89,7 @@ def format(self, record):
 
 # 评价生成
 def generation(pname, _class: int = 0, _type: int = 1, opts: object = None):
+    result = []
     opts = opts or {}
     items = ["商品名"]
     items.clear()
@@ -135,6 +135,7 @@ def generation(pname, _class: int = 0, _type: int = 1, opts: object = None):
         opts["logger"].debug("_class is 1. Directly return name")
         return name
     else:
+        num = 0
         if _type == 1:
             num = 6
         elif _type == 0:
@@ -318,11 +319,11 @@ def ordinary(N, opts=None):
             if not opts.get("dry_run"):
                 opts["logger"].debug("Sending comment request")
                 pj2 = requests.post(url2, headers=headers2, data=data2)
+                opts["logger"].info(
+                    "发送请求后的状态码:{},text:{}".format(pj2.status_code, pj2.text)
+                )
             else:
                 opts["logger"].debug("Skipped sending comment request in dry run")
-            opts["logger"].info(
-                "发送请求后的状态码:{},text:{}".format(pj2.status_code, pj2.text)
-            )
             if pj2.status_code == 200 and pj2.json()["success"]:
                 # 当发送后的状态码 200，并且返回值里的 success 是 true 才是晒图成功，此外所有状态均为晒图失败
                 opts["logger"].info(f"\t{i}.评价订单\t{oname}[{oid}]并晒图成功")
@@ -513,11 +514,11 @@ def review(N, opts=None):
         if not opts.get("dry_run"):
             opts["logger"].debug("Sending comment request")
             pj1 = requests.post(url1, headers=headers2, data=data1)
+            opts["logger"].debug(
+                "发送请求后的状态码:{},text:{}".format(pj1.status_code, pj1.text)
+            )
         else:
             opts["logger"].debug("Skipped sending comment request in dry run")
-        opts["logger"].debug(
-            "发送请求后的状态码:{},text:{}".format(pj1.status_code, pj1.text)
-        )
         opts["logger"].info("完成")
         opts["logger"].debug("Sleep time (s): %.1f", REVIEW_SLEEP_SEC)
         time.sleep(REVIEW_SLEEP_SEC)
@@ -766,7 +767,8 @@ def main(opts=None):
 
     headers2 = {
         "cookie": ck.encode("utf-8"),
-        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.5735.110 Safari/537.36",
+        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
+                      "Chrome/114.0.5735.110 Safari/537.36",
         "Connection": "keep-alive",
         "Cache-Control": "max-age=0",
         "X-Requested-With": "XMLHttpRequest",
@@ -787,7 +789,8 @@ def main(opts=None):
     }
     headers = {
         "cookie": ck.encode("utf-8"),
-        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.82 Safari/537.36",
+        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
+                      "Chrome/98.0.4758.82 Safari/537.36",
         "Connection": "keep-alive",
         "Cache-Control": "max-age=0",
         "sec-ch-ua": '" Not A;Brand";v="99", "Chromium";v="98", "Google Chrome";v="98"',
@@ -795,7 +798,8 @@ def main(opts=None):
         "sec-ch-ua-platform": '"Windows"',
         "DNT": "1",
         "Upgrade-Insecure-Requests": "1",
-        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
+        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,"
+                  "application/signed-exchange;v=b3;q=0.9",
         "Sec-Fetch-Site": "same-site",
         "Sec-Fetch-Mode": "navigate",
         "Sec-Fetch-User": "?1",
diff --git a/jdspider.py b/jdspider.py
index 103fdad..513f645 100644
--- a/jdspider.py
+++ b/jdspider.py
@@ -7,14 +7,14 @@
 import random
 import re
 import sys
-import time, yaml
+import time
 from urllib.parse import quote, urlencode
 
 import requests
+import yaml
 import zhon.hanzi
 from lxml import etree
 
-
 # Reference: https://github.com/fxsjy/jieba/blob/1e20c89b66f56c9301b0feed211733ffaa1bd72a/jieba/__init__.py#L27
 with open("./config.yml", "r", encoding="utf-8") as f:
     cfg = yaml.safe_load(f)
@@ -35,7 +35,8 @@ def __init__(self, categlory):
         )
         self.commentBaseUrl = "https://sclub.jd.com/comment/productPageComments.action?"
         self.headers = {
-            "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
+            "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,"
+                      "*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
             "accept-encoding": "gzip, deflate, br",
             "accept-language": "zh-CN,zh;q=0.9",
             "cache-control": "max-age=0",
@@ -47,10 +48,12 @@ def __init__(self, categlory):
             "sec-fetch-site": "none",
             "sec-fetch-user": "?1",
             "upgrade-insecure-requests": "1",
-            "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.82 Safari/537.36",
+            "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
+                          "Chrome/98.0.4758.82 Safari/537.36",
         }
         self.headers2 = {
-            "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
+            "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,"
+                      "*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
             "accept-encoding": "gzip, deflate, br",
             "accept-language": "zh-CN,zh;q=0.9",
             "cache-control": "max-age=0",
@@ -62,7 +65,8 @@ def __init__(self, categlory):
             "sec-fetch-site": "none",
             "sec-fetch-user": "?1",
             "upgrade-insecure-requests": "1",
-            "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.82 Safari/537.36",
+            "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
+                          "Chrome/98.0.4758.82 Safari/537.36",
             "cookie": cookie,
         }
         self.productsId = self.getId()
@@ -72,10 +76,10 @@ def __init__(self, categlory):
 
     def getParamUrl(self, productid: str, page: str, score: str) -> tuple[dict, str]:
         params = {  # 用于控制页数，页面信息数的数据，非常重要，必不可少，要不然会被JD识别出来，爬不出相应的数据。
-            "productId": "%s" % (productid),
-            "score": "%s" % (score),  # 1表示差评，2表示中评，3表示好评
+            "productId": "%s" % productid,
+            "score": "%s" % score,  # 1表示差评，2表示中评，3表示好评
             "sortType": "5",
-            "page": "%s" % (page),
+            "page": "%s" % page,
             "pageSize": "10",
             "isShadowSku": "0",
             "rid": "0",
@@ -90,8 +94,9 @@ def getHeaders(
         dict
     ):  # 和初始的self.header不同，这是爬取某个商品的header，加入了商品id，我也不知道去掉了会怎样。
         header = {
-            "Referer": "https://item.jd.com/%s.html" % (productid),
-            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36",
+            "Referer": "https://item.jd.com/%s.html" % productid,
+            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
+                          "Chrome/75.0.3770.142 Safari/537.36",
             # "cookie": cookie,
         }
         return header
@@ -110,7 +115,7 @@ def getId(
     def getData(
         self,
         maxPage: int,
-        score: str,
+        score: int,
     ):  # maxPage是爬取评论的最大页数，每页10条数据。差评和好评的最大一般页码不相同，一般情况下：好评>>差评>中评
         # maxPage遇到超出的页码会自动跳出，所以设大点也没有关系。
         # score是指那种评价类型，好评3、中评2、差评1。
@@ -121,14 +126,14 @@ def getData(
             "爬取商品数量最多为8个,请耐心等待,也可以自行修改jdspider文件"
         )
         if len(self.productsId) < 8:  # limit the sum of products
-            sum = len(self.productsId)
+            sum_ = len(self.productsId)
         else:
-            sum: int = 3
-        for j in range(sum):
-            id: str = self.productsId[j]
+            sum_: int = 3
+        for j in range(sum_):
+            id_: str = self.productsId[j]
             # header = self.getHeaders(id)
             for i in range(1, maxPage):
-                param, url = self.getParamUrl(id, i, score)
+                param, url = self.getParamUrl(id_, str(i), str(score))
                 default_logger.info(
                     f"正在爬取当前商品的评论信息>>>>>>>>>第：%d 个，第 %d 页"
                     % (j + 1, i)
@@ -152,7 +157,7 @@ def getData(
                     continue
                 if len((res_json["comments"])) == 0:
                     default_logger.warning(
-                        "页面次数已到：%d,超出范围(或未爬取到评论)" % (i)
+                        "页面次数已到：%d,超出范围(或未爬取到评论)" % i
                     )
                     break
                 for cdit in res_json["comments"]: