{"id":3998,"date":"2026-04-01T17:38:20","date_gmt":"2026-04-01T09:38:20","guid":{"rendered":"http:\/\/www.jiayus.com\/?p=3998"},"modified":"2026-04-01T17:38:20","modified_gmt":"2026-04-01T09:38:20","slug":"%e8%88%86%e6%83%85%e7%9b%91%e6%b5%8b%e4%b8%8eapi%e6%8e%a5%e5%8f%a3%e7%9a%84%e6%95%b0%e6%8d%ae%e5%af%b9%e6%8e%a5%e5%ae%9e%e6%88%98","status":"publish","type":"post","link":"http:\/\/www.jiayus.com\/?p=3998","title":{"rendered":"\u8206\u60c5\u76d1\u6d4b\u4e0eAPI\u63a5\u53e3\u7684\u6570\u636e\u5bf9\u63a5\u5b9e\u6218"},"content":{"rendered":"<p><html><br \/>\n<title>\u8206\u60c5\u76d1\u6d4b\u4e0eAPI\u63a5\u53e3\u7684\u6570\u636e\u5bf9\u63a5\u5b9e\u6218<\/title><br \/>\n<body><\/p>\n<div class=\"article-content\">\n<h1>\u8206\u60c5\u76d1\u6d4b\u4e0eAPI\u63a5\u53e3\u7684\u6570\u636e\u5bf9\u63a5\u5b9e\u6218<\/h1>\n<p>\u5728\u5f53\u4eca\u6570\u5b57\u5316\u65f6\u4ee3\uff0c\u8206\u60c5\u76d1\u6d4b\u5df2\u6210\u4e3a\u4f01\u4e1a\u54c1\u724c\u7ba1\u7406\u3001\u653f\u5e9c\u516c\u5171\u4e8b\u52a1\u5904\u7406\u4ee5\u53ca\u5e02\u573a\u7ade\u4e89\u5206\u6790\u7684\u91cd\u8981\u624b\u6bb5\u3002\u968f\u7740\u793e\u4ea4\u5a92\u4f53\u5e73\u53f0\u7684\u591a\u5143\u5316\u53d1\u5c55\uff0c\u8206\u60c5\u6570\u636e\u6765\u6e90\u65e5\u76ca\u4e30\u5bcc\uff0c\u5982\u4f55\u9ad8\u6548\u3001\u51c6\u786e\u5730\u91c7\u96c6\u548c\u5206\u6790\u8fd9\u4e9b\u6570\u636e\uff0c\u6210\u4e3a\u4e86\u8206\u60c5\u76d1\u6d4b\u7cfb\u7edf\u7684\u6838\u5fc3\u6311\u6218\u3002\u672c\u6587\u5c06\u56f4\u7ed5\u8206\u60c5\u76d1\u6d4b\u7684\u6280\u672f\u5b9e\u73b0\uff0c\u6df1\u5165\u63a2\u8ba8API\u6570\u636e\u5bf9\u63a5\u3001\u5c0f\u7ea2\u4e66\u6570\u636e\u91c7\u96c6\u3001\u5fae\u4fe1\u89c6\u9891\u53f7\u5185\u5bb9\u91c7\u96c6\u4ee5\u53ca\u53cd\u722c\u866b\u7b56\u7565\u7b49\u5173\u952e\u8bdd\u9898\u3002<\/p>\n<h2>\u4e00\u3001\u8206\u60c5\u76d1\u6d4b\u4e0eAPI\u63a5\u53e3\u7684\u6570\u636e\u5bf9\u63a5<\/h2>\n<p>API\uff08Application Programming 
Interface\uff0c\u5e94\u7528\u7a0b\u5e8f\u7f16\u7a0b\u63a5\u53e3\uff09\u662f\u73b0\u4ee3\u8f6f\u4ef6\u7cfb\u7edf\u4e4b\u95f4\u8fdb\u884c\u6570\u636e\u4ea4\u6362\u548c\u529f\u80fd\u8c03\u7528\u7684\u91cd\u8981\u6865\u6881\u3002\u5728\u8206\u60c5\u76d1\u6d4b\u9886\u57df\uff0c\u901a\u8fc7\u5404\u5927\u5e73\u53f0\u63d0\u4f9b\u7684\u5b98\u65b9API\u63a5\u53e3\uff0c\u53ef\u4ee5\u9ad8\u6548\u3001\u5408\u89c4\u5730\u83b7\u53d6\u516c\u5f00\u7684\u8206\u60c5\u6570\u636e\u3002\u76f8\u6bd4\u4e8e\u4f20\u7edf\u7684\u7f51\u9875\u722c\u866b\u65b9\u5f0f\uff0cAPI\u63a5\u53e3\u5177\u6709\u6570\u636e\u51c6\u786e\u6027\u9ad8\u3001\u7a33\u5b9a\u6027\u5f3a\u3001\u6cd5\u5f8b\u98ce\u9669\u4f4e\u7b49\u663e\u8457\u4f18\u52bf\u3002<\/p>\n<p>\u4ee5\u5fae\u535a\u5f00\u653e\u5e73\u53f0\u4e3a\u4f8b\uff0c\u5176API\u63a5\u53e3\u5141\u8bb8\u5f00\u53d1\u8005\u901a\u8fc7OAuth2.0\u8ba4\u8bc1\u540e\uff0c\u83b7\u53d6\u6307\u5b9a\u5173\u952e\u8bcd\u4e0b\u7684\u5fae\u535a\u5185\u5bb9\u3001\u7528\u6237\u4fe1\u606f\u3001\u8bc4\u8bba\u6570\u636e\u7b49\u3002\u4e00\u4e2a\u5178\u578b\u7684\u5fae\u535aAPI\u8c03\u7528\u793a\u4f8b\uff08Python\uff09\u5982\u4e0b\uff1a<\/p>\n<pre><code class=\"language-python\">\nimport requests\nimport time\nimport hashlib\nimport random\n\nclass WeiboAPIClient:\n    \"\"\"\u5fae\u535aAPI\u5ba2\u6237\u7aef\"\"\"\n    \n    def __init__(self, app_key, app_secret, access_token):\n        self.app_key = app_key\n        self.app_secret = app_secret\n        self.access_token = access_token\n        self.base_url = \"https:\/\/api.weibo.com\/2\"\n    \n    def get_statuses(self, keyword, count=100):\n        \"\"\"\n        \u83b7\u53d6\u6307\u5b9a\u5173\u952e\u8bcd\u7684\u5fae\u535a\u5185\u5bb9\n        \n        Args:\n            keyword: \u641c\u7d22\u5173\u952e\u8bcd\n            count: \u8fd4\u56de\u7ed3\u679c\u6570\u91cf\uff0c\u6700\u5927100\n        \n        Returns:\n            dict: API\u54cd\u5e94\u6570\u636e\n        \"\"\"\n        endpoint = 
f\"{self.base_url}\/search\/statuses.json\"\n        params = {\n            'access_token': self.access_token,\n            'q': keyword,\n            'count': count,\n            'page': 1,\n            'range': 800,  # \u53ea\u641c\u7d22\u539f\u521b\u5fae\u535a\n            'sort': 2  # \u6309\u65f6\u95f4\u6392\u5e8f\n        }\n        \n        try:\n            response = requests.get(endpoint, params=params, timeout=10)\n            result = response.json()\n            \n            if 'statuses' in result:\n                return {\n                    'success': True,\n                    'data': result['statuses'],\n                    'count': len(result['statuses'])\n                }\n            else:\n                return {\n                    'success': False,\n                    'error': result.get('error', 'Unknown error')\n                }\n        except requests.RequestException as e:\n            return {'success': False, 'error': str(e)}\n    \n    def parse_weibo_content(self, statuses):\n        \"\"\"\n        \u89e3\u6790\u5fae\u535a\u5185\u5bb9\uff0c\u63d0\u53d6\u5173\u952e\u4fe1\u606f\n        \n        Args:\n            statuses: \u5fae\u535a\u72b6\u6001\u5217\u8868\n        \n        Returns:\n            list: \u89e3\u6790\u540e\u7684\u8206\u60c5\u6570\u636e\n        \"\"\"\n        parsed_data = []\n        for status in statuses:\n            item = {\n                'mid': status.get('idstr'),\n                'text': status.get('text', ''),\n                'created_at': status.get('created_at'),\n                'user': status.get('user', {}).get('screen_name'),\n                'followers_count': status.get('user', {}).get('followers_count'),\n                'reposts_count': status.get('reposts_count', 0),\n                'comments_count': status.get('comments_count', 0),\n                'attitudes_count': status.get('attitudes_count', 0),\n                'sentiment': self._analyze_sentiment(status.get('text', 
''))\n            }\n            parsed_data.append(item)\n        \n        return parsed_data\n    \n    def _analyze_sentiment(self, text):\n        \"\"\"\n        \u7b80\u5355\u7684\u60c5\u611f\u5206\u6790\uff08\u5b9e\u9645\u9879\u76ee\u4e2d\u5e94\u4f7f\u7528\u4e13\u4e1aNLP\u6a21\u578b\uff09\n        \"\"\"\n        positive_words = ['\u597d', '\u68d2', '\u8d5e', '\u4f18\u79c0', '\u6ee1\u610f', '\u559c\u6b22', '\u652f\u6301', '\u70b9\u8d5e']\n        negative_words = ['\u5dee', '\u70c2', '\u7cdf', '\u5931\u671b', '\u8ba8\u538c', '\u53cd\u5bf9', '\u6295\u8bc9', '\u95ee\u9898']\n        \n        pos_count = sum(1 for word in positive_words if word in text)\n        neg_count = sum(1 for word in negative_words if word in text)\n        \n        if pos_count > neg_count:\n            return 'positive'\n        elif neg_count > pos_count:\n            return 'negative'\n        else:\n            return 'neutral'\n\n# \u4f7f\u7528\u793a\u4f8b\nif __name__ == \"__main__\":\n    client = WeiboAPIClient(\n        app_key=\"your_app_key\",\n        app_secret=\"your_app_secret\", \n        access_token=\"your_access_token\"\n    )\n    \n    # \u641c\u7d22\"\u8206\u60c5\u76d1\u6d4b\"\u76f8\u5173\u5fae\u535a\n    result = client.get_statuses(\"\u8206\u60c5\u76d1\u6d4b\", count=50)\n    \n    if result['success']:\n        print(f\"\u6210\u529f\u83b7\u53d6 {result['count']} \u6761\u5fae\u535a\")\n        parsed = client.parse_weibo_content(result['data'])\n        \n        for item in parsed[:5]:  # \u53ea\u5c55\u793a\u524d5\u6761\n            print(f\"\u7528\u6237: {item['user']}\")\n            print(f\"\u5185\u5bb9: {item['text'][:50]}...\")\n            print(f\"\u60c5\u611f: {item['sentiment']}\")\n            print(\"-\" * 50)\n    else:\n        print(f\"\u83b7\u53d6\u5931\u8d25: 
{result['error']}\")\n<\/code><\/pre>\n<p>\u4e0a\u8ff0\u4ee3\u7801\u5c55\u793a\u4e86\u5982\u4f55\u4f7f\u7528Python\u4e0e\u5fae\u535aAPI\u8fdb\u884c\u4ea4\u4e92\uff0c\u83b7\u53d6\u6307\u5b9a\u5173\u952e\u8bcd\u4e0b\u7684\u5fae\u535a\u6570\u636e\uff0c\u5e76\u8fdb\u884c\u57fa\u672c\u7684\u89e3\u6790\u548c\u60c5\u611f\u5206\u6790\u3002\u5728\u5b9e\u9645\u5e94\u7528\u4e2d\uff0c\u8206\u60c5\u76d1\u6d4b\u7cfb\u7edf\u901a\u5e38\u4f1a\u540c\u65f6\u5bf9\u63a5\u591a\u4e2a\u793e\u4ea4\u5a92\u4f53\u5e73\u53f0\u7684API\uff0c\u5982\u5fae\u4fe1\u516c\u4f17\u53f7\u3001\u6296\u97f3\u3001\u5feb\u624b\u7b49\uff0c\u4ee5\u5b9e\u73b0\u5168\u65b9\u4f4d\u7684\u8206\u60c5\u6570\u636e\u91c7\u96c6\u3002<\/p>\n<h2>\u4e8c\u3001\u8206\u60c5\u76d1\u6d4b\u4e2d\u5c0f\u7ea2\u4e66\u6570\u636e\u91c7\u96c6\u7684\u6280\u672f\u5b9e\u73b0<\/h2>\n<p>\u5c0f\u7ea2\u4e66\u4f5c\u4e3a\u56fd\u5185\u9886\u5148\u7684\u751f\u6d3b\u65b9\u5f0f\u5206\u4eab\u5e73\u53f0\uff0c\u805a\u96c6\u4e86\u5927\u91cf\u5e74\u8f7b\u7528\u6237\u7fa4\u4f53\uff0c\u5176\u7b14\u8bb0\u5185\u5bb9\u6db5\u76d6\u7f8e\u5986\u3001\u7a7f\u642d\u3001\u65c5\u884c\u3001\u7f8e\u98df\u7b49\u591a\u4e2a\u9886\u57df\uff0c\u5df2\u6210\u4e3a\u54c1\u724c\u8206\u60c5\u76d1\u6d4b\u7684\u91cd\u8981\u6570\u636e\u6765\u6e90\u3002\u5c0f\u7ea2\u4e66\u7684\u6570\u636e\u91c7\u96c6\u4e3b\u8981\u901a\u8fc7\u4ee5\u4e0b\u51e0\u79cd\u6280\u672f\u624b\u6bb5\u5b9e\u73b0\uff1a<\/p>\n<p>\u9996\u5148\u662f\u901a\u8fc7\u5c0f\u7ea2\u4e66\u5f00\u653e\u5e73\u53f0\u63d0\u4f9b\u7684\u5b98\u65b9API\u63a5\u53e3\u3002\u5f00\u53d1\u8005\u9700\u8981\u5728\u5c0f\u7ea2\u4e66\u5f00\u653e\u5e73\u53f0\u6ce8\u518c\u5e94\u7528\uff0c\u83b7\u53d6AppKey\u548cAppSecret\uff0c\u901a\u8fc7\u8c03\u7528\u76f8\u5e94\u7684API\u63a5\u53e3\u83b7\u53d6\u7b14\u8bb0\u6570\u636e\u3001\u7528\u6237\u4fe1\u606f\u3001\u8bc4\u8bba\u6570\u636e\u7b49\u3002\u5b98\u65b9API\u7684\u4f18\u52bf\u5728\u4e8e\u6570\u636e\u51c6\u786e\u6027\u9ad8\u3001\u7a33\u5b9a\u6027\u597d\uff0c\u4f46\u9700\u8981\u7533\u8bf7\u5e76\u901a\u8fc7
\u5ba1\u6838\u3002<\/p>\n<p>\u5176\u6b21\u662f\u901a\u8fc7\u7f51\u9875\u722c\u866b\u65b9\u5f0f\u91c7\u96c6\u6570\u636e\u3002\u5bf9\u4e8e\u516c\u5f00\u7684\u7b14\u8bb0\u5185\u5bb9\uff0c\u53ef\u4ee5\u901a\u8fc7\u5206\u6790\u5c0f\u7ea2\u4e66\u7f51\u9875\u7684\u8bf7\u6c42\u53c2\u6570\uff0c\u6a21\u62df\u6d4f\u89c8\u5668\u53d1\u9001\u8bf7\u6c42\u6765\u83b7\u53d6\u6570\u636e\u3002\u4ee5\u4e0b\u662f\u4e00\u4e2a\u57fa\u4e8ePython\u7684\u5c0f\u7ea2\u4e66\u7b14\u8bb0\u6570\u636e\u91c7\u96c6\u793a\u4f8b\uff1a<\/p>\n<pre><code class=\"language-python\">\nimport requests\nimport json\nimport time\nimport random\nfrom urllib.parse import urlencode\n\nclass XiaohongshuCrawler:\n    \"\"\"\u5c0f\u7ea2\u4e66\u6570\u636e\u91c7\u96c6\u5668\"\"\"\n    \n    def __init__(self):\n        self.headers = {\n            'User-Agent': 'Mozilla\/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit\/537.36 (KHTML, like Gecko) Chrome\/120.0.0.0 Safari\/537.36',\n            'Accept': 'application\/json, text\/plain, *\/*',\n            'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',\n            'Referer': 'https:\/\/www.xiaohongshu.com\/',\n            'Cookie': 'webId=xxx; webBuild=xxx; xsecappid=xxx'\n        }\n        self.session = requests.Session()\n        self.session.headers.update(self.headers)\n    \n    def search_notes(self, keyword, page=1, page_size=20):\n        \"\"\"\n        \u641c\u7d22\u5c0f\u7ea2\u4e66\u7b14\u8bb0\n        \n        Args:\n            keyword: \u641c\u7d22\u5173\u952e\u8bcd\n            page: \u9875\u7801\n            page_size: \u6bcf\u9875\u6570\u91cf\n        \n        Returns:\n            dict: \u641c\u7d22\u7ed3\u679c\n        \"\"\"\n        api_url = \"https:\/\/edith.xiaohongshu.com\/api\/sns\/web\/v1\/search\/notes\"\n        \n        # \u641c\u7d22\u8bf7\u6c42\u53c2\u6570\uff08\u52a0\u5bc6\u524d\u7ed3\u6784\uff09\n        payload = {\n            'keyword': keyword,\n            'page': page,\n            'page_size': page_size,\n            
'search_id': self._generate_search_id(),\n            'sort': 'general',\n            'note_type': 0,  # 0\u4e3a\u5168\u90e8\u7c7b\u578b\n            'ext_flags': [],\n            'image_formats': ['jpg', 'webp', 'avif']\n        }\n        \n        # \u7b7e\u540d\u53c2\u6570\uff08\u5b9e\u9645\u9700\u8981\u6839\u636e\u5c0f\u7ea2\u4e66\u7684\u7b7e\u540d\u7b97\u6cd5\u751f\u6210\uff09\n        # \u6b64\u5904\u7701\u7565\u7b7e\u540d\u903b\u8f91...\n        \n        try:\n            response = self.session.post(\n                api_url,\n                json=payload,\n                timeout=15\n            )\n            \n            if response.status_code == 200:\n                result = response.json()\n                if result.get('success'):\n                    return {\n                        'success': True,\n                        'data': result.get('data', {}).get('items', []),\n                        'has_more': result.get('data', {}).get('has_more', False)\n                    }\n                else:\n                    return {\n                        'success': False,\n                        'error': result.get('msg', 'Request failed')\n                    }\n            else:\n                return {\n                    'success': False,\n                    'error': f'HTTP {response.status_code}'\n                }\n                \n        except requests.RequestException as e:\n            return {'success': False, 'error': str(e)}\n    \n    def parse_note_detail(self, note_id):\n        \"\"\"\n        \u83b7\u53d6\u7b14\u8bb0\u8be6\u60c5\n        \n        Args:\n            note_id: \u7b14\u8bb0ID\n        \n        Returns:\n            dict: \u7b14\u8bb0\u8be6\u60c5\n        \"\"\"\n        api_url = f\"https:\/\/edith.xiaohongshu.com\/api\/sns\/web\/v1\/feed\"\n        \n        payload = {\n            'source_note_id': note_id,\n            'image_formats': ['jpg', 'webp', 'avif']\n        }\n        \n        try:\n          
  response = self.session.post(api_url, json=payload, timeout=15)\n            result = response.json()\n            \n            if result.get('success'):\n                note_data = result.get('data', {}).get('items', [{}])[0].get('note_card', {})\n                \n                return {\n                    'success': True,\n                    'data': {\n                        'note_id': note_data.get('note_id'),\n                        'title': note_data.get('title'),\n                        'desc': note_data.get('desc'),\n                        'user': note_data.get('user', {}).get('nickname'),\n                        'liked_count': note_data.get('interact_info', {}).get('liked_count'),\n                        'collected_count': note_data.get('interact_info', {}).get('collected_count'),\n                        'comment_count': note_data.get('interact_info', {}).get('comment_count'),\n                        'share_count': note_data.get('interact_info', {}).get('share_count'),\n                        'tags': [tag.get('name') for tag in note_data.get('tag_list', [])],\n                        'created_at': note_data.get('time')\n                    }\n                }\n            else:\n                return {'success': False, 'error': result.get('msg')}\n                \n        except Exception as e:\n            return {'success': False, 'error': str(e)}\n    \n    def _generate_search_id(self):\n        \"\"\"\u751f\u6210\u641c\u7d22ID\uff08\u7528\u4e8e\u8ffd\u8e2a\u548c\u53bb\u91cd\uff09\"\"\"\n        import uuid\n        return str(uuid.uuid4())\n    \n    def batch_search(self, keywords, delay=2):\n        \"\"\"\n        \u6279\u91cf\u641c\u7d22\u591a\u4e2a\u5173\u952e\u8bcd\n        \n        Args:\n            keywords: \u5173\u952e\u8bcd\u5217\u8868\n            delay: \u8bf7\u6c42\u95f4\u9694\uff08\u79d2\uff09\n        \"\"\"\n        all_results = []\n        \n        for keyword in keywords:\n            
print(f\"\u6b63\u5728\u641c\u7d22: {keyword}\")\n            \n            page = 1\n            has_more = True\n            \n            while has_more and page <= 5:  # \u9650\u5236\u6bcf\u5173\u952e\u8bcd\u6700\u591a5\u9875\n                result = self.search_notes(keyword, page=page)\n                \n                if result['success']:\n                    all_results.extend(result['data'])\n                    has_more = result['has_more']\n                    print(f\"  \u7b2c{page}\u9875\u5b8c\u6210\uff0c\u83b7\u53d6{len(result['data'])}\u6761\")\n                else:\n                    print(f\"  \u7b2c{page}\u9875\u5931\u8d25: {result['error']}\")\n                    break\n                \n                page += 1\n                time.sleep(delay + random.uniform(0, 1))  # \u968f\u673a\u5ef6\u65f6\n            \n            # \u5173\u952e\u8bcd\u95f4\u9694\n            time.sleep(delay * 2)\n        \n        return all_results\n\n# \u4f7f\u7528\u793a\u4f8b\nif __name__ == \"__main__\":\n    crawler = XiaohongshuCrawler()\n    \n    # \u641c\u7d22\u8206\u60c5\u76f8\u5173\u7b14\u8bb0\n    keywords = [\"\u54c1\u724c\u8206\u60c5\", \"\u4f01\u4e1a\u53e3\u7891\", \"\u5371\u673a\u516c\u5173\", \"\u8206\u60c5\u76d1\u6d4b\"]\n    results = crawler.batch_search(keywords)\n    \n    print(f\"\\n\u5171\u83b7\u53d6 {len(results)} 
\u6761\u7b14\u8bb0\u6570\u636e\")\n<\/code><\/pre>\n<p>\u9700\u8981\u7279\u522b\u8bf4\u660e\u7684\u662f\uff0c\u5728\u8fdb\u884c\u5c0f\u7ea2\u4e66\u6570\u636e\u91c7\u96c6\u65f6\uff0c\u52a1\u5fc5\u9075\u5b88\u5c0f\u7ea2\u4e66\u7684\u670d\u52a1\u6761\u6b3e\u548c\u76f8\u5173\u6cd5\u5f8b\u6cd5\u89c4\u3002\u722c\u866b\u884c\u4e3a\u5e94\u5f53\u9075\u5faa\"robots.txt\"\u534f\u8bae\uff0c\u91c7\u96c6\u7684\u6570\u636e\u4ec5\u7528\u4e8e\u5408\u6cd5\u7684\u8206\u60c5\u5206\u6790\u76ee\u7684\uff0c\u4e0d\u5f97\u8fdb\u884c\u5546\u4e1a\u5316\u6ee5\u7528\u6216\u4fb5\u72af\u7528\u6237\u9690\u79c1\u3002<\/p>\n<h2>\u4e09\u3001\u5fae\u4fe1\u89c6\u9891\u53f7\u5185\u5bb9\u91c7\u96c6\u5728\u8206\u60c5\u5206\u6790\u4e2d\u7684\u5e94\u7528<\/h2>\n<p>\u5fae\u4fe1\u89c6\u9891\u53f7\u4f5c\u4e3a\u5fae\u4fe1\u751f\u6001\u5185\u7684\u77ed\u89c6\u9891\u5e73\u53f0\uff0c\u51ed\u501f\u5176\u793e\u4ea4\u5173\u7cfb\u94fe\u7684\u72ec\u7279\u4f18\u52bf\uff0c\u5df2\u6210\u4e3a\u8206\u60c5\u4f20\u64ad\u7684\u91cd\u8981\u9635\u5730\u3002\u89c6\u9891\u53f7\u5185\u5bb9\u5177\u6709\u4f20\u64ad\u901f\u5ea6\u5feb\u3001\u793e\u4ea4\u5c5e\u6027\u5f3a\u3001\u7528\u6237\u7c98\u6027\u9ad8\u7b49\u7279\u70b9\uff0c\u5bf9\u4e8e\u4f01\u4e1a\u54c1\u724c\u8206\u60c5\u76d1\u6d4b\u800c\u8a00\u5177\u6709\u4e0d\u53ef\u66ff\u4ee3\u7684\u4ef7\u503c\u3002<\/p>\n<p>\u89c6\u9891\u53f7\u6570\u636e\u7684\u91c7\u96c6\u4e3b\u8981\u901a\u8fc7\u4ee5\u4e0b\u6280\u672f\u65b9\u6848\u5b9e\u73b0\uff1a<\/p>\n<p>\u65b9\u6848\u4e00\u662f\u901a\u8fc7\u5fae\u4fe1\u5f00\u653e\u5e73\u53f0\u63d0\u4f9b\u7684\u5b98\u65b9\u6570\u636e\u63a5\u53e3\u3002\u7b26\u5408\u6761\u4ef6\u7684\u5f00\u53d1\u8005\u53ef\u4ee5\u7533\u8bf7\u83b7\u53d6\u89c6\u9891\u53f7\u7684\u90e8\u5206\u6570\u636e\u63a5\u53e3\u6743\u9650\uff0c\u5305\u62ec\u89c6\u9891\u53d1\u5e03\u3001\u7528\u6237\u4e92\u52a8\u7b49\u6570\u636e\u3002\u5b98\u65b9\u63a5\u53e3\u7684\u4f18\u52bf\u662f\u6570\u636e\u51c6\u786e\u3001\u5408\u89c4\u7a33\u5b9a\uff0c\u4f46\u9700\u8981\u4f01\u4e1a\u8d44\u8d28\u4e14\u5ba
1\u6838\u8f83\u4e3a\u4e25\u683c\u3002<\/p>\n<p>\u65b9\u6848\u4e8c\u662f\u901a\u8fc7\u7b2c\u4e09\u65b9\u6570\u636e\u670d\u52a1\u5e73\u53f0\u83b7\u53d6\u6570\u636e\u3002\u5e02\u573a\u4e0a\u5b58\u5728\u591a\u5bb6\u4e13\u4e1a\u7684\u5fae\u4fe1\u6570\u636e\u670d\u52a1\u5546\uff0c\u63d0\u4f9b\u89c6\u9891\u53f7\u7684\u70b9\u8d5e\u3001\u8bc4\u8bba\u3001\u8f6c\u53d1\u7b49\u6570\u636e\u63a5\u53e3\u670d\u52a1\u3002\u8fd9\u79cd\u65b9\u5f0f\u9002\u7528\u4e8e\u6ca1\u6709\u5f00\u53d1\u80fd\u529b\u6216\u4e0d\u60f3\u81ea\u884c\u7ef4\u62a4\u722c\u866b\u7cfb\u7edf\u7684\u4f01\u4e1a\u3002<\/p>\n<p>\u65b9\u6848\u4e09\u662f\u901a\u8fc7\u6280\u672f\u624b\u6bb5\u91c7\u96c6\u516c\u5f00\u6570\u636e\u3002\u4ee5\u4e0b\u662f\u4e00\u4e2a\u7b80\u5316\u7684\u89c6\u9891\u53f7\u6570\u636e\u91c7\u96c6\u793a\u4f8b\uff08\u4ec5\u4f9b\u53c2\u8003\uff0c\u5b9e\u9645\u4f7f\u7528\u9700\u9075\u5b88\u76f8\u5173\u6cd5\u89c4\uff09\uff1a<\/p>\n<pre><code class=\"language-python\">\nimport requests\nimport json\nimport time\nimport re\n\nclass VideoAccountCollector:\n    \"\"\"\u89c6\u9891\u53f7\u6570\u636e\u91c7\u96c6\u5668\"\"\"\n    \n    def __init__(self):\n        self.headers = {\n            'User-Agent': 'Mozilla\/5.0 (iPhone; CPU iPhone OS 14_0 like Mac OS X) AppleWebKit\/605.1.15 (KHTML, like Gecko) Version\/14.0 Mobile\/15E148 Safari\/604.1',\n            'Accept': 'application\/json, text\/plain, *\/*',\n            'Accept-Language': 'zh-CN,zh;q=0.9',\n        }\n    \n    def get_video_list(self, keyword, count=20):\n        \"\"\"\n        \u83b7\u53d6\u89c6\u9891\u53f7\u641c\u7d22\u7ed3\u679c\n        \n        Args:\n            keyword: \u641c\u7d22\u5173\u952e\u8bcd\n            count: \u83b7\u53d6\u6570\u91cf\n        \n        Returns:\n            list: \u89c6\u9891\u5217\u8868\n        \"\"\"\n        # \u5fae\u4fe1\u641c\u4e00\u641c\u89c6\u9891\u53f7\u641c\u7d22\u63a5\u53e3\n        url = \"https:\/\/weixin.qq.com\/x\/o\/search\"\n        \n        params = {\n            'action': 
'video_search',\n            'keyword': keyword,\n            'count': count\n        }\n        \n        try:\n            response = requests.get(\n                url, \n                params=params, \n                headers=self.headers,\n                timeout=15\n            )\n            \n            if response.status_code == 200:\n                data = response.json()\n                return {\n                    'success': True,\n                    'videos': data.get('videos', []),\n                    'count': len(data.get('videos', []))\n                }\n            else:\n                return {\n                    'success': False,\n                    'error': f'HTTP {response.status_code}'\n                }\n                \n        except Exception as e:\n            return {'success': False, 'error': str(e)}\n    \n    def parse_video_info(self, video_data):\n        \"\"\"\n        \u89e3\u6790\u89c6\u9891\u4fe1\u606f\n        \n        Args:\n            video_data: \u539f\u59cb\u89c6\u9891\u6570\u636e\n        \n        Returns:\n            dict: \u89e3\u6790\u540e\u7684\u89c6\u9891\u4fe1\u606f\n        \"\"\"\n        return {\n            'title': video_data.get('title', ''),\n            'author': video_data.get('nickname', ''),\n            'desc': video_data.get('description', ''),\n            'like_count': video_data.get('like_count', 0),\n            'comment_count': video_data.get('comment_count', 0),\n            'share_count': video_data.get('share_count', 0),\n            'publish_time': video_data.get('create_time', ''),\n            'url': video_data.get('video_url', ''),\n            'cover': video_data.get('cover_url', '')\n        }\n    \n    def sentiment_analysis(self, text):\n        \"\"\"\n        \u57fa\u4e8e\u5173\u952e\u8bcd\u7684\u60c5\u611f\u5206\u6790\n        \n        Args:\n            text: \u5f85\u5206\u6790\u6587\u672c\n        \n        Returns:\n            str: \u60c5\u611f\u503e\u5411 
(positive\/negative\/neutral)\n        \"\"\"\n        positive_keywords = ['\u652f\u6301', '\u70b9\u8d5e', '\u68d2', '\u5389\u5bb3', '\u559c\u6b22', '\u4f18\u79c0', '\u8d5e', '\u597d']\n        negative_keywords = ['\u5931\u671b', '\u5dee\u8bc4', '\u5751', '\u9a97', '\u5783\u573e', '\u8ba8\u538c', '\u65e0\u8bed', '\u95ee\u9898']\n        \n        pos_score = sum(1 for w in positive_keywords if w in text)\n        neg_score = sum(1 for w in negative_keywords if w in text)\n        \n        if pos_score > neg_score:\n            return 'positive'\n        elif neg_score > pos_score:\n            return 'negative'\n        return 'neutral'\n\ndef main():\n    collector = VideoAccountCollector()\n    \n    # \u91c7\u96c6\u4f01\u4e1a\u54c1\u724c\u76f8\u5173\u89c6\u9891\n    keywords = [\"\u4f01\u4e1a\u54c1\u724c\", \"\u4ea7\u54c1\u8d28\u91cf\", \"\u552e\u540e\u670d\u52a1\", \"\u7528\u6237\u8bc4\u4ef7\"]\n    \n    all_videos = []\n    \n    for keyword in keywords:\n        print(f\"\u641c\u7d22\u5173\u952e\u8bcd: {keyword}\")\n        result = collector.get_video_list(keyword)\n        \n        if result['success']:\n            for video in result['videos'][:10]:\n                info = collector.parse_video_info(video)\n                info['keyword'] = keyword\n                info['sentiment'] = collector.sentiment_analysis(\n                    info['title'] + ' ' + info['desc']\n                )\n                all_videos.append(info)\n            \n            print(f\"  \u83b7\u53d6\u5230 {len(result['videos'])} \u4e2a\u89c6\u9891\")\n        \n        time.sleep(2)  # \u8bf7\u6c42\u95f4\u9694\n    \n    # \u60c5\u611f\u7edf\u8ba1\n    sentiments = {'positive': 0, 'negative': 0, 'neutral': 0}\n    for video in all_videos:\n        sentiments[video['sentiment']] += 1\n    \n    print(f\"\\n=== \u8206\u60c5\u5206\u6790\u62a5\u544a ===\")\n    print(f\"\u603b\u89c6\u9891\u6570: {len(all_videos)}\")\n    print(f\"\u6b63\u9762\u8bc4\u4ef7: {sentiments['positive']}\")\n    
print(f\"\u8d1f\u9762\u8bc4\u4ef7: {sentiments['negative']}\")\n    print(f\"\u4e2d\u6027\u8bc4\u4ef7: {sentiments['neutral']}\")\n\nif __name__ == \"__main__\":\n    main()\n<\/code><\/pre>\n<p>\u89c6\u9891\u53f7\u8206\u60c5\u76d1\u6d4b\u7684\u72ec\u7279\u4ef7\u503c\u5728\u4e8e\u5176\u793e\u4ea4\u4f20\u64ad\u5c5e\u6027\u3002\u901a\u8fc7\u5206\u6790\u89c6\u9891\u7684\u70b9\u8d5e\u3001\u8bc4\u8bba\u3001\u5206\u4eab\u7b49\u4e92\u52a8\u6570\u636e\uff0c\u53ef\u4ee5\u8bc4\u4f30\u8206\u60c5\u4e8b\u4ef6\u7684\u4f20\u64ad\u8303\u56f4\u548c\u5f71\u54cd\u6df1\u5ea6\uff0c\u4e3a\u4f01\u4e1a\u51b3\u7b56\u63d0\u4f9b\u6570\u636e\u652f\u6491\u3002<\/p>\n<h2>\u56db\u3001\u8206\u60c5\u76d1\u6d4b\u8f6f\u4ef6\u7684\u53cd\u722c\u866b\u7b56\u7565\u4e0e\u5b9e\u6218\u6280\u5de7<\/h2>\n<p>\u5728\u8206\u60c5\u76d1\u6d4b\u7684\u5b9e\u9645\u5e94\u7528\u4e2d\uff0c\u6211\u4eec\u4e0d\u4ec5\u9700\u8981\u91c7\u96c6\u522b\u4eba\u7f51\u7ad9\u7684\u6570\u636e\uff0c\u4e5f\u9700\u8981\u4fdd\u62a4\u81ea\u5df1\u7684\u5e73\u53f0\u4e0d\u88ab\u6076\u610f\u722c\u53d6\u3002\u53cd\u722c\u866b\u7b56\u7565\u662f\u6bcf\u4e00\u4e2a\u5185\u5bb9\u5e73\u53f0\u90fd\u9700\u8981\u8ba4\u771f\u5bf9\u5f85\u7684\u6280\u672f\u8bfe\u9898\u3002\u672c\u6587\u5c06\u4ece\u722c\u866b\u65b9\u548c\u9632\u5b88\u65b9\u4e24\u4e2a\u89d2\u5ea6\uff0c\u4ecb\u7ecd\u5e38\u89c1\u7684\u53cd\u722c\u866b\u7b56\u7565\u53ca\u5176\u5e94\u5bf9\u65b9\u6cd5\u3002<\/p>\n<h3>4.1 \u5e38\u89c1\u7684\u53cd\u722c\u866b\u7b56\u7565<\/h3>\n<p><strong>1. 
IP\u5c01\u7981\u7b56\u7565<\/strong><\/p>\n<p>\u8fd9\u662f\u6700\u57fa\u7840\u7684\u53cd\u722c\u866b\u624b\u6bb5\u3002\u5f53\u7cfb\u7edf\u68c0\u6d4b\u5230\u67d0\u4e2aIP\u5730\u5740\u5728\u77ed\u65f6\u95f4\u5185\u53d1\u8d77\u5927\u91cf\u8bf7\u6c42\u65f6\uff0c\u4f1a\u5c06\u5176\u52a0\u5165\u9ed1\u540d\u5355\uff0c\u963b\u6b62\u540e\u7eed\u8bbf\u95ee\u3002\u5e94\u5bf9\u65b9\u5f0f\u5305\u62ec\uff1a<\/p>\n<ul>\n<li>\u4f7f\u7528\u4ee3\u7406IP\u6c60\uff0c\u5b9a\u671f\u66f4\u6362IP\u5730\u5740<\/li>\n<li>\u63a7\u5236\u8bf7\u6c42\u9891\u7387\uff0c\u6a21\u62df\u6b63\u5e38\u7528\u6237\u7684\u8bbf\u95ee\u6a21\u5f0f<\/li>\n<li>\u4f7f\u7528\u5206\u5e03\u5f0f\u722c\u866b\u67b6\u6784\uff0c\u5206\u6563\u8bf7\u6c42\u6765\u6e90<\/li>\n<\/ul>\n<pre><code class=\"language-python\">\nimport requests\nimport random\nimport time\n\nclass ProxyPool:\n    \"\"\"\u4ee3\u7406IP\u6c60\"\"\"\n    \n    def __init__(self):\n        self.proxies = []  # \u4ee3\u7406\u5217\u8868\n        self.current_index = 0\n        self.failed_proxies = set()  # \u5931\u6548\u4ee3\u7406\u8bb0\u5f55\n    \n    def load_proxies(self, proxy_file):\n        \"\"\"\u4ece\u6587\u4ef6\u52a0\u8f7d\u4ee3\u7406\u5217\u8868\"\"\"\n        with open(proxy_file, 'r') as f:\n            for line in f:\n                parts = line.strip().split(':')\n                if len(parts) == 4:\n                    proxy = {\n                        'http': f\"http:\/\/{parts[2]}:{parts[3]}@{parts[0]}:{parts[1]}\",\n                        'https': f\"http:\/\/{parts[2]}:{parts[3]}@{parts[0]}:{parts[1]}\"\n                    }\n                    self.proxies.append(proxy)\n        \n        random.shuffle(self.proxies)\n        print(f\"\u52a0\u8f7d\u4e86 {len(self.proxies)} \u4e2a\u4ee3\u7406\")\n    \n    def get_proxy(self):\n        \"\"\"\u83b7\u53d6\u4e00\u4e2a\u53ef\u7528\u4ee3\u7406\"\"\"\n        attempts = 0\n        while attempts < len(self.proxies):\n            proxy = self.proxies[self.current_index]\n            
self.current_index = (self.current_index + 1) % len(self.proxies)\n            \n            if self._is_valid_proxy(proxy):\n                return proxy\n            \n            attempts += 1\n        \n        return None\n    \n    def _is_valid_proxy(self, proxy, test_url=\"http:\/\/www.baidu.com\"):\n        \"\"\"\u9a8c\u8bc1\u4ee3\u7406\u662f\u5426\u6709\u6548\"\"\"\n        try:\n            response = requests.get(test_url, proxies=proxy, timeout=5)\n            return response.status_code == 200\n        except:\n            return False\n    \n    def mark_failed(self, proxy):\n        \"\"\"\u6807\u8bb0\u5931\u6548\u4ee3\u7406\"\"\"\n        self.failed_proxies.add(str(proxy))\n        if proxy in self.proxies:\n            self.proxies.remove(proxy)\n            print(f\"\u4ee3\u7406\u5931\u6548\uff0c\u5df2\u79fb\u9664\uff0c\u5269\u4f59 {len(self.proxies)} \u4e2a\")\n\nclass SmartCrawler:\n    \"\"\"\u667a\u80fd\u722c\u866b - \u5e26\u4ee3\u7406\u548c\u9891\u7387\u63a7\u5236\"\"\"\n    \n    def __init__(self, proxy_pool):\n        self.proxy_pool = proxy_pool\n        self.request_count = 0\n        self.last_request_time = time.time()\n    \n    def fetch(self, url, delay_range=(1, 3)):\n        \"\"\"\n        \u5e26\u9891\u7387\u63a7\u5236\u7684\u6293\u53d6\n        \n        Args:\n            url: \u76ee\u6807URL\n            delay_range: \u8bf7\u6c42\u95f4\u9694\u968f\u673a\u8303\u56f4\uff08\u79d2\uff09\n        \"\"\"\n        # \u63a7\u5236\u8bf7\u6c42\u9891\u7387\n        elapsed = time.time() - self.last_request_time\n        if elapsed < 1:\n            time.sleep(1 - elapsed)\n        \n        # \u66f4\u6362\u4ee3\u7406\n        proxy = self.proxy_pool.get_proxy()\n        \n        headers = {\n            'User-Agent': random.choice(USER_AGENTS),\n            'Accept': 'text\/html,application\/xhtml+xml,application\/xml;q=0.9,*\/*;q=0.8',\n            'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',\n        }\n        \n        try:\n   
         response = requests.get(\n                url, \n                headers=headers,\n                proxies=proxy,\n                timeout=15\n            )\n            \n            self.request_count += 1\n            self.last_request_time = time.time()\n            \n            # \u968f\u673a\u5ef6\u65f6\n            time.sleep(random.uniform(*delay_range))\n            \n            return response\n            \n        except requests.RequestException as e:\n            print(f\"\u8bf7\u6c42\u5931\u8d25: {e}\")\n            return None\n\nUSER_AGENTS = [\n    'Mozilla\/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit\/537.36 (KHTML, like Gecko) Chrome\/120.0.0.0 Safari\/537.36',\n    'Mozilla\/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit\/537.36 (KHTML, like Gecko) Chrome\/120.0.0.0 Safari\/537.36',\n    'Mozilla\/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko\/20100101 Firefox\/121.0',\n    'Mozilla\/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit\/605.1.15 (KHTML, like Gecko) Version\/17.1 Safari\/605.1.15',\n]\n<\/code><\/pre>\n<p><strong>2. 
\u9a8c\u8bc1\u7801\uff08CAPTCHA\uff09\u6311\u6218<\/strong><\/p>\n<p>\u5f53\u7cfb\u7edf\u68c0\u6d4b\u5230\u53ef\u7591\u884c\u4e3a\u65f6\uff0c\u4f1a\u5f39\u51fa\u9a8c\u8bc1\u7801\u8fdb\u884c\u4eba\u673a\u9a8c\u8bc1\u3002\u5e38\u89c1\u7684\u9a8c\u8bc1\u7801\u7c7b\u578b\u5305\u62ec\u56fe\u7247\u8bc6\u522b\u3001\u6ed1\u5757\u62fc\u56fe\u3001\u70b9\u9009\u6587\u5b57\u7b49\u3002\u5e94\u5bf9\u7b56\u7565\u5305\u62ec\uff1a<\/p>\n<ul>\n<li>\u63a5\u5165\u7b2c\u4e09\u65b9\u6253\u7801\u5e73\u53f0\uff08\u5982\u8d85\u7ea7\u9a8c\u8bc1\u7801\u3001\u4e91\u6253\u7801\u7b49\uff09<\/li>\n<li>\u4f7f\u7528\u56fe\u50cf\u8bc6\u522bAI\u6a21\u578b\u81ea\u52a8\u8bc6\u522b\uff08\u5982OCR\u8bc6\u522b\u6587\u5b57\u9a8c\u8bc1\u7801\uff09<\/li>\n<li>\u4f7f\u7528selenium\u7b49\u5de5\u5177\u6a21\u62df\u771f\u4eba\u64cd\u4f5c\u884c\u4e3a<\/li>\n<\/ul>\n<pre><code class=\"language-python\">\nfrom selenium import webdriver\nfrom selenium.webdriver.common.by import By\nfrom selenium.webdriver.support.ui import WebDriverWait\nfrom selenium.webdriver.support import expected_conditions as EC\nfrom selenium.webdriver.common.action_chains import ActionChains\nimport time\nimport random\n\nclass SeleniumCrawler:\n    \"\"\"\u57fa\u4e8eSelenium\u7684\u771f\u4eba\u884c\u4e3a\u6a21\u62df\u722c\u866b\"\"\"\n    \n    def __init__(self, headless=False):\n        options = webdriver.ChromeOptions()\n        if headless:\n            options.add_argument('--headless')\n        \n        options.add_argument('--user-agent=Mozilla\/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit\/537.36')\n        options.add_argument('--disable-blink-features=AutomationControlled')\n        options.add_experimental_option('excludeSwitches', ['enable-automation'])\n        \n        self.driver = webdriver.Chrome(options=options)\n        self.driver.execute_script(\"Object.defineProperty(navigator, 'webdriver', {get: () => undefined})\")\n    \n    def human_like_scroll(self):\n        
\"\"\"\u6a21\u62df\u4eba\u7c7b\u6eda\u52a8\u9875\u9762\u884c\u4e3a\"\"\"\n        total_height = self.driver.execute_script(\"return document.body.scrollHeight\")\n        current = 0\n        \n        while current < total_height:\n            scroll_step = random.randint(100, 500)\n            current += scroll_step\n            \n            self.driver.execute_script(f\"window.scrollTo(0, {current});\")\n            time.sleep(random.uniform(0.5, 1.5))\n    \n    def human_like_click(self, element):\n        \"\"\"\u6a21\u62df\u4eba\u7c7b\u70b9\u51fb\u884c\u4e3a\"\"\"\n        ActionChains(self.driver).move_to_element(element).perform()\n        time.sleep(random.uniform(0.1, 0.3))\n        element.click()\n    \n    def solve_slider_captcha(self, slider, track_width):\n        \"\"\"\n        \u89e3\u51b3\u6ed1\u5757\u9a8c\u8bc1\u7801\n        \n        Args:\n            slider: \u6ed1\u5757\u5143\u7d20\n            track_width: \u8f68\u9053\u5bbd\u5ea6\n        \"\"\"\n        # \u8ba1\u7b97\u6ed1\u52a8\u8ddd\u79bb\uff08\u7559\u6709\u4e00\u5b9a\u8bef\u5dee\u6a21\u62df\u771f\u4eba\uff09\n        move_distance = int(track_width * 0.9)\n        \n        # \u751f\u6210\u4e0d\u89c4\u5219\u6ed1\u52a8\u8f68\u8ff9\n        steps = random.randint(8, 12)\n        current_pos = 0\n        \n        for i in range(steps):\n            if i == steps - 1:\n                # \u6700\u540e\u4e00\u6b65\u5230\u8fbe\u76ee\u6807\u4f4d\u7f6e\n                remaining = move_distance - current_pos\n                step = remaining\n            else:\n                # \u4e2d\u95f4\u6b65\u9aa4\u5e26\u968f\u673a\u52a0\u901f\u51cf\u901f\n                step = random.randint(5, 20)\n            \n            current_pos += step\n            \n            # \u6a21\u62df\u4eba\u7c7b\u7684\u624b\u52a8\u6ed1\u52a8\uff08\u4e0d\u662f\u4e00\u8e74\u800c\u5c31\uff09\n            ActionChains(self.driver).click_and_hold(slider).perform()\n            
ActionChains(self.driver).move_by_offset(step, random.randint(-2, 2)).perform()\n            time.sleep(random.uniform(0.05, 0.15))\n        \n        ActionChains(self.driver).release().perform()\n        time.sleep(0.5)\n<\/code><\/pre>\n<p><strong>3. JavaScript\u6df7\u6dc6\u4e0e\u52a0\u5bc6<\/strong><\/p>\n<p>\u8bb8\u591a\u7f51\u7ad9\u901a\u8fc7JavaScript\u52a8\u6001\u751f\u6210\u8bf7\u6c42\u53c2\u6570\u3001\u52a0\u5bc6\u4f20\u8f93\u6570\u636e\u6216\u6df7\u6dc6\u9875\u9762\u7ed3\u6784\uff0c\u589e\u52a0\u722c\u866b\u89e3\u6790\u96be\u5ea6\u3002\u5e94\u5bf9\u65b9\u6cd5\u662f\u5206\u6790JavaScript\u903b\u8f91\uff0c\u4f7f\u7528Selenium\/Splash\u7b49\u5de5\u5177\u6e32\u67d3\u9875\u9762\uff0c\u6216\u901a\u8fc7\u9006\u5411\u5de5\u7a0b\u8fd8\u539f\u52a0\u5bc6\u7b97\u6cd5\u3002<\/p>\n<h3>4.2 \u53cd\u722c\u866b\u7b56\u7565\u7684\u4f26\u7406\u4e0e\u6cd5\u5f8b\u8fb9\u754c<\/h3>\n<p>\u5728\u8ba8\u8bba\u53cd\u722c\u866b\u6280\u672f\u7684\u540c\u65f6\uff0c\u6211\u4eec\u5fc5\u987b\u5f3a\u8c03\u7f51\u7edc\u722c\u866b\u7684\u4f26\u7406\u548c\u6cd5\u5f8b\u8fb9\u754c\uff1a<\/p>\n<ul>\n<li>\u9075\u5b88\u7f51\u7ad9\u7684robots.txt\u534f\u8bae\u548c\u670d\u52a1\u6761\u6b3e<\/li>\n<li>\u4e0d\u5f97\u91c7\u96c6\u4e2a\u4eba\u9690\u79c1\u4fe1\u606f\u6216\u5546\u4e1a\u79d8\u5bc6<\/li>\n<li>\u63a7\u5236\u8bf7\u6c42\u9891\u7387\uff0c\u907f\u514d\u5bf9\u76ee\u6807\u670d\u52a1\u5668\u9020\u6210\u8d1f\u62c5<\/li>\n<li>\u91c7\u96c6\u7684\u6570\u636e\u4ec5\u7528\u4e8e\u5408\u6cd5\u5408\u89c4\u7684\u8206\u60c5\u5206\u6790\u76ee\u7684<\/li>\n<li>\u5c0a\u91cd\u6570\u636e\u7248\u6743\uff0c\u5408\u7406\u4f7f\u7528\u91c7\u96c6\u5230\u7684\u5185\u5bb9<\/li>\n<\/ul>\n<h2>\u4e94\u3001\u603b\u7ed3\u4e0e\u5c55\u671b<\/h2>\n<p>\u8206\u60c5\u76d1\u6d4b\u6280\u672f\u5728\u6570\u5b57\u5316\u65f6\u4ee3\u53d1\u6325\u7740\u8d8a\u6765\u8d8a\u91cd\u8981\u7684\u4f5c\u7528\u3002\u672c\u6587\u4eceAPI\u6570\u636e\u5bf9\u63a5\u3001\u5c0f\u7ea2\u4e66\u6570\u636e\u91c7\u96c6\u3001\u5fae\u4fe1\u89c6\u9891\u53f7\u5185\u5bb9\u9
1c7\u96c6\u4ee5\u53ca\u53cd\u722c\u866b\u7b56\u7565\u56db\u4e2a\u65b9\u9762\uff0c\u8be6\u7ec6\u4ecb\u7ecd\u4e86\u8206\u60c5\u76d1\u6d4b\u7cfb\u7edf\u7684\u6280\u672f\u5b9e\u73b0\u65b9\u6848\u3002<\/p>\n<p>\u968f\u7740\u4eba\u5de5\u667a\u80fd\u6280\u672f\u7684\u4e0d\u65ad\u53d1\u5c55\uff0c\u8206\u60c5\u76d1\u6d4b\u7cfb\u7edf\u4e5f\u5728\u6301\u7eed\u8fdb\u5316\u3002\u672a\u6765\u7684\u53d1\u5c55\u8d8b\u52bf\u5305\u62ec\uff1a\u57fa\u4e8e\u5927\u8bed\u8a00\u6a21\u578b\u7684\u667a\u80fd\u8206\u60c5\u5206\u6790\u3001\u5b9e\u65f6\u89c6\u9891\u5185\u5bb9\u8bc6\u522b\u4e0e\u76d1\u6d4b\u3001\u8de8\u5e73\u53f0\u6570\u636e\u878d\u5408\u5206\u6790\u7b49\u3002\u65e0\u8bba\u662f\u6280\u672f\u5f00\u53d1\u8005\u8fd8\u662f\u4f7f\u7528\u8005\uff0c\u90fd\u9700\u8981\u4e0d\u65ad\u5b66\u4e60\u65b0\u6280\u672f\uff0c\u540c\u65f6\u575a\u5b88\u4f26\u7406\u548c\u6cd5\u5f8b\u5e95\u7ebf\uff0c\u5171\u540c\u7ef4\u62a4\u5065\u5eb7\u6709\u5e8f\u7684\u7f51\u7edc\u7a7a\u95f4\u3002<\/p>\n<p>\u8206\u60c5\u76d1\u6d4b\u4e0d\u4ec5\u662f\u6280\u672f\u95ee\u9898\uff0c\u66f4\u662f\u4e00\u95e8\u827a\u672f\u3002\u5b83\u9700\u8981\u6211\u4eec\u5728\u6570\u636e\u4e0e\u6d1e\u5bdf\u4e4b\u95f4\u627e\u5230\u5e73\u8861\uff0c\u5728\u6548\u7387\u4e0e\u5408\u89c4\u4e4b\u95f4\u628a\u63e1\u5206\u5bf8\u3002\u5e0c\u671b\u672c\u6587\u80fd\u4e3a\u4ece\u4e8b\u8206\u60c5\u76d1\u6d4b\u5de5\u4f5c\u7684\u670b\u53cb\u4eec\u63d0\u4f9b\u4e00\u4e9b\u6709\u4ef7\u503c\u7684\u53c2\u8003\u3002<\/p>\n<\/div>\n<p><\/body><br \/>\n<\/html><\/p>\n","protected":false},"excerpt":{"rendered":"<p>\u8206\u60c5\u76d1\u6d4b\u4e0eAPI\u63a5\u53e3\u7684\u6570\u636e\u5bf9\u63a5\u5b9e\u6218 
\u5728\u5f53\u4eca\u6570\u5b57\u5316\u65f6\u4ee3\uff0c\u8206\u60c5\u76d1\u6d4b\u5df2\u6210\u4e3a\u4f01\u4e1a\u54c1\u724c\u7ba1\u7406\u3001\u653f\u5e9c\u516c\u5171\u4e8b\u52a1\u5904\u7406\u4ee5\u53ca\u5e02\u573a\u7ade\u4e89\u5206\u6790\u7684\u91cd\u8981\u624b\u6bb5\u3002\u968f\u7740\u793e\u4ea4\u5a92\u4f53\u5e73\u53f0\u7684\u591a\u5143\u5316\u53d1\u5c55\uff0c\u8206\u60c5\u6570\u636e\u6765\u6e90\u65e5\u76ca\u4e30\u5bcc\uff0c\u5982\u4f55\u9ad8\u6548\u3001\u51c6\u786e\u5730\u91c7\u96c6\u548c\u5206\u6790\u8fd9\u4e9b\u6570\u636e\uff0c\u6210\u4e3a\u4e86\u8206\u60c5\u76d1\u6d4b\u7cfb\u7edf\u7684\u6838\u5fc3\u6311\u6218\u3002\u672c\u6587\u5c06\u56f4\u7ed5\u8206\u60c5\u76d1\u6d4b\u7684\u6280\u672f\u5b9e\u73b0\uff0c\u6df1\u5165\u63a2\u8ba8API\u6570\u636e\u5bf9\u63a5\u3001\u5c0f\u7ea2\u4e66\u6570\u636e\u91c7\u96c6\u3001\u5fae\u4fe1\u89c6\u9891\u53f7\u5185\u5bb9\u91c7\u96c6\u4ee5\u53ca\u53cd\u722c\u866b\u7b56\u7565\u7b49\u5173\u952e\u8bdd\u9898\u3002 \u4e00\u3001\u8206\u60c5\u76d1\u6d4b\u4e0eAPI\u63a5\u53e3\u7684\u6570\u636e\u5bf9\u63a5 API\uff08Application Programming Interface\uff0c\u5e94\u7528\u7a0b\u5e8f\u7f16\u7a0b\u63a5\u53e3\uff09\u662f\u73b0\u4ee3\u8f6f\u4ef6\u7cfb\u7edf\u4e4b\u95f4\u8fdb\u884c\u6570\u636e\u4ea4\u6362\u548c\u529f\u80fd\u8c03\u7528\u7684\u91cd\u8981\u6865\u6881\u3002\u5728\u8206\u60c5\u76d1\u6d4b\u9886\u57df\uff0c\u901a\u8fc7\u5404\u5927\u5e73\u53f0\u63d0\u4f9b\u7684\u5b98\u65b9API\u63a5\u53e3\uff0c\u53ef\u4ee5\u9ad8\u6548\u3001\u5408\u89c4\u5730\u83b7\u53d6\u516c\u5f00\u7684\u8206\u60c5\u6570\u636e\u3002\u76f8\u6bd4\u4e8e\u4f20\u7edf\u7684\u7f51\u9875\u722c\u866b\u65b9\u5f0f\uff0cAPI\u63a5\u53e3\u5177\u6709\u6570\u636e\u51c6\u786e\u6027\u9ad8\u3001\u7a33\u5b9a\u6027\u5f3a\u3001\u6cd5\u5f8b\u98ce\u9669\u4f4e\u7b49\u663e\u8457\u4f18\u52bf\u3002 
\u4ee5\u5fae\u535a\u5f00\u653e\u5e73\u53f0\u4e3a\u4f8b\uff0c\u5176API\u63a5\u53e3\u5141\u8bb8\u5f00\u53d1\u8005\u901a\u8fc7OAuth2.0\u8ba4\u8bc1\u540e\uff0c\u83b7\u53d6\u6307\u5b9a\u5173\u952e\u8bcd\u4e0b\u7684\u5fae\u535a\u5185\u5bb9\u3001\u7528\u6237\u4fe1\u606f\u3001\u8bc4\u8bba\u6570\u636e\u7b49\u3002\u4e00\u4e2a\u5178\u578b\u7684\u5fae\u535aAPI\u8c03\u7528\u793a\u4f8b\uff08Python\uff09\u5982\u4e0b\uff1a import requests import time import hashlib import random class WeiboAPIClient: &#8220;&#8221;&#8221;\u5fae\u535aAPI\u5ba2\u6237\u7aef&#8221;&#8221;&#8221; def __init__(self, app_key, app_secret, access_token): self.app_key = app_key self.app_secret = app_secret self.access_token = access_token self.base_url = &#8220;https:\/\/api.weibo.com\/2&#8221; def get_statuses(self, keyword, count=100): &#8220;&#8221;&#8221; \u83b7\u53d6\u6307\u5b9a\u5173\u952e\u8bcd\u7684\u5fae\u535a\u5185\u5bb9 Args: keyword: \u641c\u7d22\u5173\u952e\u8bcd count: \u8fd4\u56de\u7ed3\u679c\u6570\u91cf\uff0c\u6700\u5927100 Returns: dict: API\u54cd\u5e94\u6570\u636e &#8220;&#8221;&#8221; endpoint = f&#8221;{self.base_url}\/search\/statuses.json&#8221; params&hellip; <br \/> <a class=\"read-more\" href=\"http:\/\/www.jiayus.com\/?p=3998\">Read 
more<\/a><\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"closed","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[1],"tags":[],"class_list":["post-3998","post","type-post","status-publish","format-standard","hentry","category-gsxw"],"_links":{"self":[{"href":"http:\/\/www.jiayus.com\/index.php?rest_route=\/wp\/v2\/posts\/3998","targetHints":{"allow":["GET"]}}],"collection":[{"href":"http:\/\/www.jiayus.com\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"http:\/\/www.jiayus.com\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"http:\/\/www.jiayus.com\/index.php?rest_route=\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"http:\/\/www.jiayus.com\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=3998"}],"version-history":[{"count":1,"href":"http:\/\/www.jiayus.com\/index.php?rest_route=\/wp\/v2\/posts\/3998\/revisions"}],"predecessor-version":[{"id":3999,"href":"http:\/\/www.jiayus.com\/index.php?rest_route=\/wp\/v2\/posts\/3998\/revisions\/3999"}],"wp:attachment":[{"href":"http:\/\/www.jiayus.com\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=3998"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"http:\/\/www.jiayus.com\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=3998"},{"taxonomy":"post_tag","embeddable":true,"href":"http:\/\/www.jiayus.com\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=3998"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}