{"id":162,"date":"2023-03-29T15:33:04","date_gmt":"2023-03-29T07:33:04","guid":{"rendered":"https:\/\/mianka.xyz\/?p=161"},"modified":"2023-03-29T15:33:59","modified_gmt":"2023-03-29T07:33:59","slug":"python-htmlhtml","status":"publish","type":"post","link":"https:\/\/www.mianka.xyz\/?p=162","title":{"rendered":"python \u5b57\u7b26\u4e32\u91cc\u6709\u4e2d\u6587\u4e0ehtml\u4ee3\u7801\uff0c\u600e\u4e48\u53bb\u6389html\u4ee3\u7801"},"content":{"rendered":"<p>\u4f60\u53ef\u4ee5\u4f7f\u7528Python\u7684\u5185\u7f6e\u5b57\u7b26\u4e32\u64cd\u4f5c\u548c\u6b63\u5219\u8868\u8fbe\u5f0f\u6765\u53bb\u9664HTML\u4ee3\u7801\u3002\u4ee5\u4e0b\u662f\u4e00\u4e2a\u793a\u4f8b\u4ee3\u7801\uff0c\u5b83\u5c06\u5220\u9664\u5b57\u7b26\u4e32\u4e2d\u7684\u6240\u6709HTML\u6807\u8bb0\u548cUnicode\u5b57\u7b26\uff1a<\/p>\n<pre class=\"prism-highlight prism-language-python\">import&nbsp;re\n#&nbsp;\u5b57\u7b26\u4e32\u793a\u4f8b\nmy_string&nbsp;=&nbsp;&quot;\u8fd9\u662f\u4e00\u4e2a&nbsp;&lt;b&gt;\u793a\u4f8b&lt;\/b&gt;&nbsp;\u5b57\u7b26\u4e32\uff0c\u5176\u4e2d\u5305\u542b&lt;p&gt;\u4e00\u4e9b&lt;\/p&gt;&nbsp;HTML&nbsp;\u4ee3\u7801\u548c\u4e2d\u6587\u5b57\u7b26&nbsp;&amp;#8211;&nbsp;\u5220\u9664\u5b83\u4eec\uff01&quot;\n#&nbsp;\u5220\u9664HTML\u6807\u8bb0\u548cUnicode\u5b57\u7b26\nno_tags&nbsp;=&nbsp;re.sub(&#39;&lt;[^&lt;]+?&gt;&#39;,&nbsp;&#39;&#39;,&nbsp;my_string)&nbsp;&nbsp;#&nbsp;\u5220\u9664HTML\u6807\u8bb0\nno_unicode&nbsp;=&nbsp;no_tags.encode(&#39;ascii&#39;,&nbsp;&#39;ignore&#39;).decode(&#39;utf-8&#39;)&nbsp;&nbsp;#&nbsp;\u5220\u9664Unicode\u5b57\u7b26\nprint(no_unicode)\n\u8f93\u51fa\u7ed3\u679c\u5c06\u662f\uff1a\n\u8fd9\u662f\u4e00\u4e2a&nbsp;\u793a\u4f8b&nbsp;\u5b57\u7b26\u4e32\uff0c\u5176\u4e2d\u5305\u542b\u4e00\u4e9b&nbsp;HTML&nbsp;\u4ee3\u7801\u548c\u4e2d\u6587\u5b57\u7b26&nbsp;&nbsp;\u5220\u9664\u5b83\u4eec\uff01<\/pre>\n<p>\u8fd9\u4e2a\u793a\u4f8b\u4ee3\u7801\u4f7f\u7528\u4e86Python\u7684re\u6a21\u5757\u6765\u5220\u9664HTML\u6807\u8bb0\u3002\u6b63\u5219\u8868\u8fbe\u5f0f 
&lt;[^&lt;]+?&gt; \u8868\u793a\u5339\u914d\u4efb\u4f55\u4ee5 &lt; \u5f00\u5934\u3001\u4ee5 &gt; \u7ed3\u5c3e\u7684\u5b57\u7b26\u4e32\uff0c\u5176\u4e2d [^&lt;]+ \u8868\u793a\u5339\u914d\u9664\u4e86 &lt; \u4e4b\u5916\u7684\u4efb\u4f55\u5b57\u7b26\uff0c+ \u8868\u793a\u81f3\u5c11\u5339\u914d\u4e00\u4e2a\u5b57\u7b26\uff0c? \u8868\u793a\u975e\u8d2a\u5a6a\u5339\u914d\u3002encode(&#39;ascii&#39;, &#39;ignore&#39;) \u4f1a\u4e22\u5f03\u6240\u6709\u975e ASCII \u5b57\u7b26\uff0c\u5176\u4e2d\u4e5f\u5305\u62ec\u4e2d\u6587\uff1b\u5982\u679c\u9700\u8981\u4fdd\u7559\u4e2d\u6587\uff0c\u8bf7\u7701\u7565\u8fd9\u4e00\u6b65\uff0c\u76f4\u63a5\u4f7f\u7528 no_tags\u3002<\/p>\n<p><\/p>\n","protected":false},"excerpt":{"rendered":"<p>\u4f60\u53ef\u4ee5\u4f7f\u7528Python\u7684\u5185\u7f6e\u5b57\u7b26\u4e32\u64cd\u4f5c\u548c\u6b63\u5219\u8868\u8fbe\u5f0f\u6765\u53bb\u9664HTML\u4ee3\u7801\u3002\u4ee5\u4e0b\u662f\u4e00\u4e2a\u793a\u4f8b\u4ee3\u7801\uff0c\u5b83\u5c06\u5220\u9664\u5b57\u7b26\u4e32\u4e2d\u7684\u6240\u6709HTML\u6807\u8bb0\u548cUnicode\u5b57\u7b26\uff1a<\/p>\n<pre class=\"prism-highlight prism-language-python\">import&nbsp;re\n#&nbsp;\u5b57\u7b26\u4e32\u793a\u4f8b\nmy_string&nbsp;=&nbsp;&quot;\u8fd9\u662f\u4e00\u4e2a&nbsp;&lt;b&gt;\u793a\u4f8b&lt;\/b&gt;&nbsp;\u5b57\u7b26\u4e32\uff0c\u5176\u4e2d\u5305\u542b&lt;p&gt;\u4e00\u4e9b&lt;\/p&gt;&nbsp;HTML&nbsp;\u4ee3\u7801\u548c\u4e2d\u6587\u5b57\u7b26&nbsp;&amp;#8211;&nbsp;\u5220\u9664\u5b83\u4eec\uff01&quot;\n#&nbsp;\u5220\u9664HTML\u6807\u8bb0\u548cUnicode\u5b57\u7b26\nno_tags&nbsp;=&nbsp;re.sub(&#39;&lt;[^&lt;]+?&gt;&#39;,&nbsp;&#39;&#39;,&nbsp;my_string)&nbsp;&nbsp;#&nbsp;\u5220\u9664HTML\u6807\u8bb0\nno_unicode&nbsp;=&nbsp;no_tags.encode(&#39;ascii&#39;,&nbsp;&#39;ignore&#39;).decode(&#39;utf-8&#39;)&nbsp;&nbsp;#&nbsp;\u5220\u9664Unicode\u5b57\u7b26\nprint(no_unicode)\n\u8f93\u51fa\u7ed3\u679c\u5c06\u662f\uff1a\n\u8fd9\u662f\u4e00\u4e2a&nbsp;\u793a\u4f8b&nbsp;\u5b57\u7b26\u4e32\uff0c\u5176\u4e2d\u5305\u542b\u4e00\u4e9b&nbsp;HTML&nbsp;\u4ee3\u7801\u548c\u4e2d\u6587\u5b57\u7b26&nbsp;&nbsp;\u5220\u9664\u5b83\u4eec\uff01<\/pre>\n<p><!--autointro--><\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"
","format":"standard","meta":{"footnotes":""},"categories":[3],"tags":[],"class_list":["post-162","post","type-post","status-publish","format-standard","hentry","category-pythonbiji"],"_links":{"self":[{"href":"https:\/\/www.mianka.xyz\/index.php?rest_route=\/wp\/v2\/posts\/162","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/www.mianka.xyz\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/www.mianka.xyz\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/www.mianka.xyz\/index.php?rest_route=\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/www.mianka.xyz\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=162"}],"version-history":[{"count":0,"href":"https:\/\/www.mianka.xyz\/index.php?rest_route=\/wp\/v2\/posts\/162\/revisions"}],"wp:attachment":[{"href":"https:\/\/www.mianka.xyz\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=162"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/www.mianka.xyz\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=162"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/www.mianka.xyz\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=162"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}