获取 HTML 中注释标签之间的文本内容
即获取紧跟在注释标签 <!-- comment --> 之后的文本节点,
而不是注释本身的内容。
例如,从下面的片段中取出 "$$":
<div id="id1">id1_text
<div class="class1">class1_text
<!-- react-text: 2108 -->
"$$"
<!-- /react-text -->
</div>
</div>
from lxml import html
# Sample markup: the text we want ("$$") sits between two React marker
# comments inside the inner <div class="class1">.
html_string = """
<div id="id1">id1_text
<div class="class1">class1_text
<!-- react-text: 2108 -->
"$$"
<!-- /react-text -->
</div>
</div>
"""

# Approach 1: select every text node that is a direct child of the parent div.
_PARENT_TEXT_XPATH = '//div[@class="class1"]/text()'
# Approach 2: locate the marker comment via comment() and take the first
# text node that follows it as a sibling.
_AFTER_COMMENT_XPATH = '//comment()[normalize-space() = "react-text: 2108"]/following-sibling::text()[1]'

html1 = html.fromstring(html_string)

# Via the parent node: index 0 is the text before the first comment
# ("class1_text"), index 1 is the text between the two comments.
nodes = html1.xpath(_PARENT_TEXT_XPATH)

# Via the comment() node test: xpath() returns a list, so take its first
# (and, because of the [1] predicate, only) match.
_following_text = html1.xpath(_AFTER_COMMENT_XPATH)
nodes_ = _following_text[0]

# Both approaches print the same text node: the line containing "$$",
# including the newlines that surround it in the markup.
for _text in (nodes[1], nodes_):
    print(_text)