获取html中的注释标签内容


获取html中的注释标签内容


获取紧跟在注释标签 <!-- comment --> 之后(即两个注释标签之间)的文本内容

比如

    <div id="id1">id1_text
        <div class="class1">class1_text
            <!-- react-text: 2108 -->
            "$$"
            <!-- /react-text -->
        </div>
    </div>
     
    
    from lxml import html
    
    # Sample markup: the wanted text ("$$", quotes included) sits between the
    # two React marker comments inside div.class1.
    html_string = """
    
    <div id="id1">id1_text
        <div class="class1">class1_text
            <!-- react-text: 2108 -->
            "$$"
            <!-- /react-text -->
        </div>
    </div>
    
    
    """
    
    html1 = html.fromstring(html_string)
    
    # Approach 1: go through the parent node. text() returns every text child
    # of the div in document order; the first one starts with "class1_text",
    # the second one is the text between the two comments.
    nodes = html1.xpath('//div[@class="class1"]/text()')
    
    # Approach 2: use the XPath comment() node test. normalize-space() trims
    # the padding inside the comment so it can be compared exactly, then the
    # first following sibling text node is selected.
    nodes_ = html1.xpath('//comment()[normalize-space() = "react-text: 2108"]/following-sibling::text()[1]')[0]
    
    # Both raw results still carry the newlines and indentation that surround
    # the value in the markup, so strip them before printing.
    print(nodes[1].strip())  # "$$"
    print(nodes_.strip())    # "$$"

Buy me a 肥仔水!