在 CasperJS 中计算给定元素的 XPath

Calculating an XPath of a given element in CasperJS

我正在尝试让 Firebug 的 getElementXPath 函数在 CasperJS 中工作,但我似乎找不到调用它的正确位置。下面是我目前的代码;它只适用于已经带有 "id" 属性的元素,但这帮助不大,因为我正是要用 XPath 来代替 id(大多数元素都没有 id)。

// The asker's attempt: look up an element in the page and print its XPath.
// Two variants are shown; "Set 1" is active and "Set 2" is commented out.
casper.then(function () {
    // The function passed to evaluate() calls getElementXPath from inside the
    // page. Listening to "page.error" shows that this call fails with
    // "ReferenceError: Can't find variable: getElementXPath".
    var Element = this.evaluate(function(){
        var elm = document.querySelector('[class="h4"]');
        return getElementXPath(elm); //Set 1
        //return elm; //Set 2
    });

    // With Set 1 this is reported to always print "xpath: null".
    console.log('xpath: '+ Element); //Set 1
    // Set 2 calls getElementXPath on the value returned from evaluate() instead;
    // it is reported to give a correct path only for elements that have an id.
    //console.log('xpath: '+ getElementXPath(Element)); //Set 2
});

Set 1 始终输出 "xpath: null"

只有当元素已经有 "id" 属性时,Set 2 才会输出正确的路径,例如 "xpath: //*[@id="button"]"

否则 Set 2 只会输出最后一个标签,例如 "xpath: /a" 或 "xpath: /span"

这是 Firebug 中的 getElementXPath 函数,我刚刚将其粘贴到我的 JS 文件的顶部。

/**
 * Returns an XPath expression for the given element.
 *
 * An element with a truthy `id` gets the short form `//*[@id="..."]`.
 * Anything else (including a missing element) is handed to
 * getElementTreeXPath, whose result is returned unchanged.
 *
 * @param {Element} element - The element to describe.
 * @returns {?string} The XPath expression, or whatever getElementTreeXPath
 *     returns for the given value.
 */
function getElementXPath(element)
{
    var hasId = Boolean(element && element.id);
    if (!hasId)
        return getElementTreeXPath(element);

    return '//*[@id="' + element.id + '"]';
}

/**
 * Builds an absolute XPath for an element by walking up its ancestors.
 *
 * Each step is the element's `prefix:localName` (or just `localName` when
 * there is no prefix). A 1-based position such as `[2]` is appended only
 * when a sibling with the same nodeName exists before or after the element.
 *
 * @param {Element} element - The element to start from.
 * @returns {?string} A path such as "/html/body/p[2]/a", or null when the
 *     argument is missing or is not an element node.
 */
function getElementTreeXPath(element)
{
    var segments = [];
    var node = element;

    // Climb until we leave the element tree (e.g. reach the document node).
    while (node && node.nodeType == Node.ELEMENT_NODE)
    {
        // Count earlier siblings that share this node's name.
        var precedingSameName = 0;
        var prev = node.previousSibling;
        while (prev)
        {
            // A document type declaration is never counted.
            if (prev.nodeType != Node.DOCUMENT_TYPE_NODE && prev.nodeName == node.nodeName)
                precedingSameName++;
            prev = prev.previousSibling;
        }

        // Look ahead only until the first later sibling with the same name.
        var followedBySameName = false;
        var next = node.nextSibling;
        while (next && !followedBySameName)
        {
            followedBySameName = (next.nodeName == node.nodeName);
            next = next.nextSibling;
        }

        var namespacePart = node.prefix ? node.prefix + ":" : "";

        // The position is omitted when the name is unique among the siblings.
        var position = "";
        if (precedingSameName > 0 || followedBySameName)
            position = "[" + (precedingSameName + 1) + "]";

        // Ancestors are visited innermost first, so prepend each step.
        segments.unshift(namespacePart + node.localName + position);

        node = node.parentNode;
    }

    if (!segments.length)
        return null;

    return "/" + segments.join("/");
}

当您监听 "page.error" 事件时,您会看到类似下面的内容:

Error: ReferenceError: Can't find variable: getElementXPath

这与您在页面中包含该代码的方式有关。以下完整脚本适用于我:

var casper = require('casper').create();

/**
 * Handler for the "page.error" event: echoes the error message.
 *
 * @param {string} msg - The error message.
 * @param {Array} trace - The stack trace (currently unused).
 */
function reportPageError(msg, trace) {
    this.echo("Error: " + msg);
    // maybe make it a little fancier with the code from the PhantomJS equivalent
}

// http://docs.casperjs.org/en/latest/events-filters.html#page-error
casper.on("page.error", reportPageError);

casper.start('http://example.com');

casper.then(function() {
    // First evaluate(): define the XPath helpers inside the page itself.
    // Both helpers live inside this callback so that they exist in the page
    // context, where the later evaluate() call looks them up.
    this.evaluate(function(){
        // Builds an absolute path by walking up the ancestors. Each step is
        // prefix:localName, with a 1-based position only when a sibling with
        // the same nodeName exists.
        function getElementTreeXPath(element)
        {
            var segments = [];
            var node = element;

            while (node && node.nodeType == Node.ELEMENT_NODE)
            {
                // Count earlier siblings that share this node's name.
                var precedingSameName = 0;
                var prev = node.previousSibling;
                while (prev)
                {
                    // A document type declaration is never counted.
                    if (prev.nodeType != Node.DOCUMENT_TYPE_NODE && prev.nodeName == node.nodeName)
                        precedingSameName++;
                    prev = prev.previousSibling;
                }

                // Look ahead only until the first later sibling with the same name.
                var followedBySameName = false;
                var next = node.nextSibling;
                while (next && !followedBySameName)
                {
                    followedBySameName = (next.nodeName == node.nodeName);
                    next = next.nextSibling;
                }

                var namespacePart = node.prefix ? node.prefix + ":" : "";

                var position = "";
                if (precedingSameName > 0 || followedBySameName)
                    position = "[" + (precedingSameName + 1) + "]";

                // Ancestors are visited innermost first, so prepend each step.
                segments.unshift(namespacePart + node.localName + position);

                node = node.parentNode;
            }

            if (!segments.length)
                return null;

            return "/" + segments.join("/");
        }

        // Attached to window so the function is a global of the page and can
        // be called by name from a separate evaluate() call.
        window.getElementXPath = function(element)
        {
            if (!element || !element.id)
                return getElementTreeXPath(element);

            return '//*[@id="' + element.id + '"]';
        };
    });

    // Second evaluate(): use the helper installed above on the first <a>.
    var linkXPath = this.evaluate(function(){
        return getElementXPath(document.querySelector("a"));
    });
    this.echo(linkXPath);
});

casper.run();

输出:

/html/body/div/p[2]/a

诀窍是让 getElementXPath 在页面上下文的全局作用域中可用。这可以通过把该函数赋值给 window.getElementXPath 轻松实现。