PhantomJS Node-page.open-无法跟踪多个页面[英] PhantomJS Node - page.open - cannot keep track of multiple pages

问题描述

我正在使用 Phantom Node 来连接节点与 PhantomJS.我正在尝试并行打开页面,但问题是 page.open 回调函数不会传回对页面的引用,因此我无法知道哪个页面已完成.

相关代码

self.queue[j].page.open.call( self.queue[j].page, rows[i].url, function( status )
{
   console.log( this ) // <-- returns undefined
   // So how do I keep track of which pages have finished loading?
   // The only variable I have available here is `status`
});

完整功能代码:

SnapEngine.prototype.processSnaps = function( rows, type )
{
var self = this;

if ( ! rows || rows.length === 0 ) return true;

for( var i = 0; i < rows.length; i++ )
{
    // If queue is full, stop processing and wait for next snap engine iteration
    if ( self.getAvailableSizeInQueue() <= 0 )
    {
        self.logger.info( 'Queue is full for signature snap processing' );
        return true;
    }

    // Snapshots are processed by url, if multiple duplication urls are requested, all are updated after one of them is complete
    // So if a url is already being processed, don't reprocess it
    if ( self.findUrlInQueue( rows[i].url ) !== false )
    {
        self.logger.info( 'URL already being processed', url );
        continue;
    }

    for( j = 0; j < self.queue.length; j++ )
    {
        // Find an unused page object
        if ( self.queue[j] && self.queue[j].hasOwnProperty( 'page' ) && ( ! self.queue[j].page.url || self.queue[j].page.url == '' ))
        {
            self.logger.info( 'Opening URL in browser', rows[i].url );

            // Start loading page
            self.queue[j].page.open.call( self.queue[j].page, rows[i].url, function( status )
            {
                // ===== ISSUE HERE =====
                var url = this.url; // <-- this is undefined
                // ======================

                self.resetPage( self.queue[ index ]);

                if ( status === 'success' )
                {
                    self.updateStatus( url, 'ready' );
                }
                else
                {
                    self.updateStatus( url, 'failed' );
                }

                self.removeUrlFromQueue( url )
            });

            self.updateStatus( rows[i].url, 'processing' );
            break;
        }
    }
}
}

推荐答案

试试这样:

我添加了一个直接在打开页面的部分周围执行的函数,从而引入了一个新的范围.因此 url 不会被破坏(你不能使用 rows[i].url 因为我会在你的回调被调用之前改变)并且可以在你的回调中使用.

for( j = 0; j < self.queue.length; j++ )
{
    // Find an unused page object
    if ( self.queue[j] && self.queue[j].hasOwnProperty( 'page' ) && ( ! self.queue[j].page.url || self.queue[j].page.url == '' ))
    {
        self.logger.info( 'Opening URL in browser', rows[i].url );
        (function() {
            var url = rows[i].url;
            // Start loading page
            self.queue[j].page.open.call( self.queue[j].page, url, function( status )
            {                   
                self.resetPage( self.queue[ index ]);

                if ( status === 'success' )
                {
                    self.updateStatus( url, 'ready' );
                }
                else
                {
                    self.updateStatus( url, 'failed' );
                }

                self.removeUrlFromQueue( url )
            });
        })();

        self.updateStatus( rows[i].url, 'processing' );
        break;
    }
}

本文地址:https://www.itbaoku.cn/post/1739914.html