Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chrome headless #1

Open
tiancheng91 opened this issue May 27, 2018 · 7 comments
Open

chrome headless #1

tiancheng91 opened this issue May 27, 2018 · 7 comments

Comments

@tiancheng91
Copy link
Owner

tiancheng91 commented May 27, 2018

安装

troubleshooting

[推荐]snap安装

snap install chromium && apt install fonts-wqy-microhei

官方deb包安装

# Install the packages the steps below rely on (an indicator library and fonts).
apt-get install -y libappindicator1 fonts-liberation fonts-wqy-microhei
# Download the current stable Chrome .deb package from Google.
wget https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb
# Install the downloaded package.
dpkg -i google-chrome*.deb
# Ask apt to fix up any dependencies the dpkg step left unsatisfied.
apt-get -f install

# Make the binary reachable under the shorter name `chrome`.
ln -s /opt/google/chrome/google-chrome /usr/bin/chrome

# Skip the Chromium download that puppeteer's npm install performs by default.
# The variable must be exported: a plain assignment stays local to the shell
# and would not be inherited by a later `npm install` process.
export PUPPETEER_SKIP_CHROMIUM_DOWNLOAD=true

docker启动

docker container run -d -p 9222:9222 tiancheng91/chrome-headless --remote-debugging-address=0.0.0.0 --remote-debugging-port=9222

puppeteer

apt-get install gconf-service libasound2 libatk1.0-0 libc6 libcairo2 libcups2 libdbus-1-3 libexpat1 libfontconfig1 libgcc1 libgconf-2-4 libgdk-pixbuf2.0-0 libglib2.0-0 libgtk-3-0 libnspr4 libpango-1.0-0 libpangocairo-1.0-0 libstdc++6 libx11-6 libx11-xcb1 libxcb1 libxcomposite1 libxcursor1 libxdamage1 libxext6 libxfixes3 libxi6 libxrandr2 libxrender1 libxss1 libxtst6 ca-certificates fonts-liberation libappindicator1 libnss3 lsb-release xdg-utils wget
npm i npm@4 -g
npm -g install puppeteer

使用

puppeteer 启动

// Launch options. The receiver of `launch` is not shown in this note;
// presumably this is puppeteer.launch — TODO confirm.
launch({
        // Left empty here; presumably meant to be replaced with the path of
        // the installed Chrome/Chromium binary — TODO confirm.
        executablePath: '',
        // Extra command-line flag handed to the browser process.
        args: ['--no-sandbox'],
})

headless中使用的默认参数

# Start Chromium in headless mode with remote debugging on port 9222 and the
# flag set listed below. (No comments inside the command itself: a comment
# line would terminate the backslash continuation.)
chromium --headless \
  --remote-debugging-port=9222 --no-sandbox \
  --disable-background-networking \
  --disable-background-timer-throttling \
  --disable-client-side-phishing-detection \
  --disable-default-apps \
  --disable-dev-shm-usage \
  --disable-extensions \
  --disable-hang-monitor \
  --disable-popup-blocking \
  --disable-prompt-on-repost \
  --disable-sync \
  --disable-translate \
  --metrics-recording-only \
  --no-first-run \
  --safebrowsing-disable-auto-update
@tiancheng91
Copy link
Owner Author

tiancheng91 commented May 27, 2018

npm EACCES 错误

https://docs.npmjs.com/getting-started/fixing-npm-permissions

// 原因不明,降级到旧版本 npm(npm@4)后问题消失
npm i npm@4 -g

@tiancheng91
Copy link
Owner Author

tiancheng91 commented May 30, 2018

链式调用

// pup-chain
const ObjProperties = require("simple-property-retriever");
const pWaterfall = require('p-waterfall');

/**
 * Fluent wrapper that queues method calls on a target object and executes
 * the queue when `run()` is called.
 *
 * For every non-enumerable function defined directly on the target's
 * prototype, a same-named method is installed on the chain instance. Calling
 * it does not touch the target; it records the call and returns the chain so
 * further calls can be appended.
 */
class Chain {
  /**
   * @param {object} initiator - Object whose prototype methods become
   *   chainable. It is stored as `this.ref` and used as the receiver of
   *   every queued call.
   */
  constructor(initiator) {
    // Not read anywhere in this class; kept so existing consumers that look
    // at the property keep working.
    this.initiated = true;
    // Pending calls, in the order they were queued.
    this.chain = [];
    this.ref = initiator;

    const proto = Object.getPrototypeOf(initiator);
    for (const property of ObjProperties.getOwnNonenumerables(proto)) {
      // Wrapping `constructor` would shadow Chain's own constructor with a
      // queuing stub that can never be run successfully.
      if (property === 'constructor') {
        continue;
      }

      // Inspect the descriptor instead of reading the member so accessors
      // are not triggered; only plain functions can be queued as calls.
      const descriptor = Object.getOwnPropertyDescriptor(proto, property);
      if (!descriptor || typeof descriptor.value !== 'function') {
        continue;
      }

      // Arrow function: the wrapper stays bound to this chain even when it
      // is detached from the instance (e.g. destructured or passed around).
      this[property] = (...args) => {
        this.chain.push(() => this.ref[property](...args));
        return this;
      };
    }
  }

  /**
   * Hands the queued calls to `pWaterfall` and empties the queue so the
   * chain can be reused.
   *
   * @returns {*} Whatever `pWaterfall` returns for the queued calls.
   */
  run() {
    const pending = this.chain;
    this.chain = [];
    return pWaterfall(pending);
  }
}

module.exports = Chain;
// app.js
const puppeteer = require('puppeteer');
const Chain = require("pup-chain");

// Demo: open one page, visit each URL in turn and print the resulting URL and
// page title, driving the page through Chain.
(async () => {
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    const urls = ['https://example.com', 'https://example.org'];

    // `const` bindings throughout: the undeclared `url`, `newurl` and `title`
    // of the earlier version were implicit globals, which throw in strict
    // mode / ES modules.
    for (const url of urls) {
      // Chain's constructor is synchronous, so there is nothing to await.
      const mainChain = new Chain(page);

      // Queue goto() followed by url(); the awaited result of run() is what
      // gets reported as the page URL below.
      const newurl = await mainChain.goto(url).url().run();

      const title = await mainChain.title().run();

      console.log(`${url} > URL: "${newurl}", Title: "${title}"`);
    }
  } finally {
    // Always shut the browser down, even when navigation fails, and wait for
    // the shutdown to finish.
    await browser.close();
  }
})().catch((error) => {
  // Surface failures instead of leaving an unhandled rejection.
  console.error(error);
  process.exitCode = 1;
});

@tiancheng91 tiancheng91 changed the title chromeless chromeless 安装及使用 May 30, 2018
@tiancheng91

This comment has been minimized.

@tiancheng91
Copy link
Owner Author

tiancheng91 commented May 30, 2018

@tiancheng91
Copy link
Owner Author

需求: DevTools proxy

  • 不同path连接不同chrome实例, 使用不同user-data,便于持久化网站会话信息.

@tiancheng91
Copy link
Owner Author

puppeteer 过滤脚本

// Block requests to googleadservices.com and let everything else through.
// NOTE: request.abort()/request.continue() only work when interception has
// been enabled with `page.setRequestInterception(true)` (see the Puppeteer
// docs); that call is not part of this snippet.
page.on('request', (request) => {
  if (request.url().includes('googleadservices.com')) {
    request.abort();
    // A request may be handled only once: without this return the aborted
    // request would also reach continue() below.
    return;
  }
  request.continue();
});

@tiancheng91
Copy link
Owner Author

多会话处理:
puppeteer/puppeteer#85
puppeteer/puppeteer#645

const puppeteer = require('puppeteer');
const Page = require('puppeteer/lib/Page');

/**
 * Opens a blank page inside a freshly created browser context.
 *
 * Sends `Target.createBrowserContext` and `Target.createTarget` over the
 * browser's connection, opens a session for the new target and builds a
 * `Page` on top of it. The context id is stored on the returned page as
 * `browserContextId`.
 *
 * @param {object} browser - Browser object; its `_connection`,
 *   `_ignoreHTTPSErrors` and `_screenshotTaskQueue` members are read.
 * @returns {Promise<object>} The created page, tagged with `browserContextId`.
 */
async function newPageWithNewContext(browser) {
  const connection = browser._connection;

  const context = await connection.send('Target.createBrowserContext');
  const target = await connection.send('Target.createTarget', {
    url: 'about:blank',
    browserContextId: context.browserContextId,
  });
  const session = await connection.createSession(target.targetId);

  const createdPage = await Page.create(
    session,
    browser._ignoreHTTPSErrors,
    browser._screenshotTaskQueue,
  );
  return Object.assign(createdPage, { browserContextId: context.browserContextId });
}

/**
 * Closes a page and, when the page carries a `browserContextId`, first
 * disposes that browser context via `Target.disposeBrowserContext`.
 *
 * @param {object} browser - Browser object; its `_connection` is used to send
 *   the dispose command.
 * @param {object} page - Page to close; `browserContextId` is read from it.
 * @returns {Promise<void>}
 */
async function closePage(browser, page) {
  const contextId = page.browserContextId;
  const hasContext = contextId !== undefined && contextId !== null;

  if (hasContext) {
    await browser._connection.send('Target.disposeBrowserContext', {
      browserContextId: contextId,
    });
  }

  await page.close();
}

// Demo: open a page in its own browser context, load a site and print the
// cookies visible to that page.
(async () => {
  const browser = await puppeteer.launch();
  try {
    const page = await newPageWithNewContext(browser);
    try {
      await page.goto('https://example.com');
      console.log(await page.cookies());
    } finally {
      // Release the page and its context even if navigation or the cookie
      // lookup fails.
      await closePage(browser, page);
    }
  } finally {
    // Never leave the browser process running after an error.
    await browser.close();
  }
})().catch((error) => {
  // Surface failures instead of leaving an unhandled rejection.
  console.error(error);
  process.exitCode = 1;
});

@tiancheng91 tiancheng91 changed the title chromeless 安装及使用 chrome headless 安装及使用 Jun 30, 2018
@tiancheng91 tiancheng91 changed the title chrome headless 安装及使用 chrome headless Jun 30, 2018
@tiancheng91 tiancheng91 mentioned this issue Oct 8, 2020
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant