Skip to content

Commit

Permalink
refactor(interfaces): move more server logic into domain-routers
Browse files Browse the repository at this point in the history
  • Loading branch information
rascala committed Jul 3, 2020
1 parent 1508c20 commit 3230283
Show file tree
Hide file tree
Showing 11 changed files with 634 additions and 239 deletions.
241 changes: 210 additions & 31 deletions package-lock.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
"dependencies": {
"@types/express": "^4.17.6",
"express": "^4.17.1",
"puppeteer": "^4.0.1",
"puppeteer-extra": "^3.1.9"
},
"devDependencies": {
Expand Down
157 changes: 157 additions & 0 deletions scripts/script.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
/* eslint-disable max-params */
// import { Authless } from '../src/index'
// import { BotRouter } from '../src/bots/botrouter'
import { IResponse as IAuthlessResponse, IBotRouter, IBot } from '../src/types'
// import { AuthlessServer } from '../src/server/server'
import { Bot } from '../src'
import { BotRouter } from '../src/bots/botrouter'
import { Browser } from 'puppeteer'
import { DomainPath } from '../src/domainPaths/domainPath'
import { DomainPathRouter } from '../src/domainPaths/domainPathRouter'
// import { Bot } from '../src/bots/bot'

// use this service and pass it to authless server
/**
 * Sample DomainPath used by this script to exercise the routers end to end.
 *
 * It performs no real authentication, and its page handler always navigates
 * to https://www.google.com regardless of the name it was constructed with.
 * Use this service and pass it to the authless server.
 */
class SampleDomainPath extends DomainPath {

  // domain = 'https://www.crunchbase.com'
  domain = 'google.com'
  // urls = ['http://www.crunchbase.com']
  // urls = ['google.com']
  // botRouter: IBotRouter

  /**
   * @param domain - forwarded to the DomainPath constructor and stored on
   *   `this.domain`, replacing the 'google.com' field default.
   */
  constructor (domain: string) {
    super(domain)
    this.domain = domain
  }

  /**
   * Stub: reports every page as already authenticated.
   *
   * @param page - unused by this sample
   * @returns always `true`
   */
  // eslint-disable-next-line class-methods-use-this
  async isAuthenticated (page: any): Promise<boolean> {
    return true
  }

  /**
   * Stub: a real implementation would log in here.
   *
   * @param page - unused by this sample
   * @returns always `true`
   */
  // eslint-disable-next-line class-methods-use-this
  async authenticate (page: any): Promise<any> {
    // do authentication here
    return true
  }

  /**
   * Opens a new page in `browser`, loads https://www.google.com and returns
   * whatever `this.getJsonResponse(page)` produces.
   *
   * @param browser - puppeteer browser used to open the page
   * @param selectedDomainPath - not used by this sample
   * @param selectedBot - not used by this sample
   * @param config - optional; only `config.puppeteerParams` is read and it is
   *   handed to `this.setupPage`. When `config` is omitted, `puppeteerParams`
   *   is `undefined` instead of the call throwing.
   * @returns the result of `this.getJsonResponse(page)`
   */
  // eslint-disable-next-line class-methods-use-this
  async pageHandler (browser: Browser, selectedDomainPath, selectedBot?: IBot, config?: any): Promise<IAuthlessResponse | null> {
    // `config` is declared optional, and destructuring undefined/null throws a
    // TypeError, so fall back to an empty options object first.
    const options = (typeof config === 'undefined' || config === null) ? {} : config
    const { puppeteerParams } = options

    // const browser = await this.launchBrowser(selectedDomainPath, selectedBot, {puppeteerParams, puppeteerPlugins})
    const page = await browser.newPage()
    await this.setupPage(page, puppeteerParams)

    const url = 'google.com'
    console.log(`going to url ${url}`)
    await page.goto(
      `https://www.${url}`,
      {referer: 'google.com'}
    )

    const pageUrl = page.url()
    console.log(`-- pageUrl: ${pageUrl}`)

    // const isAuthenticated = await selectedDomainPath.isAuthenticated(page)
    // if(isAuthenticated === false) {
    //   await selectedDomainPath.authenticate(page, selectedBot)
    // }
    // do scraping here
    console.log('hello from pageHandler')
    return await this.getJsonResponse(page)
    // return null
  }
}

// Placeholder bots, keyed by the strings BotRouter matches against a url.
const botRouter = new BotRouter({
  'google': [new Bot('usernmae', 'password')],
  'crunchbase': [new Bot('usernmae', 'password')],
  'crunchbase-free': [new Bot('usernmae', 'password')],
})

// const cbBotRouter = new BotRouter([new Bot('cbusername', 'cbpassword')])
// const cbDomainPath = new SampleDomainPath('crunchbase.com', cbBotRouter, ['crunchbase.com'])

// One SampleDomainPath per url pattern known to the router.
const domainPathRouter = new DomainPathRouter({
  'google.com': new SampleDomainPath('google-home'),
  'crunchbase.com': new SampleDomainPath('crunchbase-home'),
  'crunchbase.com/person/..': new SampleDomainPath('crunchbase-person'),
  'linkedin.com': new SampleDomainPath('linkedin-home'),
})

console.log(domainPathRouter)

// Launch options: a local Chromium build, with a visible window.
const puppeteerParams = {
  executablePath: '/Applications/Chromium.app/Contents/MacOS/Chromium',
  headless: false,
}
const puppeteerPlugins = []
// const server = new AuthlessServer(
// domainPathRouter,
// {
// executablePath: '/Applications/Chromium.app/Contents/MacOS/Chromium',
// headless: false,
// },
// // eslint-disable-next-line array-bracket-newline
// [
// // stealthPlugin
// // adBlockPlugin
// // eslint-disable-next-line array-bracket-newline
// ]
// )

// console.log('hello')
// server.run()
// console.log('bye')

// Resolve the domain path and bot for one url, then run its page handler.
const url = 'google.com'
const domainPath = domainPathRouter.getDomainPathForUrl(url)
const bot = botRouter.getBotForUrl(url)

// Reports a rejection of the launch step (or a throw from its success callback).
const reportLaunchFailure = (err: unknown): void => {
  console.log('failed to launch browser')
  console.log(err)
}

if (domainPath !== undefined) {
  const launchOptions = {puppeteerParams, puppeteerPlugins}
  domainPathRouter
    .launchBrowser(domainPath, bot, launchOptions)
    .then(browser => {
      // NOTE(review): SampleDomainPath.pageHandler is declared as
      // (browser, selectedDomainPath, selectedBot?, config?), but this call
      // passes the bot second and the options third — confirm the intended
      // argument order against DomainPath.
      domainPath.pageHandler(browser, bot, {urlParams: {}})
        .then(res => {
          console.log(res)
        })
        .catch(err => {
          console.log(err)
        })
    })
    .catch(reportLaunchFailure)
}
// eslint-disable-next-line multiline-comment-style
/*
// level 1 - domainPath level
const bot = new CrunchbaseFreeBot('username', 'password')
const cbCompanyProfileDomainPath = new CruchBaseProfileDomainPath()
const response = await cbCompanyProfileDomainPath.pageHandler(page, bot)
const resource = response.toResources(response)
// level 2 - domain level
import { domainPathRouter } from '...'
const response = await domainPathRouter.pageHandler(browser, 'https://crunchbase.com/profiles/1234', bot)
const response = await domainPathRouter.apiHandler(api, 'https://api.crunchbase.com/profiles/1234', bot)
const domainPathRouter = new DomainPathRouter([domainPath, cbDomainPath, liDomainPath])
const domainsHash = {
'crunchbase.com': cbDomainPathRouter,
'linkedin.com': liDomainPathRouter,
'google.com': ggDomainPathRouter,
}
const botsHash = {
'crunchbase.com': cbBotRouter,
'linkedin.com': liBotRouter,
'google.com': ggBotRouter,
}
// level 3 - multi-domainPath level
const {url, referer} = expressRequest.query
const domainRouter = domainsHash[url.toDomain()]
const bot = botsHash[url.toDomain()].getBot()
domainRouter.pageHandler(url, bot, {puppeteerParams, puppeteerPlugins})
domainRouter.pageHandler(url, botRouter.getBot(url), {puppeteerParams, puppeteerPlugins})
*/
124 changes: 0 additions & 124 deletions src/authless.ts

This file was deleted.

2 changes: 1 addition & 1 deletion src/bots/bot.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/* eslint-disable no-warning-comments */
import { IBot } from '../types2'
import { IBot } from '../types'

// 1 minutes = 60_000 milliseconds
const ONE_MINUTE = 60_000
Expand Down
54 changes: 38 additions & 16 deletions src/bots/botrouter.ts
Original file line number Diff line number Diff line change
@@ -1,28 +1,50 @@
import { IBot, IBotRouter } from '../types2'
import { IBot, IBotRouter } from '../types'

/**
 * Hands out bots for a url, rotating through the bots registered under the
 * matching `botMap` key.
 *
 * NOTE(review): this listing looks like a rendered diff — lines of the earlier
 * single-list implementation (`bots` / `botIndex` / `getBot` /
 * `getBotByUsername`) appear interleaved with the map-based one
 * (`botMap` / `botIndices` / `getBotForUrl`). Confirm against the real file.
 */
export class BotRouter implements IBotRouter {
bots: IBot[]
botIndex = 0
// bots registered per url key
botMap: {[url: string]: IBot[]}
// index of the next bot to hand out, per botMap key
botIndices: {[url: string]: number}

constructor (bots: IBot[]) {
this.bots = bots
constructor (botMap: {[url: string]: IBot[]}) {
this.botMap = botMap
// every key's rotation starts at index 0
this.botIndices = Object.keys(botMap).reduce((map, url) => {
map[url] = 0
return map
}, {})
}

private incrementBotIndex (): number {
const botIndex = this.botIndex
this.botIndex = (this.botIndex + 1) % this.bots.length
return botIndex
}
// private incrementBotIndex (): number {
// const botIndex = this.botIndex
// this.botIndex = (this.botIndex + 1) % this.bots.length
// return botIndex
// }

/**
 * Returns the next bot for the first `botMap` key that is a substring of
 * `url`, then advances that key's index modulo its number of bots.
 *
 * Keys are ordered by ascending length before matching, so the shortest
 * matching key is the one used. Returns undefined when no key matches or
 * when the stored index is not below the number of bots for that key.
 *
 * NOTE(review): with shortest-match, a key that is a substring of another
 * key (e.g. 'crunchbase' vs 'crunchbase-free') always wins — confirm that
 * longest-match was not intended.
 */
getBotForUrl (url: string): IBot | undefined {
console.log(`url = ${url}`)
const matchedUrlKeys = Object.keys(this.botMap)
.sort((a, b) => a.length - b.length)
.filter(domainUrl => url.includes(domainUrl))

getBot (): IBot | undefined {
if(this.botIndex < this.bots.length) {
return this.bots[this.incrementBotIndex()]
console.log(`matchedUrlKeys = ${JSON.stringify(matchedUrlKeys)}`)
if(matchedUrlKeys.length > 0) {
const matchedUrl = matchedUrlKeys[0]
const matchedBots = this.botMap[matchedUrlKeys[0]]
const botIndex = this.botIndices[matchedUrl]
if(botIndex < matchedBots.length) {
// hand out the current bot and move the key's index on, wrapping at the end
this.botIndices[matchedUrl] = (this.botIndices[matchedUrl] + 1) % matchedBots.length
return matchedBots[botIndex]
}
}
}

// getBot (): IBot | undefined {
// if(this.botIndex < this.bots.length) {
// return this.bots[this.incrementBotIndex()]
// }
// }

// eslint-disable-next-line no-warning-comments
// TODO - get only if bot.isBelowRateLimit() is true
getBotByUsername (name: string): IBot | undefined {
return this.bots.find(bot => bot.username === name)
}
// getBotByUsername (name: string): IBot | undefined {
// return this.bots.find(bot => bot.username === name)
// }
}
Loading

0 comments on commit 3230283

Please sign in to comment.