Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
// no background images, even ones that have reasonable aspect ratios
// TODO: If necessary, also look at parents. I've seen them say
// "background" in their IDs as well.
rule(type('image'), score(this.hasBackgroundInID.bind(this)), {name: 'hasBackgroundInID'}),
// return image element(s) with max score
rule(type('image').max(), out('image')),
/**
* Title rules
*/
// consider all eligible h1 elements
rule(dom('h1').when(this.isEligibleTitle.bind(this)), type('title')),
// better score based on y-axis proximity to max scoring image element
rule(type('title'), score(this.isNearImageTopOrBottom.bind(this)), {name: 'isNearImageTopOrBottom'}),
// return title element(s) with max score
rule(type('title').max(), out('title')),
/**
* Price rules
*/
// 72% by itself, at [4, 4, 4, 4...]!:
// consider all eligible span and h2 elements
rule(dom('span, h2').when(this.isEligiblePrice.bind(this)), type('price')),
// check if the element has a '$' in its innerText
rule(type('price'), score(this.hasDollarSign.bind(this)), {name: 'hasDollarSign'}),
// better score the closer the element is to the top of the page
rule(type('price'), score(this.isAboveTheFold.bind(this)), {name: 'isAboveTheFoldPrice'}),
// check if the id has "price" in it
rule(type('price'), score(this.hasPriceInID.bind(this)), {name: 'hasPriceInID'}),
rule(type('price'), score(this.hasPriceInParentID.bind(this)), {name: 'hasPriceInParentID'}),
// check if any class names have "price" in them
rule(type('price'), score(this.hasPriceInClassName.bind(this)), {name: 'hasPriceInClassName'}),
// TODO: Consider a bonus for <img> tags.
rule(dom('div').when(fnode => this.isVisible(fnode) && this.hasBackgroundImage(fnode)), type('image')),
// better score the closer the element is to the top of the page
rule(type('image'), score(this.isAboveTheFold.bind(this)), {name: 'isAboveTheFoldImage'}),
// better score for larger images
rule(type('image'), score(this.isBig.bind(this)), {name: 'isBig'}),
// bonus for non-extreme aspect ratios, to filter out banners or nav elements
// TODO: Meant to make this a penalty, but it turns out to work as is.
// Try as a penalty.
rule(type('image'), score(this.hasSquareAspectRatio.bind(this)), {name: 'hasSquareAspectRatio'}),
// no background images, even ones that have reasonable aspect ratios
// TODO: If necessary, also look at parents. I've seen them say
// "background" in their IDs as well.
rule(type('image'), score(this.hasBackgroundInID.bind(this)), {name: 'hasBackgroundInID'}),
// return image element(s) with max score
rule(type('image').max(), out('image')),
/**
* Title rules
*/
// consider all eligible h1 elements
rule(dom('h1').when(this.isEligibleTitle.bind(this)), type('title')),
// better score based on y-axis proximity to max scoring image element
rule(type('title'), score(this.isNearImageTopOrBottom.bind(this)), {name: 'isNearImageTopOrBottom'}),
// return title element(s) with max score
rule(type('title').max(), out('title')),
/**
* Price rules
*/
// 72% by itself, at [4, 4, 4, 4...]!:
// consider all eligible span and h2 elements
const lightness = (cMax + cMin) / 2;
const denom = (1 - (Math.abs(2 * lightness - 1)));
// Return 0 when the denominator is 0, i.e. lightness is exactly 0 or 1 (black, or white if channels are normalized to 0..1).
return (denom === 0) ? 0 : delta / denom;
}
/*
 * The actual ruleset.
 *
 * Every <div> is tagged with the 'overlay' type, each scoring callback below
 * is attached to that type under its rule name, and the final rule sends the
 * max-scoring 'overlay' node(s) to the 'overlay' output key.
 */
// [rule name, scoring callback] pairs, listed in the order the rules are added.
// Note that suspiciousClassOrId is registered under the shorter name 'classOrId'.
const namedOverlayScorers = [
    ['big', big],
    ['nearlyOpaque', nearlyOpaque],
    ['monochrome', monochrome],
    ['classOrId', suspiciousClassOrId],
    ['visible', visible],
];
const rules = ruleset([
    rule(dom('div'), type('overlay')),
    ...namedOverlayScorers.map(
        ([ruleName, scorer]) => rule(type('overlay'), score(scorer), {name: ruleName})
    ),
    rule(type('overlay').max(), out('overlay')),
]);
return rules;
}
}
// better score the closer the element is to the top of the page
rule(type('price'), score(this.isAboveTheFold.bind(this)), {name: 'isAboveTheFoldPrice'}),
// check if the id has "price" in it
rule(type('price'), score(this.hasPriceInID.bind(this)), {name: 'hasPriceInID'}),
rule(type('price'), score(this.hasPriceInParentID.bind(this)), {name: 'hasPriceInParentID'}),
// check if any class names have "price" in them
rule(type('price'), score(this.hasPriceInClassName.bind(this)), {name: 'hasPriceInClassName'}),
rule(type('price'), score(this.hasPriceInParentClassName.bind(this)), {name: 'hasPriceInParentClassName'}),
// better score for larger font size
rule(type('price'), score(this.fontIsBig.bind(this)), {name: 'fontIsBig'}),
// better score based on x-axis proximity to max scoring image element
rule(type('price'), score(this.isNearImage.bind(this)), {name: 'isNearImage'}),
// check if innerText has a price pattern
rule(type('price'), score(this.hasPriceishPattern.bind(this)), {name: 'hasPriceishPattern'}),
// return price element(s) with max score
rule(type('price').max(), out('price')),
],
coeffs,
biases);
}
}
const paragraphishNote = fnode.noteFor('paragraphish')
return paragraphishNote
? (1 - linkDensity(fnode, paragraphishNote.inlineLength)) * 1.5
: (1 - linkDensity(fnode)) * 1.5
}),
),
rule(dom('p'), score(4.5).type('paragraphish')),
rule(
type('paragraphish').bestCluster({
splittingDistance: 3,
differentDepthCost: 6.5,
differentTagCost: 2,
sameTagCost: 0.5,
strideCost: 0,
}),
out('content').allThrough(domSort),
),
)
export default rules