Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
if (!contentDiv) {
throw new Error("Article content not found (no 'region-content' class)");
}
// remove article body-enclosing div (class "threed-sidebar-article-body"), then re-parent children
const bodyDiv = DomUtils.findOne(elem =>
elem.attribs && elem.attribs.class && elem.attribs.class.indexOf("threed-sidebar-article-body") >= 0,
contentDiv.children, true);
if (bodyDiv) {
const parent: any = bodyDiv.parent;
bodyDiv.children.forEach(child => DomUtils.appendChild(parent, child));
DomUtils.removeElement(bodyDiv);
}
const title = DomUtils.findOne(elem => elem.name === "h1",
contentDiv.children, true);
const titleText = title && DomUtils.getText(title);
article.title = titleText || `Article No. ${index + 1}`;
let imageIndex = 0;
const imageUrls: Dictionary = {};
DomUtils.findOne(elem => {
// download images
if (elem.name === "img" && elem.attribs && elem.attribs.src) {
const src = elem.attribs.src;
const imageUrl = src.startsWith("http") ? src : this.parameters.drupalBaseUrl + src;
const imageName = filenamify(decodeURIComponent(src.split("/").pop()));
const imageFileName = `article-${articleIndex}-${imageName}`;
const imageAssetPath = `${this.articlesDir}/${imageFileName}`;
/**
 * Downloads an article's HTML page, parses it into a DOM, locates the article
 * content container, and flattens the body-enclosing div.
 *
 * NOTE(review): this excerpt is truncated — the function body continues past the
 * last visible line (the final `if (bodyDiv)` block and the function itself are
 * not closed here). Only the visible steps are documented.
 *
 * NOTE(review): the return type reads as a bare `Promise`; its type argument
 * appears to be missing from this excerpt — confirm against the original file.
 *
 * @param context Not read in the visible part of this excerpt.
 * @param article Not read in the visible part of this excerpt.
 * @param url Address the page HTML is fetched from with a GET request.
 * @param index Numeric article index; converted to a zero-padded string of at least two characters.
 * @throws Error if no element whose class attribute contains "region-content" is found.
 */
export async function fetchArticle(context: IPlayContext, article: IArticle, url: string, index: number): Promise
{
// string form of the index, left-padded with "0" to a minimum width of 2
const articleIndex = index.toString().padStart(2, "0");
console.log(`fetchArticle - fetching HTML from ${url}`);
const pageHtml = await fetch.text(url, "GET");
// parse the article's HTML content
const handler = new DomHandler();
const parser = new Parser(handler);
parser.write(pageHtml);
parser.done();
const dom = handler.dom;
// find parent of article content
// (substring match via indexOf: any class attribute value containing "region-content" qualifies)
const contentDiv = DomUtils.findOne(elem =>
elem.attribs && elem.attribs.class && elem.attribs.class.indexOf("region-content") >=0,
dom, true);
if (!contentDiv) {
throw new Error("Article content not found (no 'region-content' class)");
}
// remove article body-enclosing div (class "threed-sidebar-article-body"), then re-parent children
// (the search starts at contentDiv's children; the `true` argument presumably enables recursion — see domutils findOne)
const bodyDiv = DomUtils.findOne(elem =>
elem.attribs && elem.attribs.class && elem.attribs.class.indexOf("threed-sidebar-article-body") >= 0,
contentDiv.children, true);
if (bodyDiv) {
const parent: any = bodyDiv.parent;
// NOTE(review): depending on the domutils version, appendChild may detach each child from
// bodyDiv.children while forEach is still iterating that array — confirm no children are skipped.
bodyDiv.children.forEach(child => DomUtils.appendChild(parent, child));
DomUtils.removeElement(bodyDiv);
/**
 * Method variant: downloads an article's HTML page, parses it into a DOM,
 * locates the article content container, and flattens the body-enclosing div.
 *
 * NOTE(review): this excerpt is truncated — the method body continues past the
 * last visible line (the final `if (bodyDiv)` block and the method itself are
 * not closed here), and the enclosing class is not visible. Only the visible
 * steps are documented.
 *
 * NOTE(review): the return type reads as a bare `Promise`; its type argument
 * appears to be missing from this excerpt — confirm against the original file.
 *
 * @param article Not read in the visible part of this excerpt.
 * @param url Address the page HTML is fetched from with a GET request.
 * @param index Numeric article index; converted to a zero-padded string of at least two characters.
 * @throws Error if no element whose class attribute contains "region-content" is found.
 */
async fetchArticle(article: IArticle, url: string, index: number): Promise
{
// string form of the index, left-padded with "0" to a minimum width of 2
const articleIndex = index.toString().padStart(2, "0");
console.log(`fetchArticle - fetching HTML from ${url}`);
const pageHtml = await fetch.text(url, "GET");
// parse the article's HTML content
const handler = new DomHandler();
const parser = new Parser(handler);
parser.write(pageHtml);
parser.done();
const dom = handler.dom;
// find parent of article content
// (substring match via indexOf: any class attribute value containing "region-content" qualifies)
const contentDiv = DomUtils.findOne(elem =>
elem.attribs && elem.attribs.class && elem.attribs.class.indexOf("region-content") >=0,
dom, true);
if (!contentDiv) {
throw new Error("Article content not found (no 'region-content' class)");
}
// remove article body-enclosing div (class "threed-sidebar-article-body"), then re-parent children
// (the search starts at contentDiv's children; the `true` argument presumably enables recursion — see domutils findOne)
const bodyDiv = DomUtils.findOne(elem =>
elem.attribs && elem.attribs.class && elem.attribs.class.indexOf("threed-sidebar-article-body") >= 0,
contentDiv.children, true);
if (bodyDiv) {
const parent: any = bodyDiv.parent;
// NOTE(review): depending on the domutils version, appendChild may detach each child from
// bodyDiv.children while forEach is still iterating that array — confirm no children are skipped.
bodyDiv.children.forEach(child => DomUtils.appendChild(parent, child));
DomUtils.removeElement(bodyDiv);
if (bodyDiv) {
const parent: any = bodyDiv.parent;
bodyDiv.children.forEach(child => DomUtils.appendChild(parent, child));
DomUtils.removeElement(bodyDiv);
}
const title = DomUtils.findOne(elem => elem.name === "h1",
contentDiv.children, true);
const titleText = title && DomUtils.getText(title);
article.title = titleText || `Article No. ${index + 1}`;
let imageIndex = 0;
const imageUrls: Dictionary = {};
DomUtils.findOne(elem => {
// download images
if (elem.name === "img" && elem.attribs && elem.attribs.src) {
const src = elem.attribs.src;
const imageUrl = src.startsWith("http") ? src : context.drupalBaseUrl + src;
const imageName = filenamify(decodeURIComponent(src.split("/").pop()));
const imageFileName = `article-${articleIndex}-${imageName}`;
const imageAssetPath = `${context.articleDir}/${imageFileName}`;
context.files[imageAssetPath] = imageAssetPath;
elem.attribs.src = imageFileName; // relative to location of html file
imageUrls[imageUrl] = imageAssetPath;
imageIndex++;
}
// remove additional classes from all nodes
if (elem.attribs && elem.attribs.class) {
if (!contentDiv) {
throw new Error("Article content not found (no 'region-content' class)");
}
// remove article body-enclosing div (class "threed-sidebar-article-body"), then re-parent children
const bodyDiv = DomUtils.findOne(elem =>
elem.attribs && elem.attribs.class && elem.attribs.class.indexOf("threed-sidebar-article-body") >= 0,
contentDiv.children, true);
if (bodyDiv) {
const parent: any = bodyDiv.parent;
bodyDiv.children.forEach(child => DomUtils.appendChild(parent, child));
DomUtils.removeElement(bodyDiv);
}
const title = DomUtils.findOne(elem => elem.name === "h1",
contentDiv.children, true);
const titleText = title && DomUtils.getText(title);
article.title = titleText || `Article No. ${index + 1}`;
let imageIndex = 0;
const imageUrls: Dictionary = {};
DomUtils.findOne(elem => {
// download images
if (elem.name === "img" && elem.attribs && elem.attribs.src) {
const src = elem.attribs.src;
const imageUrl = src.startsWith("http") ? src : context.drupalBaseUrl + src;
const imageName = filenamify(decodeURIComponent(src.split("/").pop()));
const imageFileName = `article-${articleIndex}-${imageName}`;
const imageAssetPath = `${context.articleDir}/${imageFileName}`;
const parser = new Parser(handler);
parser.write(pageHtml);
parser.done();
const dom = handler.dom;
// find parent of article content
const contentDiv = DomUtils.findOne(elem =>
elem.attribs && elem.attribs.class && elem.attribs.class.indexOf("region-content") >=0,
dom, true);
if (!contentDiv) {
throw new Error("Article content not found (no 'region-content' class)");
}
// remove article body-enclosing div (class "threed-sidebar-article-body"), then re-parent children
const bodyDiv = DomUtils.findOne(elem =>
elem.attribs && elem.attribs.class && elem.attribs.class.indexOf("threed-sidebar-article-body") >= 0,
contentDiv.children, true);
if (bodyDiv) {
const parent: any = bodyDiv.parent;
bodyDiv.children.forEach(child => DomUtils.appendChild(parent, child));
DomUtils.removeElement(bodyDiv);
}
const title = DomUtils.findOne(elem => elem.name === "h1",
contentDiv.children, true);
const titleText = title && DomUtils.getText(title);
article.title = titleText || `Article No. ${index + 1}`;
let imageIndex = 0;
const parser = new Parser(handler);
parser.write(pageHtml);
parser.done();
const dom = handler.dom;
// find parent of article content
const contentDiv = DomUtils.findOne(elem =>
elem.attribs && elem.attribs.class && elem.attribs.class.indexOf("region-content") >=0,
dom, true);
if (!contentDiv) {
throw new Error("Article content not found (no 'region-content' class)");
}
// remove article body-enclosing div (class "threed-sidebar-article-body"), then re-parent children
const bodyDiv = DomUtils.findOne(elem =>
elem.attribs && elem.attribs.class && elem.attribs.class.indexOf("threed-sidebar-article-body") >= 0,
contentDiv.children, true);
if (bodyDiv) {
const parent: any = bodyDiv.parent;
bodyDiv.children.forEach(child => DomUtils.appendChild(parent, child));
DomUtils.removeElement(bodyDiv);
}
const title = DomUtils.findOne(elem => elem.name === "h1",
contentDiv.children, true);
const titleText = title && DomUtils.getText(title);
article.title = titleText || `Article No. ${index + 1}`;
let imageIndex = 0;