Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,44 +1,27 @@
import { findStyleTagIndexes } from 'roosterjs-content-model-dom';

// Marker strings that cleanHtmlComments hands to removeCommentsFromHtml.
// '\x3C' is the hex escape for '<', so this literal evaluates to '<!--'.
const HtmlCommentStart = '\x3C!--';
// NOTE(review): as written here this is the same runtime string as HtmlCommentStart;
// confirm whether a different form (e.g. an HTML-escaped marker) was intended.
const HtmlCommentStart2 = '<!--';
const HtmlCommentEnd = '-->';
// Lowercase tag fragments that extractHtmlIndexes looks up in a lowercased copy of the HTML.
const styleTag = '<style';
const styleClosingTag = '</style>';
// Matches a single non-word character; used to inspect the character right after `<style`.
const nonWordCharacterRegex = /\W/;

/**
* @internal
* Exported only for unit test
*
* Visits each style block reported by the style-index lookup and passes the HTML,
* together with that block's start and end indexes, to removeCommentsFromHtml once
* per marker (HtmlCommentStart, HtmlCommentStart2, HtmlCommentEnd).
* @param html The HTML string to process
* @returns The HTML string after every removeCommentsFromHtml pass has been applied
*/
export function cleanHtmlComments(html: string) {
let { styleIndex, styleEndIndex } = extractHtmlIndexes(html);
let { styleIndex, styleEndIndex } = findStyleTagIndexes(html);

// Only the opening-tag index drives the loop; styleEndIndex is not checked here.
while (styleIndex > -1) {
// removeCommentsFromHtml is not fully visible in this view; presumably it strips
// occurrences of the marker located between styleIndex and styleEndIndex.
html = removeCommentsFromHtml(html, HtmlCommentStart, styleEndIndex, styleIndex);
html = removeCommentsFromHtml(html, HtmlCommentStart2, styleEndIndex, styleIndex);
html = removeCommentsFromHtml(html, HtmlCommentEnd, styleEndIndex, styleIndex);

// Resume the search just past the closing tag of the block that was handled.
// NOTE(review): when styleEndIndex is -1 (opening tag without a closing tag) this
// restarts the search at index 0 - confirm the loop still terminates in that case.
({ styleIndex, styleEndIndex } = extractHtmlIndexes(html, styleEndIndex + 1));
({ styleIndex, styleEndIndex } = findStyleTagIndexes(html, styleEndIndex + 1));
}

return html;
}

/**
 * Find the next `<style` opening tag and the `</style>` closing tag that follows it.
 * An opening match is skipped when the character right after `<style` is a word
 * character (or when nothing follows it), so `<styles>` is not treated as a style tag.
 * @param html The HTML string to scan
 * @param startIndex Index to start searching from (default 0)
 * @returns `styleIndex` (start of `<style`) and `styleEndIndex` (start of `</style>`);
 * either is `-1` when not found. When an opening tag is found, `styleEndIndex` is never
 * smaller than `styleIndex`.
 */
function extractHtmlIndexes(html: string, startIndex: number = 0) {
    // Lowercase ASCII letters only. String.prototype.toLowerCase can change the length
    // of the string (e.g. U+0130 becomes two code units), which would make the indexes
    // found below point at the wrong position in the original `html`.
    const htmlLowercase = html.replace(/[A-Z]+/g, letters => letters.toLowerCase());
    let styleIndex = htmlLowercase.indexOf(styleTag, startIndex);

    // Skip matches such as `<styles` where the tag name continues with a word character.
    while (
        styleIndex > -1 &&
        !nonWordCharacterRegex.test(html.charAt(styleIndex + styleTag.length))
    ) {
        styleIndex = htmlLowercase.indexOf(styleTag, styleIndex + 1);
    }

    // Look for the closing tag after the opening tag, so a stray `</style>` that appears
    // earlier in the string cannot be paired with it. Without an opening tag, keep
    // searching from startIndex as before.
    const closingSearchStart = styleIndex > -1 ? styleIndex + styleTag.length : startIndex;
    const styleEndIndex = htmlLowercase.indexOf(styleClosingTag, closingSearchStart);
    return { styleIndex, styleEndIndex };
}

function removeCommentsFromHtml(
html: string,
marker: string,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
const StyleTag = '<style';
const StyleClosingTag = '</style>';
const nonWordCharacterRegex = /\W/;
const asciiUppercaseRegex = /[A-Z]+/g;

/**
 * Lowercase only the ASCII letters of a string.
 * Unlike `String.prototype.toLowerCase`, the result always has the same length as the
 * input (full Unicode lowercasing can expand a character, e.g. U+0130 becomes two code
 * units), so indexes found in the result are valid for the original string too.
 */
function toAsciiLowerCase(text: string): string {
    return text.replace(asciiUppercaseRegex, letters => letters.toLowerCase());
}

/**
 * Find the indexes of the next `<style>...</style>` block in an HTML string.
 * The search is case-insensitive. The opening match is rejected if the character
 * after `<style` is a word character (e.g. `<styles>` would otherwise falsely match)
 * or if nothing follows it.
 * @param html The HTML string to scan
 * @param startIndex Index to start searching from (default 0)
 * @returns Object with `styleIndex` (start of `<style`) and `styleEndIndex` (start of `</style>`).
 * Either may be `-1` if not found. When an opening tag is found, the closing tag is
 * searched for after it, so `styleEndIndex` is never smaller than `styleIndex`.
 */
export function findStyleTagIndexes(
    html: string,
    startIndex: number = 0
): { styleIndex: number; styleEndIndex: number } {
    // Same length as `html`, so every index below applies to the original string.
    const htmlLowercase = toAsciiLowerCase(html);
    let styleIndex = htmlLowercase.indexOf(StyleTag, startIndex);

    // Skip matches such as `<styles` where the tag name continues with a word character.
    while (
        styleIndex > -1 &&
        !nonWordCharacterRegex.test(html.charAt(styleIndex + StyleTag.length))
    ) {
        styleIndex = htmlLowercase.indexOf(StyleTag, styleIndex + 1);
    }

    // Pair the opening tag with the first closing tag that comes after it; a stray
    // `</style>` located between startIndex and the opening tag must not be used.
    // Without an opening tag, keep searching from startIndex as before.
    const closingSearchStart = styleIndex > -1 ? styleIndex + StyleTag.length : startIndex;
    const styleEndIndex = htmlLowercase.indexOf(StyleClosingTag, closingSearchStart);
    return { styleIndex, styleEndIndex };
}
1 change: 1 addition & 0 deletions packages/roosterjs-content-model-dom/lib/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ export { reuseCachedElement } from './domUtils/reuseCachedElement';
export { isWhiteSpacePreserved } from './domUtils/isWhiteSpacePreserved';
export { normalizeRect } from './domUtils/normalizeRect';
export { scrollRectIntoView } from './domUtils/scrollRectIntoView';
export { findStyleTagIndexes } from './domUtils/findStyleTagIndexes';

export { setLinkUndeletable, isLinkUndeletable } from './domUtils/hiddenProperties/undeletableLink';

Expand Down
Original file line number Diff line number Diff line change
@@ -1,41 +1,23 @@
import { getObjectKeys } from 'roosterjs-content-model-dom';
import { findStyleTagIndexes, getObjectKeys } from 'roosterjs-content-model-dom';
import type { WordMetadata } from './WordMetadata';

// Matches runs of newline, tab, single-quote, double-quote and curly-brace characters.
const FORMATING_REGEX = /[\n\t'{}"]+/g;
// Lowercase tag fragments that extractHtmlIndexes looks up in a lowercased copy of the HTML.
// STYLE_TAG.length is also used by extractStyleTagsFromHtml to skip past `<style`.
const STYLE_TAG = '<style';
const STYLE_TAG_END = '</style>';
// Matches a single non-word character; used to inspect the character right after `<style`.
const nonWordCharacterRegex = /\W/;

/**
 * Collect the text of every style block found in an HTML string.
 * Each entry is the trimmed text between the end of the `<style` fragment and the
 * start of the matching `</style>`, so it still begins with whatever follows
 * `<style` in the opening tag (any attributes and the closing `>`).
 * @param htmlContent The HTML string to scan
 * @returns One string per style block, in document order; empty when none is found
 */
function extractStyleTagsFromHtml(htmlContent: string): string[] {
const styles: string[] = [];

let { styleIndex, styleEndIndex } = extractHtmlIndexes(htmlContent);
let { styleIndex, styleEndIndex } = findStyleTagIndexes(htmlContent);
// Stop as soon as either the opening or the closing tag can no longer be found.
while (styleIndex >= 0 && styleEndIndex >= 0) {
const styleContent = htmlContent
.substring(styleIndex + STYLE_TAG.length, styleEndIndex)
.trim();
styles.push(styleContent);
// Continue scanning just past the closing tag of the block that was collected.
({ styleIndex, styleEndIndex } = extractHtmlIndexes(htmlContent, styleEndIndex + 1));
({ styleIndex, styleEndIndex } = findStyleTagIndexes(htmlContent, styleEndIndex + 1));
}
return styles;
}

/**
 * Find the next `<style` opening tag and the `</style>` closing tag that follows it.
 * An opening match is skipped when the character right after `<style` is a word
 * character (or when nothing follows it), so `<styles>` is not treated as a style tag.
 * @param html The HTML string to scan
 * @param startIndex Index to start searching from (default 0)
 * @returns `styleIndex` (start of `<style`) and `styleEndIndex` (start of `</style>`);
 * either is `-1` when not found. When an opening tag is found, `styleEndIndex` is never
 * smaller than `styleIndex`.
 */
function extractHtmlIndexes(html: string, startIndex: number = 0) {
    // Fold ASCII letters only: full Unicode lowercasing may lengthen the string
    // (e.g. U+0130 becomes two code units) and shift every later index away from
    // its position in the original `html`.
    const searchable = html.replace(/[A-Z]+/g, upper => upper.toLowerCase());
    const tagLength = STYLE_TAG.length;
    let styleIndex = searchable.indexOf(STYLE_TAG, startIndex);

    // Reject candidates such as `<styles` whose tag name continues with a word character.
    while (styleIndex > -1 && !nonWordCharacterRegex.test(html.charAt(styleIndex + tagLength))) {
        styleIndex = searchable.indexOf(STYLE_TAG, styleIndex + 1);
    }

    // The closing tag must come after the opening tag; an earlier stray `</style>`
    // is ignored. Without an opening tag, keep searching from startIndex as before.
    const closingSearchStart = styleIndex > -1 ? styleIndex + tagLength : startIndex;
    const styleEndIndex = searchable.indexOf(STYLE_TAG_END, closingSearchStart);
    return { styleIndex, styleEndIndex };
}

/**
* @internal
* Word Desktop content has a style tag that contains data for the lists.
Expand Down
Loading