diff --git a/packages/roosterjs-content-model-core/lib/command/paste/cleanHtmlComments.ts b/packages/roosterjs-content-model-core/lib/command/paste/cleanHtmlComments.ts index cfb8bddb3804..a33d5260c4ea 100644 --- a/packages/roosterjs-content-model-core/lib/command/paste/cleanHtmlComments.ts +++ b/packages/roosterjs-content-model-core/lib/command/paste/cleanHtmlComments.ts @@ -1,44 +1,27 @@ +import { findStyleTagIndexes } from 'roosterjs-content-model-dom'; + const HtmlCommentStart = '\x3C!--'; const HtmlCommentStart2 = ''; -const styleTag = ' -1) { html = removeCommentsFromHtml(html, HtmlCommentStart, styleEndIndex, styleIndex); html = removeCommentsFromHtml(html, HtmlCommentStart2, styleEndIndex, styleIndex); html = removeCommentsFromHtml(html, HtmlCommentEnd, styleEndIndex, styleIndex); - ({ styleIndex, styleEndIndex } = extractHtmlIndexes(html, styleEndIndex + 1)); + ({ styleIndex, styleEndIndex } = findStyleTagIndexes(html, styleEndIndex + 1)); } return html; } -function extractHtmlIndexes(html: string, startIndex: number = 0) { - const htmlLowercase = html.toLowerCase(); - let styleIndex = htmlLowercase.indexOf(styleTag, startIndex); - let currentIndex = styleIndex + styleTag.length; - let nextChar = html.substring(currentIndex, currentIndex + 1); - - while (!nonWordCharacterRegex.test(nextChar) && styleIndex > -1) { - styleIndex = htmlLowercase.indexOf(styleTag, styleIndex + 1); - currentIndex = styleIndex + styleTag.length; - nextChar = html.substring(currentIndex, currentIndex + 1); - } - - const styleEndIndex = htmlLowercase.indexOf(styleClosingTag, startIndex); - return { styleIndex, styleEndIndex }; -} - function removeCommentsFromHtml( html: string, marker: string, diff --git a/packages/roosterjs-content-model-dom/lib/domUtils/findStyleTagIndexes.ts b/packages/roosterjs-content-model-dom/lib/domUtils/findStyleTagIndexes.ts new file mode 100644 index 000000000000..1755a2b707ab --- /dev/null +++ b/packages/roosterjs-content-model-dom/lib/domUtils/findStyleTagIndexes.ts @@ -0,0 +1,31 @@ +const StyleTag = '...` block in an HTML string. + * The opening match is rejected if the character after `` would otherwise falsely match). + * @param html The HTML string to scan + * @param startIndex Index to start searching from (default 0) + * @returns Object with `styleIndex` (start of ``). + * Either may be `-1` if not found. + */ +export function findStyleTagIndexes( + html: string, + startIndex: number = 0 +): { styleIndex: number; styleEndIndex: number } { + const htmlLowercase = html.toLowerCase(); + let styleIndex = htmlLowercase.indexOf(StyleTag, startIndex); + let currentIndex = styleIndex + StyleTag.length; + let nextChar = html.substring(currentIndex, currentIndex + 1); + + while (!nonWordCharacterRegex.test(nextChar) && styleIndex > -1) { + styleIndex = htmlLowercase.indexOf(StyleTag, styleIndex + 1); + currentIndex = styleIndex + StyleTag.length; + nextChar = html.substring(currentIndex, currentIndex + 1); + } + + const styleEndIndex = htmlLowercase.indexOf(StyleClosingTag, startIndex); + return { styleIndex, styleEndIndex }; +} diff --git a/packages/roosterjs-content-model-dom/lib/index.ts b/packages/roosterjs-content-model-dom/lib/index.ts index df09746614b0..619498208d7e 100644 --- a/packages/roosterjs-content-model-dom/lib/index.ts +++ b/packages/roosterjs-content-model-dom/lib/index.ts @@ -40,6 +40,7 @@ export { reuseCachedElement } from './domUtils/reuseCachedElement'; export { isWhiteSpacePreserved } from './domUtils/isWhiteSpacePreserved'; export { normalizeRect } from './domUtils/normalizeRect'; export { scrollRectIntoView } from './domUtils/scrollRectIntoView'; +export { findStyleTagIndexes } from './domUtils/findStyleTagIndexes'; export { setLinkUndeletable, isLinkUndeletable } from './domUtils/hiddenProperties/undeletableLink'; diff --git a/packages/roosterjs-content-model-plugins/lib/paste/WordDesktop/getStyleMetadata.ts b/packages/roosterjs-content-model-plugins/lib/paste/WordDesktop/getStyleMetadata.ts index 0bed31e4cc41..033bec769b8a 100644 --- a/packages/roosterjs-content-model-plugins/lib/paste/WordDesktop/getStyleMetadata.ts +++ b/packages/roosterjs-content-model-plugins/lib/paste/WordDesktop/getStyleMetadata.ts @@ -1,41 +1,23 @@ -import { getObjectKeys } from 'roosterjs-content-model-dom'; +import { findStyleTagIndexes, getObjectKeys } from 'roosterjs-content-model-dom'; import type { WordMetadata } from './WordMetadata'; const FORMATING_REGEX = /[\n\t'{}"]+/g; const STYLE_TAG = '= 0 && styleEndIndex >= 0) { const styleContent = htmlContent .substring(styleIndex + STYLE_TAG.length, styleEndIndex) .trim(); styles.push(styleContent); - ({ styleIndex, styleEndIndex } = extractHtmlIndexes(htmlContent, styleEndIndex + 1)); + ({ styleIndex, styleEndIndex } = findStyleTagIndexes(htmlContent, styleEndIndex + 1)); } return styles; } -function extractHtmlIndexes(html: string, startIndex: number = 0) { - const htmlLowercase = html.toLowerCase(); - let styleIndex = htmlLowercase.indexOf(STYLE_TAG, startIndex); - let currentIndex = styleIndex + STYLE_TAG.length; - let nextChar = html.substring(currentIndex, currentIndex + 1); - - while (!nonWordCharacterRegex.test(nextChar) && styleIndex > -1) { - styleIndex = htmlLowercase.indexOf(STYLE_TAG, styleIndex + 1); - currentIndex = styleIndex + STYLE_TAG.length; - nextChar = html.substring(currentIndex, currentIndex + 1); - } - - const styleEndIndex = htmlLowercase.indexOf(STYLE_TAG_END, startIndex); - return { styleIndex, styleEndIndex }; -} - /** * @internal * Word Desktop content has a style tag that contains data for the lists.