microsoft · JiuqingSong · May 21, 2026
diff --git a/packages/roosterjs-content-model-core/lib/command/paste/cleanHtmlComments.ts b/packages/roosterjs-content-model-core/lib/command/paste/cleanHtmlComments.ts
@@ -1,44 +1,31 @@
+import { findStyleTagIndexes } from 'roosterjs-content-model-dom';
+
 const HtmlCommentStart = '\x3C!--';
 const HtmlCommentStart2 = '<!--';
 const HtmlCommentEnd = '-->';
-const styleTag = '<style';
-const styleClosingTag = '</style>';
-const nonWordCharacterRegex = /\W/;
 
 /**
  * @internal
  * Exported only for unit test
+ * Applies removeCommentsFromHtml, once per HTML comment marker, to each style range
+ * that findStyleTagIndexes reports, and returns the resulting HTML.
  */
 export function cleanHtmlComments(html: string) {
-    let { styleIndex, styleEndIndex } = extractHtmlIndexes(html);
+    let { styleIndex, styleEndIndex } = findStyleTagIndexes(html);
 
-    while (styleIndex > -1) {
+    // Require a closing tag as well: without one, the next search would restart at
+    // index 0, find the same opening tag again and the loop would never terminate.
+    while (styleIndex > -1 && styleEndIndex > -1) {
         html = removeCommentsFromHtml(html, HtmlCommentStart, styleEndIndex, styleIndex);
         html = removeCommentsFromHtml(html, HtmlCommentStart2, styleEndIndex, styleIndex);
         html = removeCommentsFromHtml(html, HtmlCommentEnd, styleEndIndex, styleIndex);
 
-        ({ styleIndex, styleEndIndex } = extractHtmlIndexes(html, styleEndIndex + 1));
+        ({ styleIndex, styleEndIndex } = findStyleTagIndexes(html, styleEndIndex + 1));
     }
 
     return html;
 }
 
-function extractHtmlIndexes(html: string, startIndex: number = 0) {
-    const htmlLowercase = html.toLowerCase();
-    let styleIndex = htmlLowercase.indexOf(styleTag, startIndex);
-    let currentIndex = styleIndex + styleTag.length;
-    let nextChar = html.substring(currentIndex, currentIndex + 1);
-
-    while (!nonWordCharacterRegex.test(nextChar) && styleIndex > -1) {
-        styleIndex = htmlLowercase.indexOf(styleTag, styleIndex + 1);
-        currentIndex = styleIndex + styleTag.length;
-        nextChar = html.substring(currentIndex, currentIndex + 1);
-    }
-
-    const styleEndIndex = htmlLowercase.indexOf(styleClosingTag, startIndex);
-    return { styleIndex, styleEndIndex };
-}
-
 function removeCommentsFromHtml(
     html: string,
     marker: string,

diff --git a/packages/roosterjs-content-model-dom/lib/domUtils/findStyleTagIndexes.ts b/packages/roosterjs-content-model-dom/lib/domUtils/findStyleTagIndexes.ts
@@ -0,0 +1,40 @@
+const StyleTagLength = '<style'.length;
+// The lookahead rejects `<style` followed by a word character (e.g. `<styles>`).
+const StyleTagRegex = /<style(?=\W)/gi;
+const StyleClosingTagRegex = /<\/style>/gi;
+
+/**
+ * Case-insensitive search on the original string. Searching a lower-cased copy is
+ * avoided because toLowerCase() can change the string length (e.g. U+0130 becomes
+ * two code units), which shifts every index found after such a character.
+ * @returns Index of the first match at or after `startIndex`, or `-1` if there is none.
+ */
+function indexOfMatch(html: string, regex: RegExp, startIndex: number): number {
+    // `regex` is global, so exec() begins scanning at lastIndex
+    regex.lastIndex = Math.max(startIndex, 0);
+    const match = regex.exec(html);
+    return match ? match.index : -1;
+}
+
+/**
+ * Find the indexes of the next `<style>...</style>` block in an HTML string.
+ * The opening match is rejected if the character after `<style` is a word
+ * character (e.g. `<styles>` would otherwise falsely match).
+ * The closing tag is searched for after the opening tag, so a stray `</style>`
+ * that precedes it is ignored. When there is no opening tag, the closing tag is
+ * searched for from `startIndex`.
+ * @param html The HTML string to scan
+ * @param startIndex Index to start searching from (default 0)
+ * @returns Object with `styleIndex` (start of `<style`) and `styleEndIndex` (start of `</style>`).
+ *          Either may be `-1` if not found.
+ */
+export function findStyleTagIndexes(
+    html: string,
+    startIndex: number = 0
+): { styleIndex: number; styleEndIndex: number } {
+    const styleIndex = indexOfMatch(html, StyleTagRegex, startIndex);
+    const closingSearchStart = styleIndex > -1 ? styleIndex + StyleTagLength : startIndex;
+    const styleEndIndex = indexOfMatch(html, StyleClosingTagRegex, closingSearchStart);
+
+    return { styleIndex, styleEndIndex };
+}
diff --git a/packages/roosterjs-content-model-dom/lib/index.ts b/packages/roosterjs-content-model-dom/lib/index.ts
@@ -40,6 +40,7 @@ export { reuseCachedElement } from './domUtils/reuseCachedElement';
 export { isWhiteSpacePreserved } from './domUtils/isWhiteSpacePreserved';
 export { normalizeRect } from './domUtils/normalizeRect';
 export { scrollRectIntoView } from './domUtils/scrollRectIntoView';
+export { findStyleTagIndexes } from './domUtils/findStyleTagIndexes';
 
 export { setLinkUndeletable, isLinkUndeletable } from './domUtils/hiddenProperties/undeletableLink';
 

diff --git a/packages/roosterjs-content-model-plugins/lib/paste/WordDesktop/getStyleMetadata.ts b/packages/roosterjs-content-model-plugins/lib/paste/WordDesktop/getStyleMetadata.ts
@@ -1,41 +1,29 @@
-import { getObjectKeys } from 'roosterjs-content-model-dom';
+import { findStyleTagIndexes, getObjectKeys } from 'roosterjs-content-model-dom';
 import type { WordMetadata } from './WordMetadata';
 
 const FORMATING_REGEX = /[\n\t'{}"]+/g;
 const STYLE_TAG = '<style';
-const STYLE_TAG_END = '</style>';
-const nonWordCharacterRegex = /\W/;
 
+/**
+ * Collect the trimmed text of every style block that findStyleTagIndexes reports.
+ * Each entry starts right after the literal `<style`, so it still includes the rest
+ * of the opening tag, and ends just before the reported `</style>` index.
+ * Scanning stops as soon as either index comes back negative.
+ */
 function extractStyleTagsFromHtml(htmlContent: string): string[] {
     const styles: string[] = [];
 
-    let { styleIndex, styleEndIndex } = extractHtmlIndexes(htmlContent);
+    let { styleIndex, styleEndIndex } = findStyleTagIndexes(htmlContent);
     while (styleIndex >= 0 && styleEndIndex >= 0) {
         const styleContent = htmlContent
             .substring(styleIndex + STYLE_TAG.length, styleEndIndex)
             .trim();
         styles.push(styleContent);
-        ({ styleIndex, styleEndIndex } = extractHtmlIndexes(htmlContent, styleEndIndex + 1));
+        ({ styleIndex, styleEndIndex } = findStyleTagIndexes(htmlContent, styleEndIndex + 1));
     }
     return styles;
 }
 
-function extractHtmlIndexes(html: string, startIndex: number = 0) {
-    const htmlLowercase = html.toLowerCase();
-    let styleIndex = htmlLowercase.indexOf(STYLE_TAG, startIndex);
-    let currentIndex = styleIndex + STYLE_TAG.length;
-    let nextChar = html.substring(currentIndex, currentIndex + 1);
-
-    while (!nonWordCharacterRegex.test(nextChar) && styleIndex > -1) {
-        styleIndex = htmlLowercase.indexOf(STYLE_TAG, styleIndex + 1);
-        currentIndex = styleIndex + STYLE_TAG.length;
-        nextChar = html.substring(currentIndex, currentIndex + 1);
-    }
-
-    const styleEndIndex = htmlLowercase.indexOf(STYLE_TAG_END, startIndex);
-    return { styleIndex, styleEndIndex };
-}
-
 /**
  * @internal
  * Word Desktop content has a style tag that contains data for the lists.