diff --git a/sapl/base/search_indexes.py b/sapl/base/search_indexes.py
index f685cb002..ed811cebb 100644
--- a/sapl/base/search_indexes.py
+++ b/sapl/base/search_indexes.py
@@ -21,6 +21,7 @@ from sapl.compilacao.models import (STATUS_TA_IMMUTABLE_PUBLIC,
 from sapl.materia.models import DocumentoAcessorio, MateriaLegislativa
 from sapl.norma.models import NormaJuridica
 from sapl.settings import BASE_DIR, SOLR_URL
+from sapl.utils import RemoveTag
 
 logger = logging.getLogger(BASE_DIR.name)
 
@@ -51,10 +52,17 @@ class TextExtractField(CharField):
         return extracted_data
 
     def whoosh_extraction(self, arquivo):
-        return textract.process(
-            arquivo.path,
-            language='pt-br').decode('utf-8').replace('\n', ' ').replace(
-            '\t', ' ')
+        """Return the text of ``arquivo`` with newlines and tabs as spaces."""
+        if arquivo.path.endswith(('html', 'xml')):
+            # Markup is read as text and then stripped of tags by RemoveTag.
+            with open(arquivo.path, 'r', encoding="utf8", errors='ignore') as f:
+                # ' '.join(str) would put a space between every character,
+                # so only newlines and tabs are turned into spaces here.
+                content = f.read().replace('\n', ' ').replace('\t', ' ')
+            return RemoveTag(content)
+
+        extracted = textract.process(arquivo.path, language='pt-br')
+        return extracted.decode('utf-8').replace('\n', ' ').replace('\t', ' ')
 
     def print_error(self, arquivo):
         msg = 'Erro inesperado processando arquivo: %s' % (
@@ -82,7 +90,8 @@ class TextExtractField(CharField):
             except ExtensionNotSupported as e:
                 print(str(e))
                 logger.error(str(e))
-            except Exception:
+            except Exception as e2:
+                print(str(e2))
                 self.print_error(arquivo)
         return ''
 
diff --git a/sapl/utils.py b/sapl/utils.py
index 196e5b5cf..8f123b2ef 100644
--- a/sapl/utils.py
+++ b/sapl/utils.py
@@ -716,3 +716,19 @@ def TrocaTag(texto, startTag, endTag, sizeStart, sizeEnd, styleName):
                 i = i + 1
 
     return textoSaida
+
+
+def RemoveTag(texto):
+    """Return ``texto`` with every span skipped by ``ExtraiTag`` removed."""
+    # NOTE(review): assumes ExtraiTag(texto, i) returns an index past the tag.
+    pedacos = []  # pieces are joined once instead of growing a str by +=
+    i = 0
+    tamanho = len(texto)
+    while i < tamanho:
+        if texto[i] == '<':
+            # Resume scanning at whatever index ExtraiTag hands back.
+            i = ExtraiTag(texto, i)
+        else:
+            pedacos.append(texto[i])
+            i += 1
+    return ''.join(pedacos)
\ No newline at end of file