CharacterTextSplitter

gitinsp.infrastructure.parser.CharacterTextSplitter
class CharacterTextSplitter(_separator: String, _isSeparatorRegex: Boolean) extends TextSplitter

Attributes

Graph
Supertypes
class TextSplitter
trait LazyLogging
trait DocumentSplitter
class Object
trait Matchable
class Any
Show all

Members list

Type members

Classlikes

case object DontKeep extends KeepSeparator

Attributes

Supertypes
trait Singleton
trait Product
trait Mirror
trait Serializable
trait Product
trait Equals
class Object
trait Matchable
class Any
Show all
Self type
DontKeep.type
case object KeepAtEnd extends KeepSeparator

Attributes

Supertypes
trait Singleton
trait Product
trait Mirror
trait Serializable
trait Product
trait Equals
class Object
trait Matchable
class Any
Show all
Self type
KeepAtEnd.type
case object KeepAtStart extends KeepSeparator

Attributes

Supertypes
trait Singleton
trait Product
trait Mirror
trait Serializable
trait Product
trait Equals
class Object
trait Matchable
class Any
Show all
Self type
KeepAtStart.type
sealed trait KeepSeparator

Attributes

Supertypes
class Object
trait Matchable
class Any
Known subtypes
object DontKeep
object KeepAtEnd
object KeepAtStart

Value members

Concrete methods

def processEither(either: Either[Boolean, String]): Boolean
override def splitText(text: String): List[String]

Concrete override of the abstract splitText method that TextSplitter subclasses must implement. Defines the core splitting logic for a given text.

Takes the text to split and returns the resulting pieces as a List[String].

Attributes

Definition Classes
TextSplitter
def splitTextWithRegex(text: String, separator: String, keepSeparator: Either[Boolean, String]): List[String]

Inherited methods

def createDocuments(texts: List[String], metadatas: Option[List[Metadata]]): List[TextSegment]

Creates TextSegment objects from a list of texts and optional metadata.

Creates TextSegment objects from a list of texts and optional metadata. It calls the subclass's splitText implementation to get initial chunks and then formats them into TextSegments, potentially adding start index metadata.

Attributes

Inherited from:
TextSplitter
def createSegment(text: String, document: Document, index: Int): TextSegment

Attributes

Inherited from:
TextSplitter
def split(doc: Document): List[TextSegment]

Entry point for splitting a single Langchain4j Document.

Entry point for splitting a single Langchain4j Document.

Attributes

Inherited from:
TextSplitter
def splitAll(x$0: List[Document]): List[TextSegment]

Attributes

Inherited from:
DocumentSplitter
def splitDocuments(documents: Iterable[Document]): List[TextSegment]

Splits multiple documents into a list of TextSegments.

Splits multiple documents into a list of TextSegments.

Attributes

Inherited from:
TextSplitter

Inherited fields

val addStartIndex: Boolean

Attributes

Inherited from:
TextSplitter
val chunkOverlap: Int

Attributes

Inherited from:
TextSplitter
val chunkSize: Int

Attributes

Inherited from:
TextSplitter
val keepSeparator: Either[Boolean, String]

Attributes

Inherited from:
TextSplitter
val lengthFunction: String => Int

Attributes

Inherited from:
TextSplitter
lazy protected val logger: Logger

Attributes

Inherited from:
LazyLogging
val stripWhitespace: Boolean

Attributes

Inherited from:
TextSplitter