Skip to content

Loaders

Loaders simplify the process of ingesting data into stores by handling parsing, preprocessing, and metadata generation. They abstract away the complexity of working directly with stores.

TextLoader

Handles various text formats including plain text, PDFs, and web content.

from agenticrag.loaders import TextLoader
from langchain_google_genai import ChatGoogleGenerativeAI

# The LLM powers automatic description generation (among other things).
# Any LangChain chat model can be substituted here.
llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", api_key="<your_api_key>")

# Wire the loader to the stores it should write to
# (text_store and meta_store are assumed to have been created beforehand).
text_loader = TextLoader(
    text_store=text_store,
    meta_store=meta_store,
    chunk_size=2000,
    chunk_overlap=200,
    llm=llm,  # optional: omit it to fall back to DEFAULT_LLM (Gemini-2.0-flash)
)

# Ingest a PDF; chunking and metadata generation happen automatically.
text_loader.load_pdf(path="data/research_paper.pdf")

# Ingest a page straight from a web URL.
text_loader.load_web(url="https://example.com/article", name="web_article")

TableLoader

Specializes in loading tabular data with automatic schema detection.

from agenticrag.loaders import TableLoader

# Create the loader on top of the table and metadata stores
# (table_store, meta_store and llm are assumed to exist already).
table_loader = TableLoader(
    table_store=table_store,
    meta_store=meta_store,
    persistence_dir="./table_data",
    llm=llm,
)

# Ingest a CSV file; its schema is inferred automatically.
table_loader.load_csv(path="data/customer_data.csv", name="customers")

# Ingest an in-memory pandas DataFrame (read from an Excel sheet here).
import pandas as pd

df = pd.read_excel("data/financial_report.xlsx")
table_loader.load_dataframe(df=df, name="financial_data")

Creating Custom Loaders

You can create custom loaders for specialized data sources:

from agenticrag.loaders import BaseLoader
from agenticrag.types.exceptions import LoaderError

class JSONLoader(BaseLoader):
    """Example custom loader that ingests a JSON file as pretty-printed text.

    A metadata record is written to ``meta_store`` first, then the text
    content is written to ``text_store``. If the second write fails, the
    metadata record is removed again so the two stores stay consistent.

    NOTE(review): ``MetaData``, ``DataFormat`` and ``TextData`` are used below
    but are not imported in this snippet; import them from the package's
    type definitions before running (exact module path not shown here).
    """

    def __init__(self, text_store, meta_store):
        """Keep references to the stores this loader writes to.

        Args:
            text_store: Store that receives the text content.
            meta_store: Store that receives the metadata record.
        """
        self.text_store = text_store
        self.meta_store = meta_store

    def load_json(self, path, name, description=None):
        """Load a JSON file and register it in both stores.

        Args:
            path: Path of the JSON file to read.
            name: Name under which the data is registered; also used to
                build the text record id (``json_<name>``).
            description: Optional description for the metadata record.
                Defaults to ``"JSON data from <path>"``.

        Returns:
            The value returned by ``meta_store.add`` for the new metadata
            record.

        Raises:
            LoaderError: If adding the text content to ``text_store`` fails.
                The metadata record is deleted before the error is raised.
            Exception: Errors raised while reading or parsing the file, or
                while adding the metadata record, propagate unchanged.
        """
        import json

        # JSON interchange is UTF-8 (RFC 8259); do not depend on the
        # platform's locale encoding.
        with open(path, "r", encoding="utf-8") as f:
            data = json.load(f)

        # Convert to text representation
        text_content = json.dumps(data, indent=2)

        # Add metadata
        meta = self.meta_store.add(
            MetaData(
                name=name,
                format=DataFormat.TEXT,
                description=description or f"JSON data from {path}",  # or use LLM to generate one
                source=path,
            )
        )

        try:
            self.text_store.add(
                TextData(
                    id=f"json_{name}",
                    name=name,
                    text=text_content,
                )
            )
        except Exception as e:
            # Always revert the metadata entry if adding to the main store
            # failed, so no metadata is left pointing at missing content.
            self.meta_store.delete(meta.id)
            raise LoaderError(
                f"Failed to load JSON data '{name}' from {path}"
            ) from e

        return meta