Skip to content

structure

__all__ = ['BaseConversationMemory', 'ConversationMemory', 'Run', 'SummaryConversationMemory'] module-attribute

BaseConversationMemory

Bases: SerializableMixin, ABC

Source code in griptape/memory/structure/base_conversation_memory.py
@define
class BaseConversationMemory(SerializableMixin, ABC):
    """Abstract base for conversation memories that record runs and feed them into a Prompt Stack.

    Subclasses decide how a run is recorded (``try_add_run``) and how the recorded runs
    are rendered as messages (``to_prompt_stack``). This base class wires those pieces
    together: it loads runs on construction, persists them after every added run, and
    splices them into an existing Prompt Stack.

    Attributes:
        conversation_memory_driver: Driver whose ``load()`` and ``store()`` are used to
            read and persist ``runs`` and ``meta``.
        runs: The runs currently held by this memory.
        meta: Metadata stored and loaded alongside the runs.
        autoload: When true, ``load_runs()`` is called from ``__attrs_post_init__``.
        autoprune: When true, ``add_to_prompt_stack()`` only adds as many runs as leave
            input tokens available according to the prompt driver's tokenizer.
        max_runs: When truthy, ``after_add_run()`` drops runs from the front of ``runs``
            until at most this many remain. ``None`` (and ``0``) disable trimming.
    """

    conversation_memory_driver: BaseConversationMemoryDriver = field(
        default=Factory(lambda: Defaults.drivers_config.conversation_memory_driver), kw_only=True
    )
    runs: list[Run] = field(factory=list, kw_only=True, metadata={"serializable": True})
    meta: dict[str, Any] = field(factory=dict, kw_only=True, metadata={"serializable": True})
    autoload: bool = field(default=True, kw_only=True)
    autoprune: bool = field(default=True, kw_only=True)
    max_runs: int | None = field(default=None, kw_only=True, metadata={"serializable": True})

    def __attrs_post_init__(self) -> None:
        """Load runs from the driver right after construction when ``autoload`` is set."""
        if self.autoload:
            self.load_runs()

    def before_add_run(self) -> None:
        """Hook called by ``add_run`` before the run is recorded. No-op by default."""

    def add_run(self, run: Run) -> BaseConversationMemory:
        """Record ``run``, invoking the before/after hooks around ``try_add_run``.

        Args:
            run: The run to record.

        Returns:
            This memory instance, so calls can be chained.
        """
        self.before_add_run()
        self.try_add_run(run)
        self.after_add_run()

        return self

    def after_add_run(self) -> None:
        """Trim ``runs`` down to ``max_runs`` (when set) and persist runs and meta."""
        if self.max_runs:
            excess = len(self.runs) - self.max_runs
            if excess > 0:
                # One in-place slice delete instead of repeated pop(0): the list object
                # stays the same, the work is linear, and an over-large excess simply
                # empties the list rather than popping from an empty list.
                del self.runs[:excess]
        self.conversation_memory_driver.store(self.runs, self.meta)

    @abstractmethod
    def try_add_run(self, run: Run) -> None:
        """Record ``run`` in this memory. Implemented by subclasses."""

    @abstractmethod
    def to_prompt_stack(self, last_n: int | None = None) -> PromptStack:
        """Render the recorded runs as a Prompt Stack. Implemented by subclasses.

        Args:
            last_n: Optional cap on how many of the most recent runs to include.
        """

    def load_runs(self) -> list[Run]:
        """Fetch runs and meta from the driver and fold them into this memory.

        Loaded runs are appended to ``runs``; loaded meta is combined with the current
        ``meta`` via ``dict_merge``.

        Returns:
            The memory's ``runs`` list after loading.
        """
        runs, meta = self.conversation_memory_driver.load()
        self.runs.extend(runs)
        self.meta = dict_merge(self.meta, meta)

        return self.runs

    def add_to_prompt_stack(
        self, prompt_driver: BasePromptDriver, prompt_stack: PromptStack, index: int | None = None
    ) -> PromptStack:
        """Add the Conversation Memory runs to the Prompt Stack by modifying the messages in place.

        If autoprune is enabled, this will fit as many Conversation Memory runs into the Prompt Stack
        as possible without exceeding the token limit.

        Args:
            prompt_driver: The Prompt Driver to use for token counting.
            prompt_stack: The Prompt Stack to add the Conversation Memory to.
            index: Optional index to insert the Conversation Memory runs at.
                   Defaults to appending to the end of the Prompt Stack.

        Returns:
            The same ``prompt_stack`` object that was passed in.
        """
        num_runs_to_fit_in_prompt = len(self.runs)

        if self.autoprune:
            should_prune = True
            temp_stack = PromptStack()

            # Try to determine how many Conversation Memory runs we can
            # fit into the Prompt Stack without exceeding the token limit.
            while should_prune and num_runs_to_fit_in_prompt > 0:
                temp_stack.messages = prompt_stack.messages.copy()

                # Add n runs from Conversation Memory.
                # Where we insert into the Prompt Stack doesn't matter here
                # since we only care about the total token count.
                memory_inputs = self.to_prompt_stack(num_runs_to_fit_in_prompt).messages
                temp_stack.messages.extend(memory_inputs)

                # Convert the Prompt Stack into tokens left.
                tokens_left = prompt_driver.tokenizer.count_input_tokens_left(
                    prompt_driver.prompt_stack_to_string(temp_stack),
                )
                if tokens_left > 0:
                    # There are still tokens left, no need to prune.
                    should_prune = False
                else:
                    # There were not any tokens left, prune one run and try again.
                    num_runs_to_fit_in_prompt -= 1

        if num_runs_to_fit_in_prompt:
            memory_inputs = self.to_prompt_stack(num_runs_to_fit_in_prompt).messages
            if index is None:
                prompt_stack.messages.extend(memory_inputs)
            else:
                # Slice assignment inserts the messages at ``index`` without replacing any.
                prompt_stack.messages[index:index] = memory_inputs

        return prompt_stack

autoload = field(default=True, kw_only=True) class-attribute instance-attribute

autoprune = field(default=True, kw_only=True) class-attribute instance-attribute

conversation_memory_driver = field(default=Factory(lambda: Defaults.drivers_config.conversation_memory_driver), kw_only=True) class-attribute instance-attribute

max_runs = field(default=None, kw_only=True, metadata={'serializable': True}) class-attribute instance-attribute

meta = field(factory=dict, kw_only=True, metadata={'serializable': True}) class-attribute instance-attribute

runs = field(factory=list, kw_only=True, metadata={'serializable': True}) class-attribute instance-attribute

__attrs_post_init__()

Source code in griptape/memory/structure/base_conversation_memory.py
def __attrs_post_init__(self) -> None:
    """Post-construction hook: pull runs in via ``load_runs()`` unless ``autoload`` is off."""
    if not self.autoload:
        return
    self.load_runs()

add_run(run)

Source code in griptape/memory/structure/base_conversation_memory.py
def add_run(self, run: Run) -> BaseConversationMemory:
    """Record ``run`` and hand back this memory so calls can be chained.

    The steps run in a fixed order: ``before_add_run()``, then ``try_add_run(run)``,
    then ``after_add_run()``.

    Args:
        run: The run to record.

    Returns:
        This memory instance.
    """
    self.before_add_run()
    self.try_add_run(run)
    self.after_add_run()
    return self

add_to_prompt_stack(prompt_driver, prompt_stack, index=None)

Add the Conversation Memory runs to the Prompt Stack by modifying the messages in place.

If autoprune is enabled, this will fit as many Conversation Memory runs into the Prompt Stack as possible without exceeding the token limit.

Parameters:

Name Type Description Default
prompt_driver BasePromptDriver

The Prompt Driver to use for token counting.

required
prompt_stack PromptStack

The Prompt Stack to add the Conversation Memory to.

required
index int | None

Optional index to insert the Conversation Memory runs at. Defaults to appending to the end of the Prompt Stack.

None
Source code in griptape/memory/structure/base_conversation_memory.py
def add_to_prompt_stack(
    self, prompt_driver: BasePromptDriver, prompt_stack: PromptStack, index: int | None = None
) -> PromptStack:
    """Splice this memory's runs into ``prompt_stack``, mutating its message list in place.

    With autoprune enabled, the number of runs is reduced one at a time until the
    tokenizer of ``prompt_driver`` reports input tokens left for the combined stack
    (or until no runs remain).

    Args:
        prompt_driver: The Prompt Driver to use for token counting.
        prompt_stack: The Prompt Stack to add the Conversation Memory to.
        index: Optional index to insert the Conversation Memory runs at.
               Defaults to appending to the end of the Prompt Stack.

    Returns:
        The same ``prompt_stack`` object that was passed in.
    """
    run_count = len(self.runs)

    if self.autoprune:
        scratch_stack = PromptStack()

        # Shrink the candidate window one run at a time until the combined
        # stack still leaves input tokens available.
        while run_count > 0:
            # Position is irrelevant for counting, so the memory messages
            # are simply appended to a copy of the existing messages.
            scratch_stack.messages = prompt_stack.messages.copy()
            scratch_stack.messages.extend(self.to_prompt_stack(run_count).messages)

            remaining_tokens = prompt_driver.tokenizer.count_input_tokens_left(
                prompt_driver.prompt_stack_to_string(scratch_stack),
            )
            if remaining_tokens > 0:
                # Fits: stop pruning.
                break
            # Does not fit: drop one run and measure again.
            run_count -= 1

    if not run_count:
        # Nothing to add (no runs, or none of them fit).
        return prompt_stack

    memory_messages = self.to_prompt_stack(run_count).messages
    if index is None:
        prompt_stack.messages.extend(memory_messages)
    else:
        # Slice assignment inserts at ``index`` without overwriting existing messages.
        prompt_stack.messages[index:index] = memory_messages
    return prompt_stack

after_add_run()

Source code in griptape/memory/structure/base_conversation_memory.py
def after_add_run(self) -> None:
    """Trim ``runs`` down to ``max_runs`` (when set) and persist runs and meta.

    A falsy ``max_runs`` (``None`` or ``0``) disables trimming. Otherwise runs are
    removed from the front of the list until at most ``max_runs`` remain. The list is
    modified in place, so other references to it observe the trim.
    """
    if self.max_runs:
        excess = len(self.runs) - self.max_runs
        if excess > 0:
            # One in-place slice delete instead of repeated pop(0): linear work, and an
            # over-large excess empties the list rather than popping from an empty list.
            del self.runs[:excess]
    self.conversation_memory_driver.store(self.runs, self.meta)

before_add_run()

Source code in griptape/memory/structure/base_conversation_memory.py
def before_add_run(self) -> None:
    """Hook that runs before a run is recorded; intentionally does nothing here."""
    return None

load_runs()

Source code in griptape/memory/structure/base_conversation_memory.py
def load_runs(self) -> list[Run]:
    """Fetch runs and meta from the conversation memory driver and fold them in.

    Loaded runs are appended to the existing ``runs`` list; loaded meta is combined
    with the current ``meta`` through ``dict_merge`` and the result is stored back.

    Returns:
        The memory's ``runs`` list after loading.
    """
    loaded_runs, loaded_meta = self.conversation_memory_driver.load()
    self.runs.extend(loaded_runs)
    self.meta = dict_merge(self.meta, loaded_meta)
    return self.runs

to_prompt_stack(last_n=None) abstractmethod

Source code in griptape/memory/structure/base_conversation_memory.py
@abstractmethod
def to_prompt_stack(self, last_n: int | None = None) -> PromptStack:
    """Render the recorded runs as a Prompt Stack; concrete memories must override this.

    Args:
        last_n: Optional cap on how many of the most recent runs to include.
    """
    ...

try_add_run(run) abstractmethod

Source code in griptape/memory/structure/base_conversation_memory.py
@abstractmethod
def try_add_run(self, run: Run) -> None:
    """Record ``run`` in this memory; concrete memories must override this.

    Args:
        run: The run to record.
    """
    ...

ConversationMemory

Bases: BaseConversationMemory

Source code in griptape/memory/structure/conversation_memory.py
@define
class ConversationMemory(BaseConversationMemory):
    """Conversation memory that records each run as-is and replays runs as user/assistant message pairs."""

    def try_add_run(self, run: Run) -> None:
        """Append ``run`` to the end of ``runs``."""
        self.runs.append(run)

    def to_prompt_stack(self, last_n: int | None = None) -> PromptStack:
        """Build a Prompt Stack holding one user and one assistant message per run.

        Args:
            last_n: When truthy, only the last ``last_n`` runs are used; otherwise
                (``None`` or ``0``) every run is used.

        Returns:
            A new Prompt Stack with each run's input added as a user message followed
            by its output as an assistant message.
        """
        selected_runs = self.runs if not last_n else self.runs[-last_n:]
        stack = PromptStack()
        for entry in selected_runs:
            stack.add_user_message(entry.input)
            stack.add_assistant_message(entry.output)
        return stack

to_prompt_stack(last_n=None)

Source code in griptape/memory/structure/conversation_memory.py
def to_prompt_stack(self, last_n: int | None = None) -> PromptStack:
    """Build a Prompt Stack holding one user and one assistant message per run.

    Args:
        last_n: When truthy, only the last ``last_n`` runs are used; otherwise
            (``None`` or ``0``) every run is used.

    Returns:
        A new Prompt Stack with each run's input added as a user message followed
        by its output as an assistant message.
    """
    selected_runs = self.runs if not last_n else self.runs[-last_n:]
    stack = PromptStack()
    for entry in selected_runs:
        stack.add_user_message(entry.input)
        stack.add_assistant_message(entry.output)
    return stack

try_add_run(run)

Source code in griptape/memory/structure/conversation_memory.py
def try_add_run(self, run: Run) -> None:
    """Append ``run`` to the end of this memory's ``runs`` list."""
    history = self.runs
    history.append(run)

Run

Bases: SerializableMixin

Source code in griptape/memory/structure/run.py
@define(kw_only=True)
class Run(SerializableMixin):
    """A single recorded exchange: an input artifact, an output artifact, and optional metadata.

    All fields are keyword-only (``kw_only=True`` on the class), which is what allows
    the required ``input``/``output`` fields to follow fields that have defaults.
    """

    # Identifier for the run; defaults to a fresh random UUID4 rendered as a hex string.
    id: str = field(default=Factory(lambda: uuid.uuid4().hex), metadata={"serializable": True})
    # Optional free-form metadata attached to the run; ``None`` when not provided.
    meta: dict | None = field(default=None, metadata={"serializable": True})
    # Required: the artifact that went into the run.
    input: BaseArtifact = field(metadata={"serializable": True})
    # Required: the artifact the run produced.
    output: BaseArtifact = field(metadata={"serializable": True})

id = field(default=Factory(lambda: uuid.uuid4().hex), metadata={'serializable': True}) class-attribute instance-attribute

input = field(metadata={'serializable': True}) class-attribute instance-attribute

meta = field(default=None, metadata={'serializable': True}) class-attribute instance-attribute

output = field(metadata={'serializable': True}) class-attribute instance-attribute

SummaryConversationMemory

Bases: BaseConversationMemory

Conversation memory that automatically summarizes older runs, keeping a configurable number of recent runs in full detail.

The memory stores all runs in self.runs and automatically generates an LLM-powered summary of older runs as new ones are added. Only the summary and the most recent runs (controlled by offset) are included in the prompt context via to_prompt_stack().

Note on display utilities:

  • Conversation utility (griptape.utils.Conversation): Displays all runs stored in memory, not just the summary or the unsummarized portion. When used with SummaryConversationMemory, it prints every Q/A pair from self.runs followed by the generated summary. This is expected behavior -- the runs list preserves the full history for inspection, while the summary is used internally for prompt context.

  • Chat utility (griptape.utils.Chat): Calls Structure.run(), which invokes to_prompt_stack() internally. This means only the summary and the unsummarized recent runs (those within offset) are sent to the LLM as context.

Attributes:

Name Type Description
offset int

Maximum number of recent runs to keep unsummarized. When a new run is added and the count of unsummarized runs exceeds offset, the oldest excess runs are summarized into a single condensed summary string. Defaults to 1.

autoprune bool

Inherited from BaseConversationMemory. When enabled, add_to_prompt_stack() further trims the prompt context to fit within the model's token limit, on top of the summary/offset pruning already performed by to_prompt_stack(). Does not remove runs from self.runs.

Source code in griptape/memory/structure/summary_conversation_memory.py
@define
class SummaryConversationMemory(BaseConversationMemory):
    """Conversation memory that automatically summarizes older runs, keeping a configurable number of recent runs in full detail.

    The memory stores **all** runs in ``self.runs`` and automatically generates an LLM-powered
    summary of older runs as new ones are added. Only the summary and the most recent runs
    (controlled by ``offset``) are included in the prompt context via ``to_prompt_stack()``.

    Note on display utilities:

    - **Conversation utility** (``griptape.utils.Conversation``): Displays **all** runs stored
      in memory, not just the summary or the unsummarized portion. When used with
      ``SummaryConversationMemory``, it prints every Q/A pair from ``self.runs`` followed
      by the generated summary. This is expected behavior -- the runs list preserves the
      full history for inspection, while the summary is used internally for prompt context.

    - **Chat utility** (``griptape.utils.Chat``): Calls ``Structure.run()``, which invokes
      ``to_prompt_stack()`` internally. This means only the summary and the unsummarized
      recent runs (those within ``offset``) are sent to the LLM as context.

    Attributes:
        offset: Maximum number of recent runs to keep unsummarized. When a new run is
            added and the count of unsummarized runs exceeds ``offset``, the oldest excess runs are summarized
            into a single condensed summary string. Defaults to 1.
        autoprune: Inherited from ``BaseConversationMemory``. When enabled,
            ``add_to_prompt_stack()`` further trims the prompt context to fit within
            the model's token limit, on top of the summary/offset pruning already
            performed by ``to_prompt_stack()``. Does not remove runs from ``self.runs``.
        prompt_driver: Prompt driver used to generate summaries.
        summary: The current summary text, or ``None`` when nothing has been summarized.
        summary_index: Index into ``runs`` of the first run not yet covered by ``summary``.
        summary_get_template: Template that renders the summary into a prompt message.
        summarize_conversation_get_template: Template that renders the summarization request.
    """

    offset: int = field(default=1, kw_only=True, metadata={"serializable": True})
    prompt_driver: BasePromptDriver = field(
        kw_only=True, default=Factory(lambda: Defaults.drivers_config.prompt_driver)
    )
    summary: str | None = field(default=None, kw_only=True, metadata={"serializable": True})
    summary_index: int = field(default=0, kw_only=True, metadata={"serializable": True})
    summary_get_template: J2 = field(default=Factory(lambda: J2("memory/conversation/summary.j2")), kw_only=True)
    summarize_conversation_get_template: J2 = field(
        default=Factory(lambda: J2("memory/conversation/summarize_conversation.j2")),
        kw_only=True,
    )

    # Set meta['summary'] after initializing self.summary, because load_runs() will overwrite it with an empty value from meta.
    def __attrs_post_init__(self) -> None:
        """Seed ``meta`` with a constructor-supplied summary, then run the base post-init."""
        if self.summary is not None:
            self.meta["summary"] = self.summary
            self.meta["summary_index"] = self.summary_index
        super().__attrs_post_init__()

    def to_prompt_stack(self, last_n: int | None = None) -> PromptStack:
        """Build a Prompt Stack from the summary (if any) followed by the unsummarized runs.

        Args:
            last_n: Optional cap on how many recent runs to include; see ``unsummarized_runs``.

        Returns:
            A new Prompt Stack: the rendered summary as a user message when a summary
            exists, then one user and one assistant message per unsummarized run.
        """
        stack = PromptStack()
        if self.summary:
            stack.add_user_message(self.summary_get_template.render(summary=self.summary))
        for r in self.unsummarized_runs(last_n):
            stack.add_user_message(r.input)
            stack.add_assistant_message(r.output)
        return stack

    def unsummarized_runs(self, last_n: int | None = None) -> list[Run]:
        """Return the runs from ``summary_index`` onward, optionally capped to the last ``last_n``.

        Args:
            last_n: When truthy, the shorter of "runs from ``summary_index`` onward" and
                "last ``last_n`` runs" is returned. ``None`` or ``0`` applies no cap.
        """
        summary_index_runs = self.runs[self.summary_index :]

        if last_n:
            last_n_runs = self.runs[-last_n:]

            if len(summary_index_runs) > len(last_n_runs):
                return last_n_runs
            return summary_index_runs
        return summary_index_runs

    def try_add_run(self, run: Run) -> None:
        """Append ``run`` and fold any unsummarized runs beyond ``offset`` into the summary.

        Args:
            run: The run to record.
        """
        self.runs.append(run)
        unsummarized_runs = self.unsummarized_runs()
        runs_to_summarize = unsummarized_runs[: max(0, len(unsummarized_runs) - self.offset)]

        if runs_to_summarize:
            self.summary = self.summarize_runs(self.summary, runs_to_summarize)
            # The unsummarized runs are the tail of ``self.runs``, so the new boundary can
            # be computed positionally. Looking the last summarized run up with
            # ``list.index`` compares by equality and returns the first match, which
            # points at the wrong position when an equal run occurs earlier in the list.
            first_unsummarized = len(self.runs) - len(unsummarized_runs)
            self.summary_index = first_unsummarized + len(runs_to_summarize)

    def summarize_runs(self, previous_summary: str | None, runs: list[Run]) -> str | None:
        """Ask the prompt driver for a summary covering ``previous_summary`` plus ``runs``.

        Args:
            previous_summary: The summary so far, or ``None``.
            runs: Runs to fold into the summary.

        Returns:
            The new summary text. ``previous_summary`` is returned unchanged when ``runs``
            is empty or when any exception occurs (the exception is logged, not raised).
        """
        try:
            if len(runs) > 0:
                summary = self.summarize_conversation_get_template.render(summary=previous_summary, runs=runs)
                return self.prompt_driver.run(
                    PromptStack(messages=[Message(summary, role=Message.USER_ROLE)]),
                ).to_text()
            return previous_summary
        except Exception as e:
            # Best effort: a failed summarization must not break adding a run.
            logging.exception("Error summarizing memory: %s(%s)", type(e).__name__, e)

            return previous_summary

    def load_runs(self) -> list[Run]:
        """Load runs via the base class, then restore ``summary`` and ``summary_index`` from ``meta``."""
        runs = super().load_runs()
        self.summary = self.meta.get("summary")
        self.summary_index = self.meta.get("summary_index", 0)
        return runs

offset = field(default=1, kw_only=True, metadata={'serializable': True}) class-attribute instance-attribute

prompt_driver = field(kw_only=True, default=Factory(lambda: Defaults.drivers_config.prompt_driver)) class-attribute instance-attribute

summarize_conversation_get_template = field(default=Factory(lambda: J2('memory/conversation/summarize_conversation.j2')), kw_only=True) class-attribute instance-attribute

summary = field(default=None, kw_only=True, metadata={'serializable': True}) class-attribute instance-attribute

summary_get_template = field(default=Factory(lambda: J2('memory/conversation/summary.j2')), kw_only=True) class-attribute instance-attribute

summary_index = field(default=0, kw_only=True, metadata={'serializable': True}) class-attribute instance-attribute

__attrs_post_init__()

Source code in griptape/memory/structure/summary_conversation_memory.py
def __attrs_post_init__(self) -> None:
    """Seed ``meta`` with a constructor-supplied summary, then run the parent post-init.

    The summary and its index are written into ``meta`` first so they are present
    in ``meta`` when the parent post-init runs.
    """
    if self.summary is not None:
        self.meta.update({"summary": self.summary, "summary_index": self.summary_index})
    super().__attrs_post_init__()

load_runs()

Source code in griptape/memory/structure/summary_conversation_memory.py
def load_runs(self) -> list[Run]:
    """Load runs through the parent class, then restore the summary state from ``meta``.

    Returns:
        The list returned by the parent ``load_runs()``.
    """
    loaded_runs = super().load_runs()
    meta = self.meta
    # A missing "summary" key yields None; a missing "summary_index" falls back to 0.
    self.summary = meta.get("summary")
    self.summary_index = meta.get("summary_index", 0)
    return loaded_runs

summarize_runs(previous_summary, runs)

Source code in griptape/memory/structure/summary_conversation_memory.py
def summarize_runs(self, previous_summary: str | None, runs: list[Run]) -> str | None:
    """Ask the prompt driver for a summary covering ``previous_summary`` plus ``runs``.

    Args:
        previous_summary: The summary so far, or ``None``.
        runs: Runs to fold into the summary.

    Returns:
        The text produced by the prompt driver. ``previous_summary`` is returned
        unchanged when ``runs`` is empty or when any exception occurs; in the latter
        case the exception is logged rather than raised.
    """
    try:
        if len(runs) == 0:
            # Nothing new to fold in.
            return previous_summary

        rendered_request = self.summarize_conversation_get_template.render(summary=previous_summary, runs=runs)
        request_stack = PromptStack(messages=[Message(rendered_request, role=Message.USER_ROLE)])
        return self.prompt_driver.run(request_stack).to_text()
    except Exception as e:
        # Best effort: keep the old summary instead of propagating the failure.
        logging.exception("Error summarizing memory: %s(%s)", type(e).__name__, e)
        return previous_summary

to_prompt_stack(last_n=None)

Source code in griptape/memory/structure/summary_conversation_memory.py
def to_prompt_stack(self, last_n: int | None = None) -> PromptStack:
    """Build a Prompt Stack from the summary (if any) followed by the unsummarized runs.

    Args:
        last_n: Optional cap on how many recent runs to include; passed through to
            ``unsummarized_runs``.

    Returns:
        A new Prompt Stack: the rendered summary as a user message when a summary
        exists, then one user and one assistant message per unsummarized run.
    """
    prompt_stack = PromptStack()

    if self.summary:
        rendered_summary = self.summary_get_template.render(summary=self.summary)
        prompt_stack.add_user_message(rendered_summary)

    for run in self.unsummarized_runs(last_n):
        prompt_stack.add_user_message(run.input)
        prompt_stack.add_assistant_message(run.output)

    return prompt_stack

try_add_run(run)

Source code in griptape/memory/structure/summary_conversation_memory.py
def try_add_run(self, run: Run) -> None:
    """Append ``run`` and fold any unsummarized runs beyond ``offset`` into the summary.

    After appending, the unsummarized runs in excess of ``offset`` (oldest first) are
    passed to ``summarize_runs`` and ``summary_index`` is advanced past them.

    Args:
        run: The run to record.
    """
    self.runs.append(run)
    unsummarized_runs = self.unsummarized_runs()
    runs_to_summarize = unsummarized_runs[: max(0, len(unsummarized_runs) - self.offset)]

    if runs_to_summarize:
        self.summary = self.summarize_runs(self.summary, runs_to_summarize)
        # The unsummarized runs are the tail of ``self.runs``, so the new boundary can be
        # computed positionally. Looking the last summarized run up with ``list.index``
        # compares by equality and returns the first match, which points at the wrong
        # position when an equal run occurs earlier in the list.
        first_unsummarized = len(self.runs) - len(unsummarized_runs)
        self.summary_index = first_unsummarized + len(runs_to_summarize)
unsummarized_runs(last_n=None)

Source code in griptape/memory/structure/summary_conversation_memory.py
def unsummarized_runs(self, last_n: int | None = None) -> list[Run]:
    """Return the runs from ``summary_index`` onward, optionally capped to the last ``last_n``.

    Args:
        last_n: When truthy, the shorter of "runs from ``summary_index`` onward" and
            "last ``last_n`` runs" is returned. ``None`` or ``0`` applies no cap.

    Returns:
        A new list containing the selected runs.
    """
    pending = self.runs[self.summary_index :]
    if not last_n:
        return pending

    tail = self.runs[-last_n:]
    # Whichever window is shorter wins; on a tie the summary-based window is kept.
    return tail if len(pending) > len(tail) else pending