Remove goal from feedback

statelyai · Nov 24, 2024 · 9d65d71 · 9d65d71
1 parent 57070f8
commit 9d65d71
Show file tree

Hide file tree

Showing 7 changed files with 58 additions and 32 deletions.
diff --git a/.changeset/swift-mangos-rush.md b/.changeset/swift-mangos-rush.md
@@ -0,0 +1,5 @@
+---
+"@statelyai/agent": patch
+---
+
+Remove `goal` from feedback input; the goal is now passed to `addObservation()` and stored on the observation instead
diff --git a/examples/learn-from-feedback.ts b/examples/learn-from-feedback.ts
@@ -80,6 +80,7 @@ Achieve the goal. Consider both exploring unknown actions (high exploration_valu
 
         if (decision?.nextEvent?.type === 'submit') {
           const observation = await agent.addObservation({
+            goal: decision.goal,
             prevState: { value: 'editing' },
             event: { type: 'submit' },
             state: { value: 'editing' },
@@ -88,14 +89,14 @@ Achieve the goal. Consider both exploring unknown actions (high exploration_valu
           // don't change the status; pretend submit button is broken
           await agent.addFeedback({
             observationId: observation.id,
-            goal: 'Submit the form',
             score: 0,
             comment: 'Form not submitted',
           });
         } else if (decision?.nextEvent?.type === 'pressEnter') {
           status = 'submitted';
 
           await agent.addObservation({
+            goal: decision.goal,
             prevState: { value: 'editing' },
             event: { type: 'pressEnter' },
             state: { value: 'submitted' },

diff --git a/src/agent.test.ts b/src/agent.test.ts
@@ -70,28 +70,24 @@ test('agent.addFeedback() adds to feedback', () => {
     model: {} as any,
   });
 
-  const feedback = agent.addFeedback({
-    score: -1,
+  const obs = agent.addObservation({
+    prevState: { value: 'playing' },
+    state: { value: 'lost' },
+    event: { type: 'play', position: 3 },
     goal: 'Win the game',
-    observationId: 'obs-1',
+  });
+
+  const feedback = agent.addFeedback({
+    score: 0,
+    observationId: obs.id,
   });
 
   expect(feedback.episodeId).toEqual(agent.episodeId);
 
   expect(agent.getFeedback()).toContainEqual(
     expect.objectContaining({
-      score: -1,
-      goal: 'Win the game',
-      observationId: 'obs-1',
-      episodeId: expect.any(String),
-      timestamp: expect.any(Number),
-    })
-  );
-  expect(agent.getFeedback()).toContainEqual(
-    expect.objectContaining({
-      score: -1,
-      goal: 'Win the game',
-      observationId: 'obs-1',
+      score: 0,
+      observationId: obs.id,
       episodeId: expect.any(String),
       timestamp: expect.any(Number),
     })
@@ -109,6 +105,7 @@ test('agent.addObservation() adds to observations', () => {
     prevState: { value: 'playing', context: {} },
     event: { type: 'play', position: 3 },
     state: { value: 'lost', context: {} },
+    goal: 'Win the game',
   });
 
   expect(observation.episodeId).toEqual(agent.episodeId);
@@ -133,6 +130,7 @@ test('agent.addObservation() adds to observations (initial state)', () => {
 
   const observation = agent.addObservation({
     state: { value: 'lost' },
+    goal: 'Win the game',
   });
 
   expect(observation.episodeId).toEqual(agent.episodeId);
@@ -170,6 +168,7 @@ test('agent.addObservation() adds to observations with machine hash', () => {
     event: { type: 'play', position: 3 },
     state: { value: 'lost', context: {} },
     machine,
+    goal: 'Win the game',
   });
 
   expect(observation.episodeId).toEqual(agent.episodeId);
@@ -197,29 +196,27 @@ test('agent.addFeedback() adds to feedback (with observation)', () => {
     state: {
       value: 'playing',
     },
+    goal: 'Win the game',
   });
 
   const feedback = agent.addFeedback({
-    score: -1,
-    goal: 'Win the game',
+    score: 0,
     observationId: observation.id,
   });
 
   expect(feedback.episodeId).toEqual(agent.episodeId);
 
   expect(agent.getFeedback()).toContainEqual(
     expect.objectContaining({
-      score: -1,
-      goal: 'Win the game',
+      score: 0,
       observationId: observation.id,
       episodeId: expect.any(String),
       timestamp: expect.any(Number),
     })
   );
   expect(agent.getFeedback()).toContainEqual(
     expect.objectContaining({
-      score: -1,
-      goal: 'Win the game',
+      score: 0,
       observationId: observation.id,
       episodeId: expect.any(String),
       timestamp: expect.any(Number),
@@ -286,7 +283,6 @@ test('You can listen for feedback events', () => {
 
   agent.addFeedback({
     score: -1,
-    goal: 'Win the game',
     observationId: 'obs-1',
   });
 
@@ -431,6 +427,7 @@ test('agent.getDecisions() returns decisions from context', () => {
     model: {} as any,
     strategy: async (agent) => {
       return {
+        id: Date.now().toString(),
         episodeId: agent.episodeId,
         strategy: 'test-strategy',
         goal: '',

diff --git a/src/agent.ts b/src/agent.ts
@@ -391,17 +391,19 @@ export class Agent<
     ) {
       const observation = agent.addObservation(observationInput);
 
-      const input = getInput?.(observation);
+      const interactInput = getInput?.(observation);
 
-      if (input) {
-        const res = await agentDecide(agent, {
+      if (interactInput) {
+        const decision = await agentDecide(agent, {
           machine,
           state: observation.state,
-          ...input,
+          ...interactInput,
         });
 
-        if (res?.nextEvent) {
-          actorRef.send(res.nextEvent);
+        if (decision?.nextEvent) {
+          // @ts-ignore
+          decision.nextEvent['_decision'] = decision.id;
+          actorRef.send(decision.nextEvent);
         }
       }
 
@@ -420,11 +422,20 @@ export class Agent<
               return;
             }
 
+            const decisionId = inspEvent.event['_decision'] as
+              | string
+              | undefined;
+
+            const decision = decisionId
+              ? agent.getDecisions().find((d) => d.id === decisionId)
+              : undefined;
+
             const observationInput = {
               event: inspEvent.event,
               prevState,
               state: inspEvent.snapshot as any,
               machine: (actorRef as any).src,
+              goal: decision?.goal,
             } satisfies AgentObservationInput<any>;
 
             await handleObservation(observationInput);
@@ -439,6 +450,7 @@ export class Agent<
         event: undefined,
         state: actorRef.getSnapshot(),
         machine: (actorRef as any).src,
+        goal: undefined,
       });
     }
 
@@ -464,11 +476,19 @@ export class Agent<
               return;
             }
 
+            const decisionId = inspEvent.event['_decision'] as
+              | string
+              | undefined;
+            const decision = decisionId
+              ? this.getDecisions().find((d) => d.id === decisionId)
+              : undefined;
+
             const observationInput = {
               event: inspEvent.event,
               prevState,
               state: inspEvent.snapshot as any,
               machine: (actorRef as any).src,
+              goal: decision?.goal,
             } satisfies AgentObservationInput<this>;
 
             prevState = observationInput.state;

diff --git a/src/strategies/shortestPath.ts b/src/strategies/shortestPath.ts
@@ -13,6 +13,7 @@ import { z } from 'zod';
 import { zodToJsonSchema } from 'zod-to-json-schema';
 import Ajv from 'ajv';
 import { AnyMachineSnapshot } from 'xstate';
+import { randomId } from '../utils';
 
 const ajv = new Ajv();
 
@@ -165,6 +166,7 @@ Examples:
   const nextStep = leastWeightPath?.steps[0];
 
   return {
+    id: randomId(),
     strategy: 'shortestPath',
     episodeId: agent.episodeId,
     goal: input.goal,

diff --git a/src/strategies/simple.ts b/src/strategies/simple.ts
@@ -86,6 +86,7 @@ export async function simpleStrategy<T extends AnyAgent>(
   }
 
   return {
+    id: randomId(),
     strategy: 'simple',
     goal: input.goal,
     goalState: input.state,

diff --git a/src/types.ts b/src/types.ts
@@ -87,6 +87,7 @@ export type AgentPath<TAgent extends AnyAgent> = {
 };
 
 export type AgentDecision<TAgent extends AnyAgent> = {
+  id: string;
   /**
    * The strategy used to generate the decision
    */
@@ -170,7 +171,6 @@ export type AgentDecideOptions<TAgent extends AnyAgent> = {
 } & Omit<Parameters<typeof generateText>[0], 'model' | 'tools' | 'prompt'>;
 
 export interface AgentFeedback {
-  goal: string;
   observationId: string;
   score: number;
   comment: string | undefined;
@@ -183,7 +183,6 @@ export interface AgentFeedback {
 }
 
 export interface AgentFeedbackInput {
-  goal: string;
   observationId: string;
   score: number;
   comment?: string;
@@ -331,7 +330,7 @@ export type AgentMessageInput = CoreMessage & {
 
 export interface AgentObservation<TActor extends ActorRefLike> {
   id: string;
-  // TODO: goal
+  goal?: string;
   prevState: SnapshotFrom<TActor> | undefined;
   event: EventFrom<TActor> | undefined;
   state: SnapshotFrom<TActor>;
@@ -347,6 +346,7 @@ export interface AgentObservationInput<TAgent extends AnyAgent> {
   state: ObservedState<TAgent>;
   machine?: AnyStateMachine;
   timestamp?: number;
+  goal: string | undefined;
 }
 
 export type AgentDecisionInput = {