Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Include output from interactive cells in Foyle requests #1756

Merged
merged 5 commits into from
Oct 30, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions src/extension/ai/converters.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,31 @@ import { ServerLifecycleIdentity, getServerConfigurationValue } from '../../util
import { Serializer } from '../../types'
import * as serializerTypes from '../grpc/serializerTypes'
import * as serializer from '../serializer'
import { Kernel } from '../kernel'

import * as protos from './protos'

// Converter provides converstion routines from vscode data types to protocol buffer types.
// It is a class because in order to handle the conversion we need to keep track of the kernel
// because we need to add execution information to the cells before serializing.
export class Converter {
kernel: Kernel
constructor(kernel: Kernel) {
this.kernel = kernel
}

// notebokDataToProto converts a VSCode NotebookData to a RunMe Notebook proto.
// It adds execution information to the cells before converting.
public async notebookDataToProto(notebookData: vscode.NotebookData): Promise<parser_pb.Notebook> {
// We need to add the execution info to the cells so that the AI model can use that information.
const cellDataWithExec = await serializer.SerializerBase.addExecInfo(notebookData, this.kernel)
let notebookDataWithExec = new vscode.NotebookData(cellDataWithExec)
// marshalNotebook returns a protocol buffer using the ts client library from buf we need to
// convert it to es
let notebookProto = serializer.GrpcSerializer.marshalNotebook(notebookDataWithExec)
return protos.notebookTSToES(notebookProto)
}
}

// cellToCellData converts a NotebookCell to a NotebookCellData.
// NotebookCell is an interface used by the editor.
Expand Down
10 changes: 6 additions & 4 deletions src/extension/ai/generate.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@ import {
GenerateCellsResponse,
} from '@buf/jlewi_foyle.bufbuild_es/foyle/v1alpha1/agent_pb'

import * as serializer from '../serializer'
import getLogger from '../logger'

import { Converter } from './converters'
import * as protos from './protos'
import * as converters from './converters'
const log = getLogger('AIGenerate')
Expand All @@ -17,9 +17,11 @@ const log = getLogger('AIGenerate')
// It generates a single completion
export class CompletionGenerator {
client: PromiseClient<typeof AIService>
converter: Converter

constructor(client: PromiseClient<typeof AIService>) {
constructor(client: PromiseClient<typeof AIService>, converter: Converter) {
this.client = client
this.converter = converter
}

public generateCompletion = async () => {
Expand All @@ -43,10 +45,10 @@ export class CompletionGenerator {
let cellData = editor?.notebook.getCells().map((cell) => converters.cellToCellData(cell))
let notebookData = new vscode.NotebookData(cellData)

let notebookProto = serializer.GrpcSerializer.marshalNotebook(notebookData)
let notebookProto = await this.converter.notebookDataToProto(notebookData)

const req = new GenerateCellsRequest()
req.notebook = protos.notebookTSToES(notebookProto)
req.notebook = notebookProto
req.selectedIndex = lastSelectedCell

this.client
Expand Down
64 changes: 35 additions & 29 deletions src/extension/ai/ghost.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ import * as agent_pb from '@buf/jlewi_foyle.bufbuild_es/foyle/v1alpha1/agent_pb'
import { StreamGenerateRequest_Trigger } from '@buf/jlewi_foyle.bufbuild_es/foyle/v1alpha1/agent_pb'

import getLogger from '../logger'
import * as serializer from '../serializer'
import { RUNME_CELL_ID } from '../constants'

import * as converters from './converters'
Expand Down Expand Up @@ -47,15 +46,16 @@ const ghostDecoration = vscode.window.createTextEditorDecorationType({
// the cell contents have changed.
export class GhostCellGenerator implements stream.CompletionHandlers {
private notebookState: Map<vscode.Uri, NotebookState>

private converter: converters.Converter
// contextID is the ID of the context we are generating completions for.
// It is used to detect whether a completion response is stale and should be
// discarded because the context has changed.

constructor() {
constructor(converter: converters.Converter) {
this.notebookState = new Map<vscode.Uri, NotebookState>()
// Generate a random context ID. This should be unnecessary because presumable the event to change
// the active cell will be sent before any requests are sent but it doesn't hurt to be safe.
this.converter = converter
}

// Updated method to check and initialize notebook state
Expand All @@ -70,10 +70,10 @@ export class GhostCellGenerator implements stream.CompletionHandlers {
// This is a stateful transformation because we need to decide whether to send the full document or
// the incremental changes. It will return a null request if the event should be ignored or if there
// is an error preventing it from computing a proper request.
buildRequest(
async buildRequest(
cellChangeEvent: stream.CellChangeEvent,
firstRequest: boolean,
): agent_pb.StreamGenerateRequest | null {
): Promise<agent_pb.StreamGenerateRequest | null> {
// TODO(jeremy): Is there a more efficient way to find the cell and notebook?
// Can we cache it in the class? Since we keep track of notebooks in NotebookState
// Is there a way we can go from the URI of the cell to the URI of the notebook directly
Expand All @@ -86,7 +86,7 @@ export class GhostCellGenerator implements stream.CompletionHandlers {
if (notebook === undefined) {
log.error(`notebook for cell ${cellChangeEvent.notebookUri} NOT found`)
// TODO(jermey): Should we change the return type to be nullable?
return null
return Promise.resolve(null)
}

// Get the notebook state; this will initialize it if this is the first time we
Expand Down Expand Up @@ -121,39 +121,34 @@ export class GhostCellGenerator implements stream.CompletionHandlers {
let cellData = notebook.getCells().map((cell) => converters.cellToCellData(cell))
let notebookData = new vscode.NotebookData(cellData)

let notebookProto = serializer.GrpcSerializer.marshalNotebook(notebookData)
let notebookProto = await this.converter.notebookDataToProto(notebookData)
let request = new agent_pb.StreamGenerateRequest({
contextId: SessionManager.getManager().getID(),
request: {
case: 'fullContext',
value: new agent_pb.FullContext({
notebook: protos.notebookTSToES(notebookProto),
notebook: notebookProto,
selected: matchedCell.index,
notebookUri: notebook.uri.toString(),
}),
},
trigger: cellChangeEvent.trigger,
})

return request
} else {
let cellData = converters.cellToCellData(matchedCell)
let notebookData = new vscode.NotebookData([cellData])

let notebookProto = serializer.GrpcSerializer.marshalNotebook(notebookData)
let notebook = protos.notebookTSToES(notebookProto)
// Generate an update request
let notebookProto = await this.converter.notebookDataToProto(notebookData)
let request = new agent_pb.StreamGenerateRequest({
contextId: SessionManager.getManager().getID(),
request: {
case: 'update',
value: new agent_pb.UpdateContext({
cell: notebook.cells[0],
cell: notebookProto.cells[0],
}),
},
trigger: cellChangeEvent.trigger,
})

return request
}
}
Expand Down Expand Up @@ -347,13 +342,16 @@ export class CellChangeEventGenerator {
return
}

this.streamCreator.handleEvent(
new stream.CellChangeEvent(
notebook.uri.toString(),
matchedCell.index,
StreamGenerateRequest_Trigger.CELL_TEXT_CHANGE,
),
)
// N.B. handlEvent is aysnc. So we need to use "then" to make sure the event gets processed
this.streamCreator
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@sourishkrout Is this the right way to call an async function from a non async function?
Since the return type is null we don't need to await the async function.
However, I think in the past you mentioned that if you invoke an async function but don't do anything with its return value it might not get scheduled.
Does using "then(()=> {})" solve this problem?

Copy link
Member

@sourishkrout sourishkrout Oct 29, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It does not solve the problem. At JS runtime, there isn't such a thing as an "async function". What makes it async is returning a Promise<> type. Async/await uses a generator under the hood to pause execution to unravel promises; however, semantically, there's no difference to .then(...). It just makes you write async code that looks much more like sync code for readability.

That being said, if a promise type ("a future") is not being then'd or awaited upstream, you wind up with the same exact problem no matter how the promise is "returned."

The only reason this likely appears to work is that promises start running immediately (as opposed to when you await/then them), and VS Code is a long-running process, so the scopes live long enough to allow the promises to complete. I'm making assumptions here because I haven't thoroughly inspected the upstream code.

As a Golang programmer, this is the same as running three go func() inside a function but not using a WaitGroup to synchronize them. Similarly, this might work if the "main thread" runs long enough for all three to complete and no downstream processing requires their completion. Otherwise, all concurrent functions get killed when the main thread dies.

In other words, my suggestion to use async was only intended to be cosmetic. If there is a problem with async execution, both then and await will be prone to it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The only reason this likely appears to work is that promises start running immediately (as opposed to when you await/then them), and VS Code is a long-running process, so the scopes live long enough to allow the promises to complete

That seems fine to me. Concretely, it seems fine to treat it as a fire and forget and assume

  1. vscode runs long enough for them to complete
    or
  2. If vscode shuts down early enough then the request might not have completed.

Here's the problem I'm trying to solve. handleEvent is an async function being called from a non async function handleOnDidChangeNotebookCell. handleOnDidChangeNotebookCell is the listener for events

vscode.workspace.onDidChangeTextDocument(eventGenerator.handleOnDidChangeNotebookCell),

I originally thought I couldn't declare handleEvent as async and include an await function because I thought the listener couldn't be an async function. However, it looks like if I declare handleOnDidChangeNotebookCell to be async it still works fine; so I could update it to be async and then include an await function.

However, my suposition is that if the listener (handleOnDidChangeNotebookCell) is async its returning a promise that is not being awaited on. So we still wind up with a Promise that we aren't awaiting its just happening in a different part of the code.

Fundamentally, I think this is alright. The listener (handleOnDidChangeNotebookCell) is firing off a request to generate a completion and the response is handled asynchronously. If vscode exits before that async function can get scheduled and finish processing than we are dropping the completion generation logic on the floor rather than doing some graceful handling/shutdown. But dropping it on the floor seems fine; I'm not sure what graceful handling would actually look like.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So I changed handleOnDidChangeNotebookCell and added some awaits. I made this change because I agree with your earlier comment that using await is cleaner.

@sourishkrout so I think this is good to go but let me know if you think otherwise.

.handleEvent(
new stream.CellChangeEvent(
notebook.uri.toString(),
matchedCell.index,
StreamGenerateRequest_Trigger.CELL_TEXT_CHANGE,
),
)
.then(() => {})
}

// handleOnDidChangeVisibleTextEditors is called when the visible text editors change.
Expand Down Expand Up @@ -384,7 +382,13 @@ export class CellChangeEventGenerator {
}

handleOnDidChangeNotebookDocument = (event: vscode.NotebookDocumentChangeEvent) => {
// N.B. For non-interactive cells this will trigger each time the output is updated.
// For interactive cells this doesn't appear to trigger each time the cell output is updated.
// For example, if you have a long running command (e.g. a bash for loop with a sleep that
// echos a message on each iteration) then this won't trigger on each iteration for
// an interactive cell but will for non-interactive.
event.cellChanges.forEach((change) => {
log.info(`handleOnDidChangeNotebookDocument: change: ${change}`)
if (change.outputs !== undefined) {
// If outputs change then we want to trigger completions.

Expand All @@ -399,13 +403,15 @@ export class CellChangeEventGenerator {
// In particular its possible that the cell that changed is not the active cell. Therefore
// we may not want to generate completions for it. For example, you can have multiple cells
// running. So in principle the active cell could be different from the cell that changed.
this.streamCreator.handleEvent(
new stream.CellChangeEvent(
change.cell.notebook.uri.toString(),
change.cell.index,
StreamGenerateRequest_Trigger.CELL_OUTPUT_CHANGE,
),
)
this.streamCreator
.handleEvent(
new stream.CellChangeEvent(
change.cell.notebook.uri.toString(),
change.cell.index,
StreamGenerateRequest_Trigger.CELL_OUTPUT_CHANGE,
),
)
.then(() => {})
}
})
}
Expand Down
13 changes: 10 additions & 3 deletions src/extension/ai/manager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,10 @@ import { createPromiseClient, PromiseClient, Transport } from '@connectrpc/conne
import { createConnectTransport } from '@connectrpc/connect-node'
import { AIService } from '@buf/jlewi_foyle.connectrpc_es/foyle/v1alpha1/agent_connect'

import { Kernel } from '../kernel'
import getLogger from '../logger'

import { Converter } from './converters'
import * as ghost from './ghost'
import * as stream from './stream'
import * as generate from './generate'
Expand All @@ -18,14 +20,17 @@ export class AIManager {
subscriptions: vscode.Disposable[] = []
client: PromiseClient<typeof AIService>
completionGenerator: generate.CompletionGenerator
converter: Converter

constructor() {
constructor(kernel: Kernel) {
this.log = getLogger('AIManager')
this.log.info('AI: Initializing AI Manager')
const config = vscode.workspace.getConfiguration('runme.experiments')
const autoComplete = config.get<boolean>('aiAutoCell', false)
this.client = this.createAIClient()
this.completionGenerator = new generate.CompletionGenerator(this.client)

this.converter = new Converter(kernel)
this.completionGenerator = new generate.CompletionGenerator(this.client, this.converter)
if (autoComplete) {
this.registerGhostCellEvents()

Expand All @@ -47,7 +52,7 @@ export class AIManager {
// as well as when cells change. This is used to create ghost cells.
registerGhostCellEvents() {
this.log.info('AI: Enabling AutoCell Generation')
let cellGenerator = new ghost.GhostCellGenerator()
let cellGenerator = new ghost.GhostCellGenerator(this.converter)

// Create a stream creator. The StreamCreator is a class that effectively windows events
// and turns each window into an AsyncIterable of streaming requests.
Expand All @@ -74,6 +79,8 @@ export class AIManager {
vscode.window.onDidChangeActiveTextEditor(cellGenerator.handleOnDidChangeActiveTextEditor),
)

// We use onDidChangeNotebookDocument to listen for changes to outputs.
// We use this to trigger updates in response to a cell's output being updated.
this.subscriptions.push(
vscode.workspace.onDidChangeNotebookDocument(
eventGenerator.handleOnDidChangeNotebookDocument,
Expand Down
6 changes: 3 additions & 3 deletions src/extension/ai/stream.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ export interface CompletionHandlers {
buildRequest: (
cellChangeEvent: CellChangeEvent,
firstRequest: boolean,
) => StreamGenerateRequest | null
) => Promise<StreamGenerateRequest | null>

// processResponse is a function that processes a StreamGenerateResponse
processResponse: (response: StreamGenerateResponse) => void
Expand Down Expand Up @@ -75,15 +75,15 @@ export class StreamCreator {
// handleEvent processes a request
// n.b we use arror function definition to ensure this gets properly bound
// see https://www.typescriptlang.org/docs/handbook/2/classes.html#this-at-runtime-in-classes
handleEvent = (event: CellChangeEvent): void => {
handleEvent = async (event: CellChangeEvent): Promise<void> => {
// We need to generate a new request
let firstRequest = false
if (this.lastIterator === undefined || this.lastIterator === null) {
firstRequest = true
}

log.info('handleEvent: building request')
let req = this.handlers.buildRequest(event, firstRequest)
let req = await this.handlers.buildRequest(event, firstRequest)

if (req === null) {
log.info(`Notebook: ${event.notebookUri}; no request generated`)
Expand Down
2 changes: 1 addition & 1 deletion src/extension/extension.ts
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@ export class RunmeExtension {
}

// Start the AIManager. This will enable the AI services if the user has enabled them.
const aiManager = new manager.AIManager()
const aiManager = new manager.AIManager(kernel)
// We need to hang onto a reference to the AIManager so it doesn't get garbage collected until the
// extension is deactivated.
context.subscriptions.push(aiManager)
Expand Down