[Bug]: error: "IndexError('list index out of range')"
What happened?
I have a TypeScript project that loads a PDF and embeds it into a local Chroma DB:
import { Chroma } from 'langchain/vectorstores/chroma';
export async function pdfLoader(llm: OpenAI) {
const loader = new PDFLoader('src/pdfs/SOP.pdf');
log(loader);
const docs = await loader.load();
const vectorStore = await Chroma.fromDocuments(docs, embeddings, {
collectionName: 'AITA',
});
await vectorStore.ensureCollection();
...
The problem is that when I try to use the fromDocuments method, it throws this huge log ending with this data: { error: "IndexError('list index out of range')" }.
I have chroma set up locally, and I've tested other code to hit the DB, and it's working. Not sure what next steps are?
Versions
Chromadb v1.4.1 javascript
Relevant log output
/Users/kevin/code/src/github/kevmok/node-langchain-prompt/node_modules/.pnpm/[email protected]/node_modules/axios/lib/core/createError.js:16
var error = new Error(message);
^
Error: Request failed with status code 500
at createError (/Users/kevin/code/src/github/kevmok/node-langchain-prompt/node_modules/.pnpm/[email protected]/node_modules/axios/lib/core/createError.js:16:15)
at settle (/Users/kevin/code/src/github/kevmok/node-langchain-prompt/node_modules/.pnpm/[email protected]/node_modules/axios/lib/core/settle.js:17:12)
at IncomingMessage.handleStreamEnd (/Users/kevin/code/src/github/kevmok/node-langchain-prompt/node_modules/.pnpm/[email protected]/node_modules/axios/lib/adapters/http.js:322:11)
at IncomingMessage.emit (node:events:525:35)
at IncomingMessage.emit (node:domain:489:12)
at endReadableNT (node:internal/streams/readable:1359:12)
at process.processTicksAndRejections (node:internal/process/task_queues:82:21) {
config: {
transitional: {
silentJSONParsing: true,
forcedJSONParsing: true,
clarifyTimeoutError: false
},
adapter: [Function: httpAdapter],
transformRequest: [ [Function: transformRequest] ],
transformResponse: [ [Function: transformResponse] ],
timeout: 0,
xsrfCookieName: 'XSRF-TOKEN',
xsrfHeaderName: 'X-XSRF-TOKEN',
maxContentLength: -1,
maxBodyLength: -1,
validateStatus: [Function: validateStatus],
headers: {
Accept: 'application/json, text/plain, */*',
'User-Agent': 'axios/0.26.1'
},
method: 'get',
url: 'http://localhost:8000/api/v1/collections/AITA/count',
data: undefined
},
request: <ref *1> ClientRequest {
_events: [Object: null prototype] {
abort: [Function (anonymous)],
aborted: [Function (anonymous)],
connect: [Function (anonymous)],
error: [Function (anonymous)],
socket: [Function (anonymous)],
timeout: [Function (anonymous)],
finish: [Function: requestOnFinish]
},
_eventsCount: 7,
_maxListeners: undefined,
outputData: [],
outputSize: 0,
writable: true,
destroyed: false,
_last: true,
chunkedEncoding: false,
shouldKeepAlive: false,
maxRequestsOnConnectionReached: false,
_defaultKeepAlive: true,
useChunkedEncodingByDefault: false,
sendDate: false,
_removedConnection: false,
_removedContLen: false,
_removedTE: false,
strictContentLength: false,
_contentLength: 0,
_hasBody: true,
_trailer: '',
finished: true,
_headerSent: true,
_closed: false,
socket: <ref *2> Socket {
connecting: false,
_hadError: false,
_parent: null,
_host: 'localhost',
_closeAfterHandlingError: false,
_readableState: ReadableState {
objectMode: false,
highWaterMark: 16384,
buffer: BufferList { head: null, tail: null, length: 0 },
length: 0,
pipes: [],
flowing: true,
ended: false,
endEmitted: false,
reading: true,
constructed: true,
sync: false,
needReadable: true,
emittedReadable: false,
readableListening: false,
resumeScheduled: false,
errorEmitted: false,
emitClose: false,
autoDestroy: true,
destroyed: false,
errored: null,
closed: false,
closeEmitted: false,
defaultEncoding: 'utf8',
awaitDrainWriters: null,
multiAwaitDrain: false,
readingMore: false,
dataEmitted: true,
decoder: null,
encoding: null,
[Symbol(kPaused)]: false
},
_events: [Object: null prototype] {
end: [Function: onReadableStreamEnd],
free: [Function: onFree],
close: [ [Function: onClose], [Function: socketCloseListener] ],
timeout: [Function: onTimeout],
agentRemove: [Function: onRemove],
error: [Function: socketErrorListener],
finish: [Function: bound onceWrapper] { listener: [Function: destroy] }
},
_eventsCount: 7,
_maxListeners: undefined,
_writableState: WritableState {
objectMode: false,
highWaterMark: 16384,
finalCalled: true,
needDrain: false,
ending: true,
ended: true,
finished: false,
destroyed: false,
decodeStrings: false,
defaultEncoding: 'utf8',
length: 0,
writing: false,
corked: 0,
sync: false,
bufferProcessing: false,
onwrite: [Function: bound onwrite],
writecb: null,
writelen: 0,
afterWriteTickInfo: null,
buffered: [],
bufferedIndex: 0,
allBuffers: true,
allNoop: true,
pendingcb: 1,
constructed: true,
prefinished: false,
errorEmitted: false,
emitClose: false,
autoDestroy: true,
errored: null,
closed: false,
closeEmitted: false,
[Symbol(kOnFinished)]: []
},
allowHalfOpen: false,
_sockname: null,
_pendingData: null,
_pendingEncoding: '',
server: null,
_server: null,
parser: null,
_httpMessage: [Circular *1],
[Symbol(async_id_symbol)]: 628,
[Symbol(kHandle)]: TCP {
reading: true,
onconnection: null,
[Symbol(owner_symbol)]: [Circular *2]
},
[Symbol(lastWriteQueueSize)]: 0,
[Symbol(timeout)]: null,
[Symbol(kBuffer)]: null,
[Symbol(kBufferCb)]: null,
[Symbol(kBufferGen)]: null,
[Symbol(kCapture)]: false,
[Symbol(kSetNoDelay)]: true,
[Symbol(kSetKeepAlive)]: true,
[Symbol(kSetKeepAliveInitialDelay)]: 60,
[Symbol(kBytesRead)]: 0,
[Symbol(kBytesWritten)]: 0
},
_header: 'GET /api/v1/collections/AITA/count HTTP/1.1\r\n' +
'Accept: application/json, text/plain, */*\r\n' +
'User-Agent: axios/0.26.1\r\n' +
'Host: localhost:8000\r\n' +
'Connection: close\r\n' +
'\r\n',
_keepAliveTimeout: 0,
_onPendingData: [Function: nop],
agent: Agent {
_events: [Object: null prototype] {
free: [Function (anonymous)],
newListener: [Function: maybeEnableKeylog]
},
_eventsCount: 2,
_maxListeners: undefined,
defaultPort: 80,
protocol: 'http:',
options: [Object: null prototype] { noDelay: true, path: null },
requests: [Object: null prototype] {},
sockets: [Object: null prototype] {
'localhost:8000:': [
<ref *2> Socket {
connecting: false,
_hadError: false,
_parent: null,
_host: 'localhost',
_closeAfterHandlingError: false,
_readableState: [ReadableState],
_events: [Object: null prototype],
_eventsCount: 7,
_maxListeners: undefined,
_writableState: [WritableState],
allowHalfOpen: false,
_sockname: null,
_pendingData: null,
_pendingEncoding: '',
server: null,
_server: null,
parser: null,
_httpMessage: [Circular *1],
[Symbol(async_id_symbol)]: 628,
[Symbol(kHandle)]: [TCP],
[Symbol(lastWriteQueueSize)]: 0,
[Symbol(timeout)]: null,
[Symbol(kBuffer)]: null,
[Symbol(kBufferCb)]: null,
[Symbol(kBufferGen)]: null,
[Symbol(kCapture)]: false,
[Symbol(kSetNoDelay)]: true,
[Symbol(kSetKeepAlive)]: true,
[Symbol(kSetKeepAliveInitialDelay)]: 60,
[Symbol(kBytesRead)]: 0,
[Symbol(kBytesWritten)]: 0
}
]
},
freeSockets: [Object: null prototype] {},
keepAliveMsecs: 1000,
keepAlive: false,
maxSockets: Infinity,
maxFreeSockets: 256,
scheduling: 'lifo',
maxTotalSockets: Infinity,
totalSocketCount: 1,
[Symbol(kCapture)]: false
},
socketPath: undefined,
method: 'GET',
maxHeaderSize: undefined,
insecureHTTPParser: undefined,
path: '/api/v1/collections/AITA/count',
_ended: true,
res: IncomingMessage {
_readableState: ReadableState {
objectMode: false,
highWaterMark: 16384,
buffer: BufferList { head: null, tail: null, length: 0 },
length: 0,
pipes: [],
flowing: true,
ended: true,
endEmitted: true,
reading: false,
constructed: true,
sync: true,
needReadable: false,
emittedReadable: false,
readableListening: false,
resumeScheduled: false,
errorEmitted: false,
emitClose: true,
autoDestroy: true,
destroyed: true,
errored: null,
closed: true,
closeEmitted: true,
defaultEncoding: 'utf8',
awaitDrainWriters: null,
multiAwaitDrain: false,
readingMore: true,
dataEmitted: true,
decoder: null,
encoding: null,
[Symbol(kPaused)]: false
},
_events: [Object: null prototype] {
end: [ [Function: responseOnEnd], [Function: handleStreamEnd] ],
data: [Function: handleStreamData],
aborted: [Function: handlerStreamAborted],
error: [Function: handleStreamError]
},
_eventsCount: 4,
_maxListeners: undefined,
socket: <ref *2> Socket {
connecting: false,
_hadError: false,
_parent: null,
_host: 'localhost',
_closeAfterHandlingError: false,
_readableState: ReadableState {
objectMode: false,
highWaterMark: 16384,
buffer: BufferList { head: null, tail: null, length: 0 },
length: 0,
pipes: [],
flowing: true,
ended: false,
endEmitted: false,
reading: true,
constructed: true,
sync: false,
needReadable: true,
emittedReadable: false,
readableListening: false,
resumeScheduled: false,
errorEmitted: false,
emitClose: false,
autoDestroy: true,
destroyed: false,
errored: null,
closed: false,
closeEmitted: false,
defaultEncoding: 'utf8',
awaitDrainWriters: null,
multiAwaitDrain: false,
readingMore: false,
dataEmitted: true,
decoder: null,
encoding: null,
[Symbol(kPaused)]: false
},
_events: [Object: null prototype] {
end: [Function: onReadableStreamEnd],
free: [Function: onFree],
close: [ [Function: onClose], [Function: socketCloseListener] ],
timeout: [Function: onTimeout],
agentRemove: [Function: onRemove],
error: [Function: socketErrorListener],
finish: [Function: bound onceWrapper] {
listener: [Function: destroy]
}
},
_eventsCount: 7,
_maxListeners: undefined,
_writableState: WritableState {
objectMode: false,
highWaterMark: 16384,
finalCalled: true,
needDrain: false,
ending: true,
ended: true,
finished: false,
destroyed: false,
decodeStrings: false,
defaultEncoding: 'utf8',
length: 0,
writing: false,
corked: 0,
sync: false,
bufferProcessing: false,
onwrite: [Function: bound onwrite],
writecb: null,
writelen: 0,
afterWriteTickInfo: null,
buffered: [],
bufferedIndex: 0,
allBuffers: true,
allNoop: true,
pendingcb: 1,
constructed: true,
prefinished: false,
errorEmitted: false,
emitClose: false,
autoDestroy: true,
errored: null,
closed: false,
closeEmitted: false,
[Symbol(kOnFinished)]: []
},
allowHalfOpen: false,
_sockname: null,
_pendingData: null,
_pendingEncoding: '',
server: null,
_server: null,
parser: null,
_httpMessage: [Circular *1],
[Symbol(async_id_symbol)]: 628,
[Symbol(kHandle)]: TCP {
reading: true,
onconnection: null,
[Symbol(owner_symbol)]: [Circular *2]
},
[Symbol(lastWriteQueueSize)]: 0,
[Symbol(timeout)]: null,
[Symbol(kBuffer)]: null,
[Symbol(kBufferCb)]: null,
[Symbol(kBufferGen)]: null,
[Symbol(kCapture)]: false,
[Symbol(kSetNoDelay)]: true,
[Symbol(kSetKeepAlive)]: true,
[Symbol(kSetKeepAliveInitialDelay)]: 60,
[Symbol(kBytesRead)]: 0,
[Symbol(kBytesWritten)]: 0
},
httpVersionMajor: 1,
httpVersionMinor: 1,
httpVersion: '1.1',
complete: true,
rawHeaders: [
'date',
'Fri, 21 Apr 2023 14:33:00 GMT',
'server',
'uvicorn',
'content-length',
'49',
'content-type',
'application/json',
'connection',
'close'
],
rawTrailers: [],
aborted: false,
upgrade: false,
url: '',
method: null,
statusCode: 500,
statusMessage: 'Internal Server Error',
client: <ref *2> Socket {
connecting: false,
_hadError: false,
_parent: null,
_host: 'localhost',
_closeAfterHandlingError: false,
_readableState: ReadableState {
objectMode: false,
highWaterMark: 16384,
buffer: BufferList { head: null, tail: null, length: 0 },
length: 0,
pipes: [],
flowing: true,
ended: false,
endEmitted: false,
reading: true,
constructed: true,
sync: false,
needReadable: true,
emittedReadable: false,
readableListening: false,
resumeScheduled: false,
errorEmitted: false,
emitClose: false,
autoDestroy: true,
destroyed: false,
errored: null,
closed: false,
closeEmitted: false,
defaultEncoding: 'utf8',
awaitDrainWriters: null,
multiAwaitDrain: false,
readingMore: false,
dataEmitted: true,
decoder: null,
encoding: null,
[Symbol(kPaused)]: false
},
_events: [Object: null prototype] {
end: [Function: onReadableStreamEnd],
free: [Function: onFree],
close: [ [Function: onClose], [Function: socketCloseListener] ],
timeout: [Function: onTimeout],
agentRemove: [Function: onRemove],
error: [Function: socketErrorListener],
finish: [Function: bound onceWrapper] {
listener: [Function: destroy]
}
},
_eventsCount: 7,
_maxListeners: undefined,
_writableState: WritableState {
objectMode: false,
highWaterMark: 16384,
finalCalled: true,
needDrain: false,
ending: true,
ended: true,
finished: false,
destroyed: false,
decodeStrings: false,
defaultEncoding: 'utf8',
length: 0,
writing: false,
corked: 0,
sync: false,
bufferProcessing: false,
onwrite: [Function: bound onwrite],
writecb: null,
writelen: 0,
afterWriteTickInfo: null,
buffered: [],
bufferedIndex: 0,
allBuffers: true,
allNoop: true,
pendingcb: 1,
constructed: true,
prefinished: false,
errorEmitted: false,
emitClose: false,
autoDestroy: true,
errored: null,
closed: false,
closeEmitted: false,
[Symbol(kOnFinished)]: []
},
allowHalfOpen: false,
_sockname: null,
_pendingData: null,
_pendingEncoding: '',
server: null,
_server: null,
parser: null,
_httpMessage: [Circular *1],
[Symbol(async_id_symbol)]: 628,
[Symbol(kHandle)]: TCP {
reading: true,
onconnection: null,
[Symbol(owner_symbol)]: [Circular *2]
},
[Symbol(lastWriteQueueSize)]: 0,
[Symbol(timeout)]: null,
[Symbol(kBuffer)]: null,
[Symbol(kBufferCb)]: null,
[Symbol(kBufferGen)]: null,
[Symbol(kCapture)]: false,
[Symbol(kSetNoDelay)]: true,
[Symbol(kSetKeepAlive)]: true,
[Symbol(kSetKeepAliveInitialDelay)]: 60,
[Symbol(kBytesRead)]: 0,
[Symbol(kBytesWritten)]: 0
},
_consuming: false,
_dumped: false,
req: [Circular *1],
responseUrl: 'http://localhost:8000/api/v1/collections/AITA/count',
redirects: [],
[Symbol(kCapture)]: false,
[Symbol(kHeaders)]: {
date: 'Fri, 21 Apr 2023 14:33:00 GMT',
server: 'uvicorn',
'content-length': '49',
'content-type': 'application/json',
connection: 'close'
},
[Symbol(kHeadersCount)]: 10,
[Symbol(kTrailers)]: null,
[Symbol(kTrailersCount)]: 0
},
aborted: false,
timeoutCb: null,
upgradeOrConnect: false,
parser: null,
maxHeadersCount: null,
reusedSocket: false,
host: 'localhost',
protocol: 'http:',
_redirectable: Writable {
_writableState: WritableState {
objectMode: false,
highWaterMark: 16384,
finalCalled: false,
needDrain: false,
ending: false,
ended: false,
finished: false,
destroyed: false,
decodeStrings: true,
defaultEncoding: 'utf8',
length: 0,
writing: false,
corked: 0,
sync: true,
bufferProcessing: false,
onwrite: [Function: bound onwrite],
writecb: null,
writelen: 0,
afterWriteTickInfo: null,
buffered: [],
bufferedIndex: 0,
allBuffers: true,
allNoop: true,
pendingcb: 0,
constructed: true,
prefinished: false,
errorEmitted: false,
emitClose: true,
autoDestroy: true,
errored: null,
closed: false,
closeEmitted: false,
[Symbol(kOnFinished)]: []
},
_events: [Object: null prototype] {
response: [Function: handleResponse],
error: [Function: handleRequestError],
socket: [Function: handleRequestSocket]
},
_eventsCount: 3,
_maxListeners: undefined,
_options: {
maxRedirects: 21,
maxBodyLength: 10485760,
protocol: 'http:',
path: '/api/v1/collections/AITA/count',
method: 'GET',
headers: {
Accept: 'application/json, text/plain, */*',
'User-Agent': 'axios/0.26.1'
},
agent: undefined,
agents: { http: undefined, https: undefined },
auth: undefined,
hostname: 'localhost',
port: '8000',
nativeProtocols: {
'http:': {
_connectionListener: [Function: connectionListener],
METHODS: [Array],
STATUS_CODES: [Object],
Agent: [Function],
ClientRequest: [Function: ClientRequest],
IncomingMessage: [Function: IncomingMessage],
OutgoingMessage: [Function: OutgoingMessage],
Server: [Function: Server],
ServerResponse: [Function: ServerResponse],
createServer: [Function: createServer],
validateHeaderName: [Function: __node_internal_],
validateHeaderValue: [Function: __node_internal_],
get: [Function: get],
request: [Function: request],
setMaxIdleHTTPParsers: [Function: setMaxIdleHTTPParsers],
maxHeaderSize: [Getter],
globalAgent: [Getter/Setter]
},
'https:': {
Agent: [Function: Agent],
globalAgent: [Agent],
Server: [Function: Server],
createServer: [Function: createServer],
get: [Function: get],
request: [Function: request]
}
},
pathname: '/api/v1/collections/AITA/count'
},
_ended: true,
_ending: true,
_redirectCount: 0,
_redirects: [],
_requestBodyLength: 0,
_requestBodyBuffers: [],
_onNativeResponse: [Function (anonymous)],
_currentRequest: [Circular *1],
_currentUrl: 'http://localhost:8000/api/v1/collections/AITA/count',
[Symbol(kCapture)]: false
},
[Symbol(kCapture)]: false,
[Symbol(kBytesWritten)]: 0,
[Symbol(kEndCalled)]: true,
[Symbol(kNeedDrain)]: false,
[Symbol(corked)]: 0,
[Symbol(kOutHeaders)]: [Object: null prototype] {
accept: [ 'Accept', 'application/json, text/plain, */*' ],
'user-agent': [ 'User-Agent', 'axios/0.26.1' ],
host: [ 'Host', 'localhost:8000' ]
},
[Symbol(kUniqueHeaders)]: null
},
response: {
status: 500,
statusText: 'Internal Server Error',
headers: {
date: 'Fri, 21 Apr 2023 14:33:00 GMT',
server: 'uvicorn',
'content-length': '49',
'content-type': 'application/json',
connection: 'close'
},
config: {
transitional: {
silentJSONParsing: true,
forcedJSONParsing: true,
clarifyTimeoutError: false
},
adapter: [Function: httpAdapter],
transformRequest: [ [Function: transformRequest] ],
transformResponse: [ [Function: transformResponse] ],
timeout: 0,
xsrfCookieName: 'XSRF-TOKEN',
xsrfHeaderName: 'X-XSRF-TOKEN',
maxContentLength: -1,
maxBodyLength: -1,
validateStatus: [Function: validateStatus],
headers: {
Accept: 'application/json, text/plain, */*',
'User-Agent': 'axios/0.26.1'
},
method: 'get',
url: 'http://localhost:8000/api/v1/collections/AITA/count',
data: undefined
},
request: <ref *1> ClientRequest {
_events: [Object: null prototype] {
abort: [Function (anonymous)],
aborted: [Function (anonymous)],
connect: [Function (anonymous)],
error: [Function (anonymous)],
socket: [Function (anonymous)],
timeout: [Function (anonymous)],
finish: [Function: requestOnFinish]
},
_eventsCount: 7,
_maxListeners: undefined,
outputData: [],
outputSize: 0,
writable: true,
destroyed: false,
_last: true,
chunkedEncoding: false,
shouldKeepAlive: false,
maxRequestsOnConnectionReached: false,
_defaultKeepAlive: true,
useChunkedEncodingByDefault: false,
sendDate: false,
_removedConnection: false,
_removedContLen: false,
_removedTE: false,
strictContentLength: false,
_contentLength: 0,
_hasBody: true,
_trailer: '',
finished: true,
_headerSent: true,
_closed: false,
socket: <ref *2> Socket {
connecting: false,
_hadError: false,
_parent: null,
_host: 'localhost',
_closeAfterHandlingError: false,
_readableState: ReadableState {
objectMode: false,
highWaterMark: 16384,
buffer: BufferList { head: null, tail: null, length: 0 },
length: 0,
pipes: [],
flowing: true,
ended: false,
endEmitted: false,
reading: true,
constructed: true,
sync: false,
needReadable: true,
emittedReadable: false,
readableListening: false,
resumeScheduled: false,
errorEmitted: false,
emitClose: false,
autoDestroy: true,
destroyed: false,
errored: null,
closed: false,
closeEmitted: false,
defaultEncoding: 'utf8',
awaitDrainWriters: null,
multiAwaitDrain: false,
readingMore: false,
dataEmitted: true,
decoder: null,
encoding: null,
[Symbol(kPaused)]: false
},
_events: [Object: null prototype] {
end: [Function: onReadableStreamEnd],
free: [Function: onFree],
close: [ [Function: onClose], [Function: socketCloseListener] ],
timeout: [Function: onTimeout],
agentRemove: [Function: onRemove],
error: [Function: socketErrorListener],
finish: [Function: bound onceWrapper] {
listener: [Function: destroy]
}
},
_eventsCount: 7,
_maxListeners: undefined,
_writableState: WritableState {
objectMode: false,
highWaterMark: 16384,
finalCalled: true,
needDrain: false,
ending: true,
ended: true,
finished: false,
destroyed: false,
decodeStrings: false,
defaultEncoding: 'utf8',
length: 0,
writing: false,
corked: 0,
sync: false,
bufferProcessing: false,
onwrite: [Function: bound onwrite],
writecb: null,
writelen: 0,
afterWriteTickInfo: null,
buffered: [],
bufferedIndex: 0,
allBuffers: true,
allNoop: true,
pendingcb: 1,
constructed: true,
prefinished: false,
errorEmitted: false,
emitClose: false,
autoDestroy: true,
errored: null,
closed: false,
closeEmitted: false,
[Symbol(kOnFinished)]: []
},
allowHalfOpen: false,
_sockname: null,
_pendingData: null,
_pendingEncoding: '',
server: null,
_server: null,
parser: null,
_httpMessage: [Circular *1],
[Symbol(async_id_symbol)]: 628,
[Symbol(kHandle)]: TCP {
reading: true,
onconnection: null,
[Symbol(owner_symbol)]: [Circular *2]
},
[Symbol(lastWriteQueueSize)]: 0,
[Symbol(timeout)]: null,
[Symbol(kBuffer)]: null,
[Symbol(kBufferCb)]: null,
[Symbol(kBufferGen)]: null,
[Symbol(kCapture)]: false,
[Symbol(kSetNoDelay)]: true,
[Symbol(kSetKeepAlive)]: true,
[Symbol(kSetKeepAliveInitialDelay)]: 60,
[Symbol(kBytesRead)]: 0,
[Symbol(kBytesWritten)]: 0
},
_header: 'GET /api/v1/collections/AITA/count HTTP/1.1\r\n' +
'Accept: application/json, text/plain, */*\r\n' +
'User-Agent: axios/0.26.1\r\n' +
'Host: localhost:8000\r\n' +
'Connection: close\r\n' +
'\r\n',
_keepAliveTimeout: 0,
_onPendingData: [Function: nop],
agent: Agent {
_events: [Object: null prototype] {
free: [Function (anonymous)],
newListener: [Function: maybeEnableKeylog]
},
_eventsCount: 2,
_maxListeners: undefined,
defaultPort: 80,
protocol: 'http:',
options: [Object: null prototype] { noDelay: true, path: null },
requests: [Object: null prototype] {},
sockets: [Object: null prototype] { 'localhost:8000:': [ [Socket] ] },
freeSockets: [Object: null prototype] {},
keepAliveMsecs: 1000,
keepAlive: false,
maxSockets: Infinity,
maxFreeSockets: 256,
scheduling: 'lifo',
maxTotalSockets: Infinity,
totalSocketCount: 1,
[Symbol(kCapture)]: false
},
socketPath: undefined,
method: 'GET',
maxHeaderSize: undefined,
insecureHTTPParser: undefined,
path: '/api/v1/collections/AITA/count',
_ended: true,
res: IncomingMessage {
_readableState: ReadableState {
objectMode: false,
highWaterMark: 16384,
buffer: BufferList { head: null, tail: null, length: 0 },
length: 0,
pipes: [],
flowing: true,
ended: true,
endEmitted: true,
reading: false,
constructed: true,
sync: true,
needReadable: false,
emittedReadable: false,
readableListening: false,
resumeScheduled: false,
errorEmitted: false,
emitClose: true,
autoDestroy: true,
destroyed: true,
errored: null,
closed: true,
closeEmitted: true,
defaultEncoding: 'utf8',
awaitDrainWriters: null,
multiAwaitDrain: false,
readingMore: true,
dataEmitted: true,
decoder: null,
encoding: null,
[Symbol(kPaused)]: false
},
_events: [Object: null prototype] {
end: [ [Function: responseOnEnd], [Function: handleStreamEnd] ],
data: [Function: handleStreamData],
aborted: [Function: handlerStreamAborted],
error: [Function: handleStreamError]
},
_eventsCount: 4,
_maxListeners: undefined,
socket: <ref *2> Socket {
connecting: false,
_hadError: false,
_parent: null,
_host: 'localhost',
_closeAfterHandlingError: false,
_readableState: ReadableState {
objectMode: false,
highWaterMark: 16384,
buffer: [BufferList],
length: 0,
pipes: [],
flowing: true,
ended: false,
endEmitted: false,
reading: true,
constructed: true,
sync: false,
needReadable: true,
emittedReadable: false,
readableListening: false,
resumeScheduled: false,
errorEmitted: false,
emitClose: false,
autoDestroy: true,
destroyed: false,
errored: null,
closed: false,
closeEmitted: false,
defaultEncoding: 'utf8',
awaitDrainWriters: null,
multiAwaitDrain: false,
readingMore: false,
dataEmitted: true,
decoder: null,
encoding: null,
[Symbol(kPaused)]: false
},
_events: [Object: null prototype] {
end: [Function: onReadableStreamEnd],
free: [Function: onFree],
close: [Array],
timeout: [Function: onTimeout],
agentRemove: [Function: onRemove],
error: [Function: socketErrorListener],
finish: [Function]
},
_eventsCount: 7,
_maxListeners: undefined,
_writableState: WritableState {
objectMode: false,
highWaterMark: 16384,
finalCalled: true,
needDrain: false,
ending: true,
ended: true,
finished: false,
destroyed: false,
decodeStrings: false,
defaultEncoding: 'utf8',
length: 0,
writing: false,
corked: 0,
sync: false,
bufferProcessing: false,
onwrite: [Function: bound onwrite],
writecb: null,
writelen: 0,
afterWriteTickInfo: null,
buffered: [],
bufferedIndex: 0,
allBuffers: true,
allNoop: true,
pendingcb: 1,
constructed: true,
prefinished: false,
errorEmitted: false,
emitClose: false,
autoDestroy: true,
errored: null,
closed: false,
closeEmitted: false,
[Symbol(kOnFinished)]: []
},
allowHalfOpen: false,
_sockname: null,
_pendingData: null,
_pendingEncoding: '',
server: null,
_server: null,
parser: null,
_httpMessage: [Circular *1],
[Symbol(async_id_symbol)]: 628,
[Symbol(kHandle)]: TCP {
reading: true,
onconnection: null,
[Symbol(owner_symbol)]: [Circular *2]
},
[Symbol(lastWriteQueueSize)]: 0,
[Symbol(timeout)]: null,
[Symbol(kBuffer)]: null,
[Symbol(kBufferCb)]: null,
[Symbol(kBufferGen)]: null,
[Symbol(kCapture)]: false,
[Symbol(kSetNoDelay)]: true,
[Symbol(kSetKeepAlive)]: true,
[Symbol(kSetKeepAliveInitialDelay)]: 60,
[Symbol(kBytesRead)]: 0,
[Symbol(kBytesWritten)]: 0
},
httpVersionMajor: 1,
httpVersionMinor: 1,
httpVersion: '1.1',
complete: true,
rawHeaders: [
'date',
'Fri, 21 Apr 2023 14:33:00 GMT',
'server',
'uvicorn',
'content-length',
'49',
'content-type',
'application/json',
'connection',
'close'
],
rawTrailers: [],
aborted: false,
upgrade: false,
url: '',
method: null,
statusCode: 500,
statusMessage: 'Internal Server Error',
client: <ref *2> Socket {
connecting: false,
_hadError: false,
_parent: null,
_host: 'localhost',
_closeAfterHandlingError: false,
_readableState: ReadableState {
objectMode: false,
highWaterMark: 16384,
buffer: [BufferList],
length: 0,
pipes: [],
flowing: true,
ended: false,
endEmitted: false,
reading: true,
constructed: true,
sync: false,
needReadable: true,
emittedReadable: false,
readableListening: false,
resumeScheduled: false,
errorEmitted: false,
emitClose: false,
autoDestroy: true,
destroyed: false,
errored: null,
closed: false,
closeEmitted: false,
defaultEncoding: 'utf8',
awaitDrainWriters: null,
multiAwaitDrain: false,
readingMore: false,
dataEmitted: true,
decoder: null,
encoding: null,
[Symbol(kPaused)]: false
},
_events: [Object: null prototype] {
end: [Function: onReadableStreamEnd],
free: [Function: onFree],
close: [Array],
timeout: [Function: onTimeout],
agentRemove: [Function: onRemove],
error: [Function: socketErrorListener],
finish: [Function]
},
_eventsCount: 7,
_maxListeners: undefined,
_writableState: WritableState {
objectMode: false,
highWaterMark: 16384,
finalCalled: true,
needDrain: false,
ending: true,
ended: true,
finished: false,
destroyed: false,
decodeStrings: false,
defaultEncoding: 'utf8',
length: 0,
writing: false,
corked: 0,
sync: false,
bufferProcessing: false,
onwrite: [Function: bound onwrite],
writecb: null,
writelen: 0,
afterWriteTickInfo: null,
buffered: [],
bufferedIndex: 0,
allBuffers: true,
allNoop: true,
pendingcb: 1,
constructed: true,
prefinished: false,
errorEmitted: false,
emitClose: false,
autoDestroy: true,
errored: null,
closed: false,
closeEmitted: false,
[Symbol(kOnFinished)]: []
},
allowHalfOpen: false,
_sockname: null,
_pendingData: null,
_pendingEncoding: '',
server: null,
_server: null,
parser: null,
_httpMessage: [Circular *1],
[Symbol(async_id_symbol)]: 628,
[Symbol(kHandle)]: TCP {
reading: true,
onconnection: null,
[Symbol(owner_symbol)]: [Circular *2]
},
[Symbol(lastWriteQueueSize)]: 0,
[Symbol(timeout)]: null,
[Symbol(kBuffer)]: null,
[Symbol(kBufferCb)]: null,
[Symbol(kBufferGen)]: null,
[Symbol(kCapture)]: false,
[Symbol(kSetNoDelay)]: true,
[Symbol(kSetKeepAlive)]: true,
[Symbol(kSetKeepAliveInitialDelay)]: 60,
[Symbol(kBytesRead)]: 0,
[Symbol(kBytesWritten)]: 0
},
_consuming: false,
_dumped: false,
req: [Circular *1],
responseUrl: 'http://localhost:8000/api/v1/collections/AITA/count',
redirects: [],
[Symbol(kCapture)]: false,
[Symbol(kHeaders)]: {
date: 'Fri, 21 Apr 2023 14:33:00 GMT',
server: 'uvicorn',
'content-length': '49',
'content-type': 'application/json',
connection: 'close'
},
[Symbol(kHeadersCount)]: 10,
[Symbol(kTrailers)]: null,
[Symbol(kTrailersCount)]: 0
},
aborted: false,
timeoutCb: null,
upgradeOrConnect: false,
parser: null,
maxHeadersCount: null,
reusedSocket: false,
host: 'localhost',
protocol: 'http:',
_redirectable: Writable {
_writableState: WritableState {
objectMode: false,
highWaterMark: 16384,
finalCalled: false,
needDrain: false,
ending: false,
ended: false,
finished: false,
destroyed: false,
decodeStrings: true,
defaultEncoding: 'utf8',
length: 0,
writing: false,
corked: 0,
sync: true,
bufferProcessing: false,
onwrite: [Function: bound onwrite],
writecb: null,
writelen: 0,
afterWriteTickInfo: null,
buffered: [],
bufferedIndex: 0,
allBuffers: true,
allNoop: true,
pendingcb: 0,
constructed: true,
prefinished: false,
errorEmitted: false,
emitClose: true,
autoDestroy: true,
errored: null,
closed: false,
closeEmitted: false,
[Symbol(kOnFinished)]: []
},
_events: [Object: null prototype] {
response: [Function: handleResponse],
error: [Function: handleRequestError],
socket: [Function: handleRequestSocket]
},
_eventsCount: 3,
_maxListeners: undefined,
_options: {
maxRedirects: 21,
maxBodyLength: 10485760,
protocol: 'http:',
path: '/api/v1/collections/AITA/count',
method: 'GET',
headers: {
Accept: 'application/json, text/plain, */*',
'User-Agent': 'axios/0.26.1'
},
agent: undefined,
agents: { http: undefined, https: undefined },
auth: undefined,
hostname: 'localhost',
port: '8000',
nativeProtocols: { 'http:': [Object], 'https:': [Object] },
pathname: '/api/v1/collections/AITA/count'
},
_ended: true,
_ending: true,
_redirectCount: 0,
_redirects: [],
_requestBodyLength: 0,
_requestBodyBuffers: [],
_onNativeResponse: [Function (anonymous)],
_currentRequest: [Circular *1],
_currentUrl: 'http://localhost:8000/api/v1/collections/AITA/count',
[Symbol(kCapture)]: false
},
[Symbol(kCapture)]: false,
[Symbol(kBytesWritten)]: 0,
[Symbol(kEndCalled)]: true,
[Symbol(kNeedDrain)]: false,
[Symbol(corked)]: 0,
[Symbol(kOutHeaders)]: [Object: null prototype] {
accept: [ 'Accept', 'application/json, text/plain, */*' ],
'user-agent': [ 'User-Agent', 'axios/0.26.1' ],
host: [ 'Host', 'localhost:8000' ]
},
[Symbol(kUniqueHeaders)]: null
},
data: { error: "IndexError('list index out of range')" }
},
isAxiosError: true,
toJSON: [Function: toJSON]
}
@kevmok how much data are you trying to load in? can you try insert only a few documents to see if that works? we have an open PR that is close to landing that will switch from axios to fetch and remove this issue, if that is indeed your issue in this case.
@jeffchuber It's a pdf that's only 2 pages long and I also tried a PDF that was only 1 page long, so not big whatsoever
@kevmok any chance you could share the file with me so I can reproduce this? I would love to help! [email protected]
Yes, thank you for looking into this @jeffchuber SOP.pdf
thanks @kevmok - will look into this this afternoon
@kevmok could you provide me more code to reproduce this? feel free to share with [email protected] if it is sensitive
There are a number of functions involved here (PDFLoader, how the embeddings are being generated, etc.). ty!
No worries, here you go:
import { PDFLoader } from 'langchain/document_loaders/fs/pdf';
import { OpenAIEmbeddings } from 'langchain/embeddings/openai';
import { Chroma } from 'langchain/vectorstores/chroma';
import chalk from 'chalk';
import { OpenAI } from 'langchain/llms/openai';
import { loadQAStuffChain } from 'langchain/chains';
import { PromptTemplate } from 'langchain/prompts';
import { promptUser, log } from '../utils/index.js';
const embeddings = new OpenAIEmbeddings({
openAIApiKey: process.env.OPENAI_KEY,
});
/**
 * Loads documents from `src/pdfs/SOP.pdf`, builds a Chroma vector store from
 * them under the collection name `AITA`, prompts the user for a question, and
 * logs the answer produced by a "stuff" question-answering chain run over the
 * documents returned by a similarity search.
 *
 * @param llm - Model handed to `loadQAStuffChain` to generate the answer.
 * @returns A promise that resolves once the answer has been logged.
 */
export async function pdfLoader(llm: OpenAI): Promise<void> {
  const loader = new PDFLoader('src/pdfs/SOP.pdf');
  log(loader);
  const docs = await loader.load();
  log(docs);
  const vectorStore = await Chroma.fromDocuments(docs, embeddings, {
    collectionName: 'AITA',
  });
  await vectorStore.ensureCollection();
  // NOTE(review): promptUser is used synchronously here — confirm it does not
  // return a Promise; if it does, it needs an `await` as well.
  const userInput = promptUser('What would like to look into: ');
  // similaritySearch is asynchronous: without `await`, the chain below would be
  // handed a pending Promise instead of the array of matching documents.
  const resultDocs = await vectorStore.similaritySearch(userInput);
  const chain = loadQAStuffChain(llm);
  // NOTE(review): verify the output key of this chain — LangChain JS examples
  // read `text` from the response, in which case `result` would be undefined.
  const { result } = await chain.call({
    input_documents: resultDocs,
    question: userInput,
  });
  log(chalk.greenBright(result));
}
@jeffchuber I did something really similar in python and it works:
import openai
from dotenv import load_dotenv
import os
from langchain import OpenAI
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.document_loaders import PyPDFLoader
from langchain.vectorstores import Chroma
from langchain.chains.question_answering import load_qa_chain
from langchain.chat_models import ChatOpenAI
# Reproduction script: embed SOP.pdf into a persisted Chroma store, then
# answer one question typed by the user with a "stuff" QA chain.

# Pull OPENAI_KEY from the environment (populated from a .env file if present).
load_dotenv()
api_key = os.environ.get("OPENAI_KEY")

chat_model = ChatOpenAI(
    temperature=0.7,
    openai_api_key=api_key,
    model_name="gpt-3.5-turbo",
)

persist_dir = "pdfss"
embedder = OpenAIEmbeddings(openai_api_key=api_key)

# Disabled guard that would skip re-embedding when the store already exists:
# if not os.path.exists(persist_dir):
print("embedding the doc now")
split_pages = PyPDFLoader("./SOP.pdf").load_and_split()
store = Chroma.from_documents(
    documents=split_pages,
    embedding=embedder,
    persist_directory=persist_dir,
)
store.persist()
# Disabled alternative that reopens the persisted store instead of rebuilding:
# store = Chroma(persist_directory=persist_dir, embedding_function=embedder)

question = input("Please enter what you are looking for: ")
matching_docs = store.similarity_search(question)

qa_chain = load_qa_chain(chat_model, chain_type="stuff")
chain_inputs = {"input_documents": matching_docs, "question": question}
answer = qa_chain(chain_inputs)
print(answer["output_text"])
Closing this because it is stale and I have not been able to reproduce it.
@jeffchuber @kevmok for posterity's sake: I ran into this exact issue. The problem was that I was querying a user with no collection or documents. Once I created the collection and added documents for the user, everything worked.
Yep, to further clarify, a collection is created when you create the VectorStore object with a collection ID, such as:
Chroma(persist_directory=settings.persist_directory,
embedding_function=embeddings.sentence_transformer_ef,
client_settings=settings.settings, collection_name=fixed_name)
When doing that, you are initializing a vectorstore and creating the collection at the same time. If later on you decide to delete the collection and try to use the same vectorstore, it will fail.
You need to re-instantiate the vectorstore.
I had the same error. It turns out that the PDF file I uploaded contained only an image. When I used another PDF, the error was gone.
same issue with some PDFs. very annoying.
I'm using unstructured.cleaners.core to clean the file. -# NOT the issue
chunks = split_by_character_recursive(cleaned_text, 1200, 50)
db = Create_Fiaas_db(chunks)
def Create_Fiaas_db(chunks: List[str]) -> FAISS:
    """Build a FAISS vector store from ``chunks`` using OpenAI embeddings.

    Args:
        chunks: The pieces of text to embed and index.
            NOTE(review): annotated as ``List[str]``, but
            ``FAISS.from_documents`` presumably expects Document objects —
            confirm what the caller actually passes.

    Returns:
        The FAISS store returned by ``FAISS.from_documents``.

    Raises:
        ValueError: If ``chunks`` is empty.
    """
    # With nothing to embed, FAISS fails on ``len(embeddings[0])`` with a bare
    # "list index out of range"; report the real cause instead.
    if not chunks:
        raise ValueError(
            "Create_Fiaas_db received no chunks; the source document "
            "produced no text to embed."
        )
    embedding_model = OpenAIEmbeddings()
    return FAISS.from_documents(chunks, embedding_model)
File "C:\AI\UI_Faiss_Brain\myenv\Lib\site-packages\langchain\vectorstores\faiss.py", line 562, in __from index = faiss.IndexFlatL2(len(embeddings[0]))
Has anyone found a solution to this issue? I’m encountering the same problem when using this PDF here:
https://papers.nips.cc/paper_files/paper/2020/file/0004d0b59e19461ff126e3a08a814c33-Paper.pdf
I'm using Python with ChromaDB:
from langchain_chroma import Chroma
Chroma.from_documents(documents, self.embedding_model)
chromadb==0.5.3
I'm having the same problem when doing
from langchain_chroma import Chroma
vector_store = Chroma.from_documents(
documents=docs,
embedding=embedding_model,
persist_directory=vector_store_path
)
Where
docs: List[langchain_core.documents.base.Document]
embedding: langchain_google_vertexai.embeddings.VertexAIEmbeddings
vector_store_path: str
The function above gives me
This is my call stack when the error is raised
Tracing back to the origin of the error, in this file venv/lib/python3.10/site-packages/langchain_community/vectorstores/chroma.py texts [1] have len 57, but embeddings [2] have len 31.
The line `embeddings = self._embedding_function.embed_documents(texts)` is not returning a list of the same size as `texts`, and this is why the code crashes on
`for idx, m in enumerate(metadata):`
`[embeddings[idx] for idx in non_empty_ids] if embeddings else None`
given that `metadata` has the same size as `texts`.
@matheusft, there are a number of reasons why this may fail. If you change the Embeddings do you still see the same error?
Can you try with HF embeddings and let me know if the same issue happens.
from langchain_huggingface import HuggingFaceEmbeddings
langchain_embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
@matheusft, there are a number of reasons why this may fail. If you change the Embeddings do you still see the same error?
Can you try with HF embeddings and let me know if the same issue happens.
from langchain_huggingface import HuggingFaceEmbeddings langchain_embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
It works ok with HuggingFaceEmbeddings
@matheusft, excellent then, let's dig into LC's vertex AI to figure out why this is happening - https://github.com/langchain-ai/langchain-google/blob/main/libs/vertexai/langchain_google_vertexai/embeddings.py
by the way, Chroma also offers a VertexAI embedding function - https://github.com/chroma-core/chroma/blob/28b37392594dd7ba60e35c53f098d7f88a9d3988/chromadb/utils/embedding_functions/google_embedding_function.py#L84
You can use that to see if it works. We have a wrapper that turns Chroma embedding function into LC Embeddings - https://cookbook.chromadb.dev/integrations/langchain/embeddings/#chroma-built-in-langchain-adapter
@matheusft, excellent then, let's dig into LC's vertex AI to figure out why this is happening - https://github.com/langchain-ai/langchain-google/blob/main/libs/vertexai/langchain_google_vertexai/embeddings.py
by the way, Chroma also offers a VertexAI embedding function -
https://github.com/chroma-core/chroma/blob/28b37392594dd7ba60e35c53f098d7f88a9d3988/chromadb/utils/embedding_functions/google_embedding_function.py#L84
You can use that to see if it works. We have a wrapper that turns Chroma embedding function into LC Embeddings - https://cookbook.chromadb.dev/integrations/langchain/embeddings/#chroma-built-in-langchain-adapter
Thanks for the suggestion.
I can't really try that as the GoogleVertexEmbeddingFunction requires an api_key (which I don't have) and I need to define my embedding_model using GCP's project_id + location.
from langchain_google_vertexai import VertexAIEmbeddings
embedding_model = VertexAIEmbeddings(
model_name=my_llm_model,
project_id=my_project_id,
location=my_location
)