kor
kor copied to clipboard
create_extraction_chain with HuggingFaceEndpoint: 'HuggingFaceEndpoint' object is not subscriptable
Hello,
first of all: thanks a lot for this great package!
I'm running some experiments in text extraction. Following this approach, I'm trying to use the KOR extraction instead of the LangChain pydantic one.
But I'm running into an issue when calling the LLM.
As the examples only use OpenAI, I wonder if external LLM from huggingface is allowed.
Thank you a lot.
# Remote Hugging Face Inference API endpoint used as the extraction LLM.
# NOTE(review): this is a plain text-completion LangChain LLM, not an OpenAI
# chat model with function-calling — chains built on OpenAI function-calling
# (langchain.chains.openai_functions.*) will not work with it; confirm which
# create_extraction_chain is actually imported before calling it.
self.llm = HuggingFaceEndpoint(
repo_id="meta-llama/Meta-Llama-3-8B-Instruct",
max_length=128,
temperature=0.5
)
(side note: I also tried loading the LLM using HuggingFaceHub
but error is the same.)
# KOR schema describing the chunk identifier(s) to pull out of the LLM response.
chunk_id_schema = Object(
    id="chunk_identifier",  # Unique identifier for the schema
    description="A schema for extracting chunk identifiers from the text.",
    examples=[
        # (input text, expected extraction) pairs used as few-shot examples
        ("Add this statement to chunk 5f3z9k.",
         [{"chunk_id": "5f3z9k"}]),
        ("No relevant chunk found for this input.",
         [{"chunk_id": None}]),
        ("This data should be part of chunk b29x2d.",
         [{"chunk_id": "b29x2d"}])
    ],
    attributes=[
        Text(
            id="chunk_id",
            description="The unique identifier of a text chunk, which is part of a larger document or dataset.",
            many=False  # Assumes that each example has at most one chunk_id
        )
    ],
    many=True  # Indicates that multiple instances of this schema could be matched in a single input
)

# BUG FIX: the traceback shows the call resolving to
# langchain.chains.openai_functions.extraction.create_extraction_chain, whose
# signature is create_extraction_chain(schema: dict, llm: BaseLanguageModel, ...).
# Passing the LLM as the first positional argument binds it to `schema`, and
# _convert_schema() then evaluates schema["properties"], raising
# TypeError: 'HuggingFaceEndpoint' object is not subscriptable.
# The intended function is kor's create_extraction_chain(llm, node), which is
# prompt-based and works with any LangChain LLM (including HuggingFaceEndpoint),
# not only OpenAI function-calling models. Import it explicitly so it shadows
# any langchain import of the same name.
from kor import create_extraction_chain

extraction_chain = create_extraction_chain(self.llm, chunk_id_schema)
extraction_found = extraction_chain.run(chunk_found)
Throws:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
Cell In[22], line 435
410 ## Comment and uncomment the propositions to your hearts content
411 propositions = [
412 'The month is October.',
413 'The year is 2023.',
(...)
432 # 'In fame, power, military victories, knowledge, and benefit to humanity, the rich get richer.'
433 ]
--> 435 ac.add_propositions(propositions)
436 ac.pretty_print_chunks()
437 ac.pretty_print_chunk_outline()
Cell In[22], line 53, in AgenticChunker.add_propositions(self, propositions)
51 def add_propositions(self, propositions):
52 for proposition in propositions:
---> 53 self.add_proposition(proposition)
Cell In[22], line 66, in AgenticChunker.add_proposition(self, proposition)
63 self._create_new_chunk(proposition)
64 return
---> 66 chunk_id = self._find_relevant_chunk(proposition)
68 # If a chunk was found then add the proposition to it
69 if chunk_id:
Cell In[22], line 357, in AgenticChunker._find_relevant_chunk(self, proposition)
332 # Extraction to catch-all LLM responses. This is a bandaid
333 #extraction_chain = create_extraction_chain_pydantic(pydantic_schema=ChunkID, llm=self.llm)
334 #extraction_found = extraction_chain.run(chunk_found)
335
336 # with KOR
337 chunk_id_schema = Object(
338 id="chunk_identifier", # Unique identifier for the schema
339 description="A schema for extracting chunk identifiers from text.",
(...)
355 many=True # Indicates that multiple instances of this schema could be matched in a single input
356 )
--> 357 extraction_chain = create_extraction_chain(self.llm, chunk_id_schema)
358 print(extraction_chain.prompt.format_prompt(text="[user input]").to_string())
359 breakpoint()
File ~/.miniconda3/lib/python3.10/site-packages/langchain/chains/openai_functions/extraction.py:66, in create_extraction_chain(schema, llm, prompt, tags, verbose)
46 def create_extraction_chain(
47 schema: dict,
48 llm: BaseLanguageModel,
(...)
51 verbose: bool = False,
52 ) -> Chain:
53 """Creates a chain that extracts information from a passage.
54
55 Args:
(...)
64 Chain that can be used to extract information from a passage.
65 """
---> 66 function = _get_extraction_function(schema)
67 extraction_prompt = prompt or ChatPromptTemplate.from_template(_EXTRACTION_TEMPLATE)
68 output_parser = JsonKeyOutputFunctionsParser(key_name="info")
File ~/.miniconda3/lib/python3.10/site-packages/langchain/chains/openai_functions/extraction.py:27, in _get_extraction_function(entity_schema)
20 def _get_extraction_function(entity_schema: dict) -> dict:
21 return {
22 "name": "information_extraction",
23 "description": "Extracts the relevant information from the passage.",
24 "parameters": {
25 "type": "object",
26 "properties": {
---> 27 "info": {"type": "array", "items": _convert_schema(entity_schema)}
28 },
29 "required": ["info"],
30 },
31 }
File ~/.miniconda3/lib/python3.10/site-packages/langchain/chains/openai_functions/utils.py:23, in _convert_schema(schema)
22 def _convert_schema(schema: dict) -> dict:
---> 23 props = {k: {"title": k, **v} for k, v in schema["properties"].items()}
24 return {
25 "type": "object",
26 "properties": props,
27 "required": schema.get("required", []),
28 }
TypeError: 'HuggingFaceEndpoint' object is not subscriptable