semantic-split
semantic-split copied to clipboard
How can I limit the number of tokens?
from fastapi import FastAPI
from pydantic import BaseModel
from semantic_split import SimilarSentenceSplitter, SentenceTransformersSimilarity, SpacySentenceSplitter

app = FastAPI()


class TextRequest(BaseModel):
    """Request body for the /split endpoint."""
    text: str
    max_tokens: int = 8192  # Size cap for chunks; name kept for backward compatibility.


@app.post("/split")
async def split_text(request: TextRequest):
    """Split ``request.text`` into groups of semantically similar sentences.

    Returns a JSON object ``{"result": <list of sentence groups>}``.

    NOTE(review): ``SimilarSentenceSplitter`` does not accept a ``max_tokens``
    constructor argument — passing one raises ``TypeError``. The only size
    limit the library exposes is ``group_max_sentences`` on ``split()``, which
    caps *sentences* per group, not tokens — confirm against the
    semantic-split documentation.
    """
    model = SentenceTransformersSimilarity()
    sentence_splitter = SpacySentenceSplitter()
    splitter = SimilarSentenceSplitter(model, sentence_splitter)
    # Enforce the limit through the supported split() parameter instead of
    # the unsupported constructor keyword.
    res = splitter.split(request.text, group_max_sentences=request.max_tokens)
    return {"result": res}


# Run the server only when executed directly as a script.
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="192.168.1.68", port=8887)
I am running it as shown above, but I need a way to limit the number of tokens. Is there any way to do this?
from fastapi import FastAPI
from pydantic import BaseModel
from semantic_split import SimilarSentenceSplitter, SentenceTransformersSimilarity, SpacySentenceSplitter

app = FastAPI()


class TextRequest(BaseModel):
    """Payload accepted by the /split endpoint."""
    text: str
    max_sentences: int = 20  # Default cap on sentences per group.


@app.post("/split")
async def split_text(request: TextRequest):
    """Group the submitted text into chunks of semantically similar sentences.

    Responds with ``{"result": <list of sentence groups>}``.
    """
    # Assemble the splitter pipeline: an embedding-based similarity scorer
    # plus a spaCy-backed sentence tokenizer.
    similarity = SentenceTransformersSimilarity()
    tokenizer = SpacySentenceSplitter()
    semantic_splitter = SimilarSentenceSplitter(similarity, tokenizer)
    # Cap how many sentences each resulting group may contain.
    chunks = semantic_splitter.split(
        request.text, group_max_sentences=request.max_sentences
    )
    return {"result": chunks}


# Start the API server when invoked directly.
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="192.168.1.68", port=8887)
I will try this solution.