How can I use RediSearch to match a more complex wildcard pattern such as the one in the code below?
import uuid
from pydantic import BaseModel
from redis.asyncio import Redis
from redis.commands.search.field import TagField, TextField
from redis.commands.search.indexDefinition import IndexDefinition, IndexType
from redis.commands.search.query import Query
from rich import print as pprint
from app.config.config import load_config
class Indexation(BaseModel):
    prefix: str
    document_id: str
    chunk_ids: list[str]
class IndexationDAO:
    def __init__(self, redis_client: Redis):
        self.redis_client = redis_client
        self.key_prefix = "indexation:"
    async def create_indexes(self):
        await self.redis_client.ft("indexation-idx").create_index(
            fields=[
                TextField(
                    name="$.prefix",
                    no_stem=False,
                    # withsuffixtrie=True,  # ?
                    as_name="prefix",
                ),
                TagField(name="$.document_id", as_name="document_id"),
            ],
            definition=IndexDefinition(prefix=[self.key_prefix], index_type=IndexType.JSON),
        )
    async def add_indexation(self, indexation: Indexation):
        key = f"{self.key_prefix}{uuid.uuid4()}"
        await self.redis_client.json().set(key, "$", indexation.model_dump())  # type: ignore
        return key
    async def get_indexations(self, document_id: str):
        query = Query(f'@document_id:"{{{document_id}}}"')
        docs = await self.redis_client.ft("indexation-idx").search(query=query)
        return docs
    async def search_indexations(self, prefix: str):
        # query = Query(f"@prefix:{prefix}").dialect(2)
        query = Query(f"@prefix:{prefix}")
        docs = await self.redis_client.ft("indexation-idx").search(query=query)
        return docs
async def test_indexation_dao():
    config = await load_config()
    async with Redis(host=config.redis_host, port=config.redis_port) as redis_client:
        await redis_client.flushall()
        indexation_dao = IndexationDAO(redis_client=redis_client)
        await indexation_dao.create_indexes()
        await indexation_dao.add_indexation(
            Indexation(prefix="folder/animals", document_id="fileA", chunk_ids=["chunk0", "chunk1"]),
        )
        await indexation_dao.add_indexation(
            Indexation(prefix="animals/folder", document_id="fileB", chunk_ids=["chunk2", "chunk3"]),
        )
        # indexations = await indexation_dao.search_indexations(prefix="fo*nimals")
        indexations = await indexation_dao.search_indexations(prefix="fo*/*nimals")
        pprint(indexations)
The search returns 2 documents instead of the expected 1:
tests/integration/vector_stores/test_indexation_dao.py
Result{2 total, docs: [
Document {'id': 'indexation:403768c6-0a44-4cec-a354-417a559fda5a', 'payload': None, 'json': '{"prefix":"folder/animals","document_id":"fileA","chunk_ids":["chunk0","chunk1"]}'},
Document {'id': 'indexation:2fc81fb3-6937-4c11-9f9a-c3c4045c4f4b', 'payload': None, 'json': '{"prefix":"animals/folder","document_id":"fileB","chunk_ids":["chunk2","chunk3"]}'}
]}
The goal is to group objects without changing the keys, since prefix matching on keys would require a SCAN. However, I am confused by how TEXT indexes handle token order, infix wildcards, the level of "depth" (folder/subfolder vs folder/project/subfolder), and partial matches (matching just "*der" rather than the full "subfolder" token).
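One direction I am considering is sketched below (untested): since a TEXT field tokenizes "folder/animals" into the separate tokens "folder" and "animals", that is presumably why fo*/*nimals matches both documents. Indexing prefix as a TAG field would keep the whole path as a single value, and the wildcard syntax w'...' could then be applied to it. This sketch assumes RediSearch >= 2.6.1 and query dialect 2; the index and field names simply mirror the code above and are not fixed.

# Sketch only: switch "prefix" from TEXT to TAG and query it with the
# w'...' wildcard syntax (assumes RediSearch >= 2.6.1 and DIALECT 2).
from redis.commands.search.field import TagField
from redis.commands.search.indexDefinition import IndexDefinition, IndexType
from redis.commands.search.query import Query

async def create_indexes(redis_client, key_prefix="indexation:"):
    await redis_client.ft("indexation-idx").create_index(
        fields=[
            # TAG keeps "folder/animals" as one value instead of splitting it
            # into the tokens "folder" and "animals" like a TEXT field would.
            TagField(name="$.prefix", as_name="prefix"),
            TagField(name="$.document_id", as_name="document_id"),
        ],
        definition=IndexDefinition(prefix=[key_prefix], index_type=IndexType.JSON),
    )

async def search_indexations(redis_client, pattern: str):
    # With dialect 2, {w'fo*/*nimals'} compares the pattern against the whole
    # tag value, so only "folder/animals" should match.
    query = Query(f"@prefix:{{w'{pattern}'}}").dialect(2)
    return await redis_client.ft("indexation-idx").search(query=query)

Is this a reasonable way to get infix and multi-level wildcard matching, or is there a TEXT-field approach that respects token order and depth?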