# System prompt for the QA chain. `{summaries}` is the placeholder for the
# context text; the instructions ask the model to cite sources on a
# "SOURCES:" line and to answer "I don't know" rather than guess.
system_template = """Use the following pieces of context to answer the users question.
Take note of the sources and include them in the answer in the format: "SOURCES: source1 source2", use "SOURCES" in capital letters regardless of the number of sources.
If you don't know the answer, just say that "I don't know", don't try to make up an answer.
----------------
{summaries}"""

# Build the two chat turns separately, then assemble them in order:
# system instructions first, followed by the user's question.
system_message = SystemMessagePromptTemplate.from_template(system_template)
human_message = HumanMessagePromptTemplate.from_template("{question}")
messages = [system_message, human_message]
prompt = ChatPromptTemplate.from_messages(messages)
# Open the Yellowbrick-backed vector store using OpenAI embeddings.
embeddings = OpenAIEmbeddings()
vector_store = Yellowbrick(
    embeddings,
    yellowbrick_connection_string,
    embedding_table,  # Change the table name to reflect your embeddings
)

# Describe an LSH index (8 hyperplanes, Hamming distance 2) and create it on
# the store; the same parameters object is reused when querying.
lsh_options = {"num_hyperplanes": 8, "hamming_distance": 2}
lsh_params = Yellowbrick.IndexParams(Yellowbrick.IndexType.LSH, lsh_options)
vector_store.create_index(lsh_params)
# Pass the custom chat prompt through to the document-combining chain.
chain_type_kwargs = {"prompt": prompt}

llm = ChatOpenAI(
    model_name="gpt-3.5-turbo",  # Modify model_name if you have access to GPT-4
    temperature=0,
    max_tokens=256,
)

# `k` has to go inside `search_kwargs`: the retriever forwards only
# `search_kwargs` to the vector store's search call, so a top-level `k=5`
# passed to `as_retriever()` never reaches the search and the store's default
# result count is used instead. `index_params` selects the LSH index for the
# lookup.
retriever = vector_store.as_retriever(
    search_kwargs={"k": 5, "index_params": lsh_params},
)

# Question-answering chain that also reports sources.
# `return_source_documents=True` makes the retrieved documents available in
# the result under "source_documents".
chain = RetrievalQAWithSourcesChain.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
    chain_type_kwargs=chain_type_kwargs,
)
def print_result_sources(query):
    """Run ``query`` through the QA chain and display the result as Markdown.

    The rendered text contains the question, the chain's ``"answer"``, the
    chain's ``"sources"`` value, and a comma-separated list of the distinct
    ``metadata["source"]`` values of every document in
    ``result["source_documents"]``.

    Args:
        query: The question passed to ``chain`` and echoed in the output.

    Returns:
        None. The Markdown is rendered via ``display(Markdown(...))``.
    """
    result = chain(query)

    # De-duplicate with dict.fromkeys rather than set(): it keeps the order in
    # which the documents were returned, so the list is stable between runs
    # instead of depending on string hash ordering.
    unique_sources = dict.fromkeys(
        doc.metadata["source"] for doc in result["source_documents"]
    )
    all_sources = ", ".join(unique_sources)

    # The literal below is flush-left on purpose: leading whitespace would
    # become part of the Markdown text.
    output_text = f"""### Question:
{query}
### Answer:
{result["answer"]}
### Sources:
{result["sources"]}
### All relevant sources:
{all_sources}
"""
    display(Markdown(output_text))
# Use the chain to query: run each sample question in order and render its
# answer together with the sources.
sample_questions = (
    "How many databases can be in a Yellowbrick Instance?",
    "Whats an easy way to add users in bulk to Yellowbrick?",
)
for sample_question in sample_questions:
    print_result_sources(sample_question)