OpenAI ChatGPT: Python code for having AI listen to podcasts for you
import os
import openai
from pydub import AudioSegment
from langchain.chat_models import ChatOpenAI
from langchain.text_splitter import SpacyTextSplitter
from llama_index import GPTListIndex, LLMPredictor, ServiceContext, SimpleDirectoryReader
from llama_index.node_parser import SimpleNodeParser
# Audio summarization
def audio_summary():
    # Load the source audio and split it into clips small enough for the Whisper API
    # NOTE: the exact source file name is an assumption; point this at your own MP3
    podcast = AudioSegment.from_mp3("./data/podcast_long.mp3")
    # PyDub handles time in milliseconds
    five_minutes = 5 * 60 * 1000
    total_length = len(podcast)
    start = 0
    index = 0
    while start < total_length:
        end = start + five_minutes
        if end < total_length:
            chunk = podcast[start:end]
        else:
            chunk = podcast[start:]
        with open(f"./data/podcast_clip_{index}.mp3", "wb") as f:
            chunk.export(f, format="mp3")
        start = end
        index += 1

    # Transcribe each clip with Whisper; the prompt gives it context
    # ("这是一段Onboard播客" = "This is an episode of the Onboard podcast")
    prompt = "这是一段Onboard播客"
    for i in range(index):
        clip = f"./data/podcast_clip_{i}.mp3"
        audio_file = open(clip, "rb")
        transcript = openai.Audio.transcribe("whisper-1", audio_file, prompt=prompt)
        # mkdir ./data/transcripts if not exists
        if not os.path.exists("./data/transcripts"):
            os.makedirs("./data/transcripts")
        # write to file
        with open(f"./data/transcripts/podcast_clip_{i}.txt", "w") as f:
            f.write(transcript['text'])
        # get last sentence of the transcript and reuse it as the prompt for the
        # next clip, so the transcription stays coherent across clip boundaries
        sentences = transcript['text'].split("。")
        prompt = sentences[-1]

    # Build a llama_index list index over the transcripts and summarize them
    llm_predictor = LLMPredictor(llm=ChatOpenAI(temperature=0,
                                                model_name="gpt-3.5-turbo", max_tokens=1024))
    text_splitter = SpacyTextSplitter(pipeline="zh_core_web_sm", chunk_size=2048)
    parser = SimpleNodeParser(text_splitter=text_splitter)
    documents = SimpleDirectoryReader('./data/transcripts').load_data()
    nodes = parser.get_nodes_from_documents(documents)
    service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor)
    list_index = GPTListIndex(nodes=nodes, service_context=service_context)
    # "请你用中文总结一下我们的播客内容:" = "Please summarize our podcast content in Chinese:"
    response = list_index.query("请你用中文总结一下我们的播客内容:",
                                response_mode="tree_summarize")
    print(response)


openai.api_key = "YOUR API KEY"
audio_summary()
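
The script above hardcodes the API key at the bottom. As a minimal sketch (not part of the original code), the same call can read the key from an environment variable instead; the package names and version pins in the comments are assumptions based on the 0.x-era openai, langchain, and llama_index imports used above.

# Minimal sketch, assumptions: pre-1.0 openai SDK, llama-index with GPTListIndex/.query(),
# langchain 0.0.x, plus pydub, spacy with the zh_core_web_sm pipeline, and ffmpeg for pydub.
# Rough setup (exact pins are assumptions):
#   pip install "openai<1.0" "llama-index<0.6" langchain pydub spacy
#   python -m spacy download zh_core_web_sm
import os
import openai

# Read the key from the environment instead of hardcoding it in the source file
openai.api_key = os.environ.get("OPENAI_API_KEY")
audio_summary()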