import uvicorn
from fastapi import FastAPI, Body
from fastapi.responses import JSONResponse
from typing import Dict
from modelscope import AutoTokenizer, AutoModel, snapshot_download

app = FastAPI()

model_dir = snapshot_download('ZhipuAI/chatglm3-6b', cache_dir=r'D:\Transformers')
tokenizer = AutoTokenizer.from_pretrained(model_dir, trust_remote_code=True)
# .half() converts the weights and computation from 32-bit to 16-bit floating point
model = AutoModel.from_pretrained(model_dir, trust_remote_code=True).half().cpu()

@app.post("/chat")
def chat(data: Dict):
    query = data['query']
    history = data['history']
    if history == "":
        history = []
    response, history = model.chat(tokenizer, query, history=history, top_p=0.95, temperature=0.95)
    response = {'response': response, 'history': history}
    return JSONResponse(content=response)

if __name__ == '__main__':
    uvicorn.run(app, host="127.0.0.1", port=7866)
Click Debug to run it; this starts an API service on port 7866.
(Figure 5)
Let's test it with a client tool like Postman:
(Figure 6)
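If you don't have Postman handy, the same test can be done from Python with requests (a minimal sketch; the question text is just an example, and it assumes the service above is running on port 7866):

import requests

payload = {"query": "Hello, who are you?", "history": ""}
resp = requests.post("http://127.0.0.1:7866/chat", json=payload).json()
print(resp['response'])  # the model's answer
print(resp['history'])   # updated conversation history; pass it back on the next call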
Writing the client
pip install langchain
pip install langchain-community
import json
import requests
import logging
from typing import Optional, List, Mapping, Any
import langchain
from langchain.llms.base import LLM
from langchain.cache import InMemoryCache

logging.basicConfig(level=logging.INFO)
# Cache identical prompts in memory so repeated questions don't hit the server again
langchain.llm_cache = InMemoryCache()

class ChatLLM(LLM):
    url: str = "http://127.0.0.1:7866/chat"
    history: list = []

    @property
    def _llm_type(self) -> str:
        return "chatglm"

    def _construct_query(self, prompt: str) -> str:
        query = {
            "history": self.history,
            "query": prompt
        }
        return json.dumps(query)

    def _post(self, url: str, query: str) -> Any:
        response = requests.post(url, data=query).json()
        return response

    def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
        query = self._construct_query(prompt=prompt)
        response = self._post(url=self.url, query=query)
        response_chat = response['response']
        self.history = response['history']
        return response_chat

    @property
    def _identifying_params(self) -> Mapping[str, Any]:
        _param_dict = {
            "url": self.url
        }
        return _param_dict
if __name__ == "__main__":
    llm = ChatLLM()
    while True:
        user_input = input("me: ")
        response = llm(user_input)
        print(f"ChatGLM: {response}")
pip install gradio
# If, after installing gradio, you get "ImportError: DLL load failed while importing _multiarray_umath: The specified module could not be found", run the following:
pip install numpy==1.25.2
import gradio as gr
from client import ChatLLM  # import the ChatLLM class we defined above in client.py

llm = ChatLLM()

# Simulated streaming: yield the finished answer back to the UI word by word
def stream_translate(text):
    response = llm(text)
    for chunk in response.split():
        yield chunk + " "

demo = gr.Interface(fn=stream_translate, inputs="text", outputs="text", title="ChatGLM",
                    description="A chatbot powered by ChatGLM.")
demo.launch()
Execute it:
(Figure 8)
Open http://127.0.0.1:7860 in your browser and you will see our client. Let's ask the big model a few random questions (again, a friendly reminder: the video was edited; that last answer took over 1500 seconds!):
(Figure 9)
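A caveat about the streaming above: the /chat endpoint returns the complete answer in one response, so the Gradio generator only simulates streaming by replaying the finished text word by word. For real incremental output you would stream on the server side too. Here is a rough sketch, assuming ChatGLM3's remote code exposes model.stream_chat (which yields the cumulative response generated so far) and using FastAPI's StreamingResponse; the /stream_chat route name is my own:

from fastapi.responses import StreamingResponse

@app.post("/stream_chat")
def stream_chat(data: Dict):
    query = data['query']
    history = data['history'] or []

    def generate():
        sent = 0
        # stream_chat yields (response_so_far, history) tuples; emit only the new tail
        for response, _ in model.stream_chat(tokenizer, query, history=history):
            yield response[sent:]
            sent = len(response)

    return StreamingResponse(generate(), media_type="text/plain")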
That's the end of this introductory walkthrough. If enough people read it, I will follow up with posts on model fine-tuning and training. I hope you enjoyed it!
Finally
After countless rounds of trial and error, I've built a little app, [i Lyrics], which is free to use with no strings attached, all source code included. It is built on the chatglm4-9b model and covers everything from deployment to training: a general-purpose chat function, and at its core, looking up lyrics and writing lyrics from a song title. I entered it in the Gitee AI innovation application competition; you can try it online, and a small vote would be appreciated: /events/iluvatar-ai-app-contest/detail?app=36
(Figure 10)