diff --git a/api.py b/api.py
index 63c757c..e4236b7 100644
--- a/api.py
+++ b/api.py
@@ -52,5 +52,9 @@ async def create_item(request: Request):
 if __name__ == '__main__':
     tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True)
     model = AutoModel.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True).cuda()
+    # Multi-GPU support: use the three lines below in place of the two above, changing num_gpus to the number of GPUs you actually have
+    # model_path = "THUDM/chatglm2-6b"
+    # tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+    # model = load_model_on_gpus(model_path, num_gpus=2)
     model.eval()
     uvicorn.run(app, host='0.0.0.0', port=8000, workers=1)
diff --git a/openai_api.py b/openai_api.py
index 59262db..820f56f 100644
--- a/openai_api.py
+++ b/openai_api.py
@@ -158,6 +158,10 @@ async def predict(query: str, history: List[List[str]], model_id: str):
 if __name__ == "__main__":
     tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True)
     model = AutoModel.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True).cuda()
+    # Multi-GPU support: use the three lines below in place of the two above, changing num_gpus to the number of GPUs you actually have
+    # model_path = "THUDM/chatglm2-6b"
+    # tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+    # model = load_model_on_gpus(model_path, num_gpus=2)
     model.eval()

     uvicorn.run(app, host='0.0.0.0', port=8000, workers=1)
diff --git a/web_demo2.py b/web_demo2.py
index a908409..b9e8b62 100644
--- a/web_demo2.py
+++ b/web_demo2.py
@@ -14,6 +14,10 @@ st.set_page_config(
 def get_model():
     tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True)
     model = AutoModel.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True).cuda()
+    # Multi-GPU support: use the three lines below in place of the two above, changing num_gpus to the number of GPUs you actually have
+    # model_path = "THUDM/chatglm2-6b"
+    # tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+    # model = load_model_on_gpus(model_path, num_gpus=2)
     model = model.eval()
     return tokenizer, model
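
Note: the commented-out multi-GPU path above calls load_model_on_gpus, the helper defined in this repository's utils.py, which must be imported for those lines to run. As a usage sketch (num_gpus=2 is a placeholder; set it to the number of cards actually available), the __main__ block of api.py would become:

    from transformers import AutoTokenizer
    from utils import load_model_on_gpus  # helper defined in this repo's utils.py

    model_path = "THUDM/chatglm2-6b"
    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
    # Spreads the model's layers across several GPUs instead of .cuda(),
    # which would place the whole model on a single card.
    model = load_model_on_gpus(model_path, num_gpus=2)
    model.eval()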