This is my program:
```python
import tornado.ioloop
import tornado.web
import tornado.httpserver
import json
import tornado.options
from tornado.options import define, options
from BCEmbedding import RerankerModel
from BCEmbedding import EmbeddingModel

define("port", default=5006, help="run on the given port", type=int)

model_1 = RerankerModel(model_name_or_path="/bce-reranker-base_v1", device='cuda:0')
model_2 = EmbeddingModel(model_name_or_path="/bce-embedding-base_v1", device='cuda:1')


class BceEmbeddingHandler(tornado.web.RequestHandler):
    async def post(self):
        data = json.loads(self.request.body)
        sentences = data.get('sentences')
        if not sentences:
            self.set_status(400)
            self.write(json.dumps({"code": 1, "message": "Missing parameter", "data": {}}))
            return
        embeddings = model_2.encode(sentences)
        self.write(json.dumps({"code": 0, "message": "Success", "data": embeddings.tolist()}))


class RerankHandler(tornado.web.RequestHandler):
    async def post(self):
        data = json.loads(self.request.body)
        query = data.get('query')
        passages = data.get('passages')
        if not query or not passages:
            self.set_status(400)
            self.write(json.dumps({"code": 1, "message": "Missing parameter", "data": {}}))
            return
        # construct sentence pairs
        sentence_pairs = [[query, passage] for passage in passages]
        # method 0: calculate scores of sentence pairs
        scores = model_1.compute_score(sentence_pairs)
        # rerank_results = model_1.rerank(query, passages)
        self.write(json.dumps({"code": 0, "message": "Success", "data": scores}))


def make_app():
    return tornado.web.Application([
        (r"/rerank", RerankHandler),
        (r"/bceembedding", BceEmbeddingHandler),
    ])


if __name__ == "__main__":
    app = make_app()
    tornado.options.parse_command_line()
    http_server = tornado.httpserver.HTTPServer(app)
    http_server.listen(options.port)
    tornado.ioloop.IOLoop.current().start()
```
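
For reference, this is how I call the two endpoints (a minimal client sketch; it assumes the service is reachable at localhost:5006, matching the port option above):

```python
import requests

BASE = "http://localhost:5006"

# /bceembedding expects {"sentences": [...]} and returns embeddings as nested lists.
emb = requests.post(f"{BASE}/bceembedding",
                    json={"sentences": ["hello world", "你好世界"]}).json()
print(len(emb["data"]))  # one embedding per input sentence

# /rerank expects {"query": ..., "passages": [...]} and returns one score per passage.
rr = requests.post(f"{BASE}/rerank",
                   json={"query": "what is BCEmbedding?",
                         "passages": ["BCEmbedding is a bilingual embedding model.",
                                      "Tornado is a Python web framework."]}).json()
print(rr["data"])  # scores aligned with the passages list
```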
I started 5 instances of this service.
I am running on two RTX 4080 cards. With bce-reranker-base_v1, GPU memory usage is very high and is never released.
GPU memory jumps from about 8 GB at startup straight to 15 GB, and the service then throws errors:
File "/root/miniconda3/envs/bce/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
File "/root/miniconda3/envs/bce/lib/python3.10/site-packages/transformers/models/xlm_roberta/modeling_xlm_roberta.py", line 261, in forward
attention_scores = attention_scores / math.sqrt(self.attention_head_size)
RuntimeError: NVML_SUCCESS == DriverAPI::get()->nvmlDeviceGetHandleByPciBusId_v2_( pci_id, &nvml_device) INTERNAL ASSERT FAILED at "../c10/cuda/CUDACachingAllocator.cpp":815, please report a bug to PyTorch.
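
One thing I am considering (just a sketch, not verified, and I am not sure it addresses the NVML assertion, which looks like a driver-level failure): serialize GPU work inside each Tornado process and release the allocator's cached blocks after every request, so that five processes do not all run large reranker batches on the same card at once. The `rerank_with_cleanup` helper below is hypothetical, not part of BCEmbedding:

```python
import threading

import torch

# Hypothetical helper (my own sketch): one in-flight inference per process,
# with cached allocator blocks returned to the driver afterwards.
_gpu_lock = threading.Lock()

def rerank_with_cleanup(model, sentence_pairs):
    with _gpu_lock:                      # serialize GPU work within this process
        with torch.no_grad():            # do not keep autograd buffers alive
            scores = model.compute_score(sentence_pairs)
        torch.cuda.empty_cache()         # release cached allocator blocks
    return scores
```

In `RerankHandler.post` I would then replace `scores = model_1.compute_score(sentence_pairs)` with `scores = rerank_with_cleanup(model_1, sentence_pairs)`; the lock trades some throughput for a bounded number of concurrent activations per card.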