Skip to content

bce-reranker-base_v1 显存占用不释放,服务崩溃 #88

@xxx1099836595

Description

@xxx1099836595

这是我的程序

import tornado.ioloop
import tornado.web
import tornado.httpserver
import json
import tornado.options
from tornado.options import define, options
from BCEmbedding import RerankerModel
from BCEmbedding import EmbeddingModel
# Command-line option: TCP port the HTTP server listens on.
define("port", type=int, default=5006, help="run on the given port ")

# Both models are created once at import time and shared by every request
# handled in this process; each one is pinned to its own CUDA device.
model_1 = RerankerModel(
    model_name_or_path="/bce-reranker-base_v1",
    device="cuda:0",
)
model_2 = EmbeddingModel(
    model_name_or_path="/bce-embedding-base_v1",
    device="cuda:1",
)
class BceEmbeddingHandler(tornado.web.RequestHandler):
    """HTTP handler that returns embeddings for the posted sentences.

    Expects a JSON object body with a ``sentences`` entry and replies with a
    JSON envelope of the form ``{"code": ..., "message": ..., "data": ...}``.
    """

    async def post(self):
        """Encode ``sentences`` with ``model_2`` and write the result.

        Responds with HTTP 400 and ``code`` 1 when the body is not a valid
        JSON object, or when ``sentences`` is missing or empty.
        """
        try:
            data = json.loads(self.request.body)
        except ValueError:
            # json.JSONDecodeError and UnicodeDecodeError are both ValueError
            # subclasses; a malformed body is a client error, not a crash.
            data = None
        if not isinstance(data, dict):
            # A JSON array or scalar has no .get(); reject it up front.
            self.set_status(400)
            self.write(json.dumps({"code": 1, "message": "Invalid JSON body", "data": {}}))
            return

        sentences = data.get('sentences')
        if not sentences:
            self.set_status(400)
            self.write(json.dumps({"code": 1, "message": "Missing parameter", "data": {}}))
            return

        # NOTE(review): this call is not awaited, so it runs synchronously
        # inside the coroutine for as long as the model takes.
        embeddings = model_2.encode(sentences)
        self.write(json.dumps({"code": 0, "message": "Success", "data": embeddings.tolist()}))

class RerankHandler(tornado.web.RequestHandler):
    """HTTP handler that scores each posted passage against a query.

    Expects a JSON object body with ``query`` and ``passages`` entries and
    replies with a JSON envelope of the form
    ``{"code": ..., "message": ..., "data": ...}``.
    """

    async def post(self):
        """Score every ``[query, passage]`` pair with ``model_1``.

        Responds with HTTP 400 and ``code`` 1 when the body is not a valid
        JSON object, or when ``query`` or ``passages`` is missing or empty.
        """
        try:
            data = json.loads(self.request.body)
        except ValueError:
            # json.JSONDecodeError and UnicodeDecodeError are both ValueError
            # subclasses; a malformed body is a client error, not a crash.
            data = None
        if not isinstance(data, dict):
            # A JSON array or scalar has no .get(); reject it up front.
            self.set_status(400)
            self.write(json.dumps({"code": 1, "message": "Invalid JSON body", "data": {}}))
            return

        query = data.get('query')
        passages = data.get('passages')
        if not query or not passages:
            self.set_status(400)
            self.write(json.dumps({"code": 1, "message": "Missing parameter", "data": {}}))
            return

        # One [query, passage] pair per passage, in the order received.
        sentence_pairs = [[query, passage] for passage in passages]

        # NOTE(review): this call is not awaited, so it runs synchronously
        # inside the coroutine for as long as the model takes.
        scores = model_1.compute_score(sentence_pairs)
        self.write(json.dumps({"code": 0, "message": "Success", "data": scores}))
def make_app():
    """Build the Tornado application with the service's two POST routes."""
    routes = [
        (r"/rerank", RerankHandler),
        (r"/bceembedding", BceEmbeddingHandler),
    ]
    return tornado.web.Application(routes)


if __name__ == "__main__":
    # Parse flags first so that options.port reflects any --port override.
    tornado.options.parse_command_line()

    # Wrap the application in an HTTP server bound to the configured port.
    server = tornado.httpserver.HTTPServer(make_app())
    server.listen(options.port)

    # Hand control to the event loop; requests are served from here on.
    tornado.ioloop.IOLoop.current().start()

我一共启动了 5 个服务。
我使用的是两张 4080 显卡,在使用 bce-reranker-base_v1 时显存占用非常大,而且占用后不会释放。
显存占用从启动时的 8G 直接涨到 15G,同时服务出现如下错误:

File "/root/miniconda3/envs/bce/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
   return forward_call(*args, **kwargs)
 File "/root/miniconda3/envs/bce/lib/python3.10/site-packages/transformers/models/xlm_roberta/modeling_xlm_roberta.py", line 261, in forward
   attention_scores = attention_scores / math.sqrt(self.attention_head_size)
RuntimeError: NVML_SUCCESS == DriverAPI::get()->nvmlDeviceGetHandleByPciBusId_v2_( pci_id, &nvml_device) INTERNAL ASSERT FAILED at "../c10/cuda/CUDACachingAllocator.cpp":815, please report a bug to PyTorch. 

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions