From 138485a82de50f90536ea0a650dd2f6bba1927e9 Mon Sep 17 00:00:00 2001
From: Ayush Rautwar <42046470+ayusher@users.noreply.github.com>
Date: Fri, 19 Apr 2024 23:49:22 -0400
Subject: [PATCH] [Bugfix] Add fix for JSON whitespace (#4189)

Co-authored-by: Ubuntu
---
 tests/entrypoints/test_openai_server.py | 25 ++++++++++---------
 .../outlines_logits_processors.py       |  5 ++++
 2 files changed, 18 insertions(+), 12 deletions(-)

diff --git a/tests/entrypoints/test_openai_server.py b/tests/entrypoints/test_openai_server.py
index 0dd30eec..85a7ef46 100644
--- a/tests/entrypoints/test_openai_server.py
+++ b/tests/entrypoints/test_openai_server.py
@@ -754,19 +754,20 @@ async def test_guided_choice_chat_logprobs(server, client: openai.AsyncOpenAI,
 
 
 async def test_response_format_json_object(server, client: openai.AsyncOpenAI):
-    resp = await client.chat.completions.create(
-        model=MODEL_NAME,
-        messages=[{
-            "role":
-            "user",
-            "content": ('what is 1+1? please respond with a JSON object, '
-                        'the format is {"result": 2}')
-        }],
-        response_format={"type": "json_object"})
+    for _ in range(2):
+        resp = await client.chat.completions.create(
+            model=MODEL_NAME,
+            messages=[{
+                "role":
+                "user",
+                "content": ('what is 1+1? please respond with a JSON object, '
+                            'the format is {"result": 2}')
+            }],
+            response_format={"type": "json_object"})
 
-    content = resp.choices[0].message.content
-    loaded = json.loads(content)
-    assert loaded == {"result": 2}, loaded
+        content = resp.choices[0].message.content
+        loaded = json.loads(content)
+        assert loaded == {"result": 2}, loaded
 
 
 async def test_guided_grammar(server, client: openai.AsyncOpenAI):
diff --git a/vllm/model_executor/guided_decoding/outlines_logits_processors.py b/vllm/model_executor/guided_decoding/outlines_logits_processors.py
index 95a67b61..25ab5bf8 100644
--- a/vllm/model_executor/guided_decoding/outlines_logits_processors.py
+++ b/vllm/model_executor/guided_decoding/outlines_logits_processors.py
@@ -131,6 +131,11 @@ class CFGLogitsProcessor(BaseLogitsProcessor):
         fsm = CFGFSM(cfg, tokenizer)
         self.fsm = fsm
 
+    def init_state(self):
+        """Initialize state with a CFGFSM copy."""
+        super().init_state()
+        self.fsm = self.fsm.copy()
+
 
 @lru_cache
 def _adapt_tokenizer(tokenizer: PreTrainedTokenizerBase):
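
Note on the mechanism (a hedged sketch, not part of the patch itself): outlines'
CFGFSM carries mutable parser state forward as tokens are generated, so a
CFGLogitsProcessor that keeps reusing one FSM instance leaks the previous
request's parse state into the next one; that appears to be the source of the
stray whitespace in repeated json_object responses, and it is why the test
above now issues the same request twice. The sketch below illustrates the
per-request copy pattern with hypothetical stand-in classes (ToyCFGFSM,
ToyCFGLogitsProcessor), not the real vllm/outlines types:

    class ToyCFGFSM:
        """Stand-in for a stateful grammar FSM (hypothetical, simplified)."""

        def __init__(self):
            # Parser state that advances as generated tokens stream in.
            self.generation = ""

        def copy(self):
            # Assumption mirrored from the fix: copy() yields a pristine
            # FSM for the same grammar, discarding accumulated state.
            return ToyCFGFSM()


    class ToyCFGLogitsProcessor:
        """Stand-in for CFGLogitsProcessor (hypothetical, simplified)."""

        def __init__(self):
            self.fsm = ToyCFGFSM()  # built once from the grammar

        def init_state(self):
            # The pattern the patch adds: each request starts from a fresh
            # copy instead of the instance mutated by the previous request.
            self.fsm = self.fsm.copy()


    proc = ToyCFGLogitsProcessor()
    proc.init_state()
    proc.fsm.generation += '{"result": 2}'   # request 1 advances the FSM
    proc.init_state()                        # request 2 gets clean state
    assert proc.fsm.generation == ""         # no leakage between requests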