Issue the json_object response_format request twice in
test_response_format_json_object, and add CFGLogitsProcessor.init_state,
which calls the parent init_state and then replaces self.fsm with a copy.

diff --git a/tests/entrypoints/test_openai_server.py b/tests/entrypoints/test_openai_server.py
index 0dd30eec..85a7ef46 100644
--- a/tests/entrypoints/test_openai_server.py
+++ b/tests/entrypoints/test_openai_server.py
@@ -754,19 +754,20 @@ async def test_guided_choice_chat_logprobs(server, client: openai.AsyncOpenAI,
 
 
 async def test_response_format_json_object(server, client: openai.AsyncOpenAI):
-    resp = await client.chat.completions.create(
-        model=MODEL_NAME,
-        messages=[{
-            "role":
-            "user",
-            "content": ('what is 1+1? please respond with a JSON object, '
-                        'the format is {"result": 2}')
-        }],
-        response_format={"type": "json_object"})
+    for _ in range(2):
+        resp = await client.chat.completions.create(
+            model=MODEL_NAME,
+            messages=[{
+                "role":
+                "user",
+                "content": ('what is 1+1? please respond with a JSON object, '
+                            'the format is {"result": 2}')
+            }],
+            response_format={"type": "json_object"})
 
-    content = resp.choices[0].message.content
-    loaded = json.loads(content)
-    assert loaded == {"result": 2}, loaded
+        content = resp.choices[0].message.content
+        loaded = json.loads(content)
+        assert loaded == {"result": 2}, loaded
 
 
 async def test_guided_grammar(server, client: openai.AsyncOpenAI):
diff --git a/vllm/model_executor/guided_decoding/outlines_logits_processors.py b/vllm/model_executor/guided_decoding/outlines_logits_processors.py
index 95a67b61..25ab5bf8 100644
--- a/vllm/model_executor/guided_decoding/outlines_logits_processors.py
+++ b/vllm/model_executor/guided_decoding/outlines_logits_processors.py
@@ -131,6 +131,11 @@ class CFGLogitsProcessor(BaseLogitsProcessor):
         fsm = CFGFSM(cfg, tokenizer)
         self.fsm = fsm
 
+    def init_state(self):
+        """Initialize state with a CFGFSM copy."""
+        super().init_state()
+        self.fsm = self.fsm.copy()
+
 
 @lru_cache
 def _adapt_tokenizer(tokenizer: PreTrainedTokenizerBase):