Co-authored-by: Gregory Shtrasberg <Gregory.Shtrasberg@amd.com> Co-authored-by: HaiShaw <hixiao@gmail.com> Co-authored-by: AdrianAbeyta <Adrian.Abeyta@amd.com> Co-authored-by: Matthew Wong <Matthew.Wong2@amd.com> Co-authored-by: root <root@gt-pla-u18-08.pla.dcgpu> Co-authored-by: mawong-amd <156021403+mawong-amd@users.noreply.github.com> Co-authored-by: ttbachyinsda <ttbachyinsda@outlook.com> Co-authored-by: guofangze <guofangze@kuaishou.com> Co-authored-by: Michael Goin <mgoin64@gmail.com> Co-authored-by: jacobthebanana <50071502+jacobthebanana@users.noreply.github.com> Co-authored-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
43 lines
1.4 KiB
JSON
43 lines
1.4 KiB
JSON
{
|
|
"model_type": "llama",
|
|
"kv_cache": {
|
|
"dtype": "float8_e4m3fn",
|
|
"scaling_factor": {
|
|
"0": {
|
|
"0": 0.0152239128947258,
|
|
"1": 0.0188860222697258,
|
|
"2": 0.0354178324341774,
|
|
"3": 0.0376674123108387,
|
|
"4": 0.0418526791036129,
|
|
"5": 0.0433175228536129,
|
|
"6": 0.0397600457072258,
|
|
"7": 0.0424455925822258,
|
|
"8": 0.0415387861430645,
|
|
"9": 0.0408412404358387,
|
|
"10": 0.0395856611430645,
|
|
"11": 0.0377371683716774,
|
|
"12": 0.0400739423930645,
|
|
"13": 0.040771484375,
|
|
"14": 0.0393415205180645,
|
|
"15": 0.0369001142680645,
|
|
"16": 0.03857421875,
|
|
"17": 0.0387486070394516,
|
|
"18": 0.0403180830180645,
|
|
"19": 0.0396205373108387,
|
|
"20": 0.0375627800822258,
|
|
"21": 0.0407366082072258,
|
|
"22": 0.0432477705180645,
|
|
"23": 0.0377022884786129,
|
|
"24": 0.0399693101644516,
|
|
"25": 0.0374581478536129,
|
|
"26": 0.0413295216858387,
|
|
"27": 0.0442243330180645,
|
|
"28": 0.0424804724752903,
|
|
"29": 0.0456891767680645,
|
|
"30": 0.0409109964966774,
|
|
"31": 0.0482352152466774
|
|
}
|
|
}
|
|
}
|
|
}
|