{"schema":"libjg2-1",
"vpath":"/git/",
"avatar":"/git/avatar/",
"alang":"",
"gen_ut":1747543906,
"reponame":"libclamma",
"desc":"Modernized llama2.c inference engine",
"owner": { "name": "Andy Green", "email": "andy@warmcat.com", "md5": "c50933ca2aa61e0fe2c43d46bb6b59cb" },"url":"https://warmcat.com/repo/libclamma",
"f":3,
"items": [
{"schema":"libjg2-1",
"cid":"a6460ba4ce8632d31d99dd54f71b19d5",
"commit": {"type":"commit",
"time": 1692024735,
"time_ofs": 0,
"oid_tree": { "oid": "b3d5c075b41cc3cff28bb4cce69ff7d7eb2b5cc6", "alias": []},
"oid":{ "oid": "94a3a5e0a5f63f06ffbfa7ec5452553eedafc215", "alias": []},
"msg": "Merge branch 'master' of github.com:karpathy/llama2.c",
"sig_commit": { "git_time": { "time": 1692024735, "offset": 0 }, "name": "Andrej Karpathy", "email": "andrej.karpathy@gmail.com", "md5": "d06a9cdc46d537f09ccc4bd6b822dd78" },
"sig_author": { "git_time": { "time": 1692024735, "offset": 0 }, "name": "Andrej Karpathy", "email": "andrej.karpathy@gmail.com", "md5": "d06a9cdc46d537f09ccc4bd6b822dd78" }},
"body": "Merge branch 'master' of github.com:karpathy/llama2.c\n"
,
"diff": "diff --git a/README.md b/README.md\nindex a180208..d2d19d9 100644\n--- a/README.md\n+++ b/README.md\n@@ -302,6 +302,7 @@ If your candidate PRs have elements of these it doesn't mean they won't get merg\n - WebAssembly\n - [icpp-llm](https://github.com/icppWorld/icpp-llm): LLMs for the Internet Computer\n - [llama2.c - Llama 2 Everywhere](https://github.com/trholding/llama2.c) by @[trholding](https://github.com/trholding): Standalone, Bootable \u0026 Portable Binary Llama 2\n+- [llama2.c-zh - Bilingual Chinese and English](https://github.com/chenyangMl/llama2.c-zh) by @[chenyangMl](https://github.com/chenyangMl): Expand tokenizer to support training and inference in both Chinese and English\n \n ## unsorted todos\n \ndiff --git a/export_meta_llama_hf_bin.py b/export_meta_llama_hf_bin.py\nnew file mode 100644\nindex 0000000..e3a8c73\n--- /dev/null\n+++ b/export_meta_llama_hf_bin.py\n@@ -0,0 +1,113 @@\n+\u0022\u0022\u0022\r\n+This script exports the Llama 2 weights in llama2c.bin format.\r\n+\u0022\u0022\u0022\r\n+import os\r\n+import sys\r\n+import struct\r\n+from pathlib import Path\r\n+import json\r\n+\r\n+import torch\r\n+\r\n+from model import precompute_freqs_cis\r\n+\r\n+\r\n+def export(p, state_dict, filepath\u003d'model.bin'):\r\n+ \u0022\u0022\u0022export the model weights in fp32 into .bin file to be read from C\u0022\u0022\u0022\r\n+ f \u003d open(filepath, 'wb')\r\n+\r\n+ def serialize(key):\r\n+ print(f\u0022writing {key}...\u0022)\r\n+ t \u003d state_dict[key].contiguous().view(-1).type(torch.float32).numpy()\r\n+ f.write(memoryview(t))\r\n+ del state_dict[key]\r\n+\r\n+ # first write out the header\r\n+ hidden_dim \u003d state_dict['model.layers.0.mlp.gate_proj.weight'].shape[0]\r\n+ p['vocab_size'] \u003d 32000\r\n+ p['max_seq_len'] \u003d 2048\r\n+\r\n+ n_kv_heads \u003d p.get('n_kv_heads') or p['n_heads']\r\n+ header \u003d struct.pack(\r\n+ 'iiiiiii',\r\n+ p['dim'], hidden_dim, p['n_layers'], p['n_heads'],\r\n+ n_kv_heads, -p['vocab_size'], p['max_seq_len']\r\n+ )\r\n+ # NOTE ABOVE: -ve vocab_size is indicating that the classifier weights are present\r\n+ # in the checkpoint and should be loaded.\r\n+ f.write(header)\r\n+\r\n+ # next write out the embedding weights\r\n+ print(\u0022writing tok_embeddings...\u0022)\r\n+ serialize('model.embed_tokens.weight')\r\n+\r\n+ # now all the layers\r\n+ # attention weights\r\n+ for i in range(p['n_layers']): serialize(f'model.layers.{i}.input_layernorm.weight')\r\n+ for i in range(p['n_layers']): serialize(f'model.layers.{i}.self_attn.q_proj.weight')\r\n+ for i in range(p['n_layers']): serialize(f'model.layers.{i}.self_attn.k_proj.weight')\r\n+ for i in range(p['n_layers']): serialize(f'model.layers.{i}.self_attn.v_proj.weight')\r\n+ for i in range(p['n_layers']): serialize(f'model.layers.{i}.self_attn.o_proj.weight')\r\n+ # ffn weights\r\n+ for i in range(p['n_layers']): serialize(f'model.layers.{i}.post_attention_layernorm.weight')\r\n+ for i in range(p['n_layers']): serialize(f'model.layers.{i}.mlp.gate_proj.weight')\r\n+ for i in range(p['n_layers']): serialize(f'model.layers.{i}.mlp.down_proj.weight')\r\n+ for i in range(p['n_layers']): serialize(f'model.layers.{i}.mlp.up_proj.weight')\r\n+\r\n+ # final rmsnorm\r\n+ serialize('model.norm.weight')\r\n+ # freqs_cos, freqs_sin\r\n+ freqs_cos, freqs_sin \u003d precompute_freqs_cis(p['dim'] // p['n_heads'], p['max_seq_len'] * 2)\r\n+ state_dict['freqs_cos'] \u003d freqs_cos[:p['max_seq_len']]\r\n+ state_dict['freqs_sin'] \u003d freqs_sin[:p['max_seq_len']]\r\n+ # check if 
this requires addtional conversion\r\n+ serialize('freqs_cos')\r\n+ serialize('freqs_sin')\r\n+\r\n+ # finally write the output weights\r\n+ serialize('lm_head.weight')\r\n+\r\n+ f.close()\r\n+ print(f\u0022wrote {filepath}\u0022)\r\n+\r\n+\r\n+def concat_weights(models):\r\n+ state_dict \u003d {}\r\n+ for name in list(models[0]):\r\n+ tensors \u003d [model[name] for model in models]\r\n+ if len(tensors) \u003d\u003d 1 or len(tensors[0].shape) \u003d\u003d 1:\r\n+ state_dict[name] \u003d tensors[0]\r\n+ continue\r\n+ is_axis_1 \u003d (\r\n+ name.startswith('model.embed_tokens.weight')\r\n+ or name.endswith('.self_attn.o_proj.weight')\r\n+ or name.endswith('.mlp.down_proj.weight')\r\n+ )\r\n+ axis \u003d 1 if is_axis_1 else 0\r\n+ state_dict[name] \u003d torch.cat(tensors, dim\u003daxis)\r\n+ for model in models:\r\n+ del model[name]\r\n+ return state_dict\r\n+\r\n+\r\n+def load_and_export(model_path, output_path):\r\n+ params_path \u003d os.path.join(model_path, 'params.json')\r\n+ with open(params_path) as f:\r\n+ params \u003d json.load(f)\r\n+ print(params)\r\n+\r\n+ model_paths \u003d sorted(list(Path(model_path).glob('consolidated.*.pth')))\r\n+ models \u003d [torch.load(p, map_location\u003d'cpu') for p in model_paths]\r\n+ state_dict \u003d concat_weights(models)\r\n+ del models\r\n+ export(params, state_dict, output_path)\r\n+\r\n+\r\n+if __name__ \u003d\u003d '__main__':\r\n+ if len(sys.argv) \u003d\u003d 1:\r\n+ print('[Llama model folder path] [output path]')\r\n+ exit()\r\n+\r\n+ model_path \u003d sys.argv[1]\r\n+ output_path \u003d sys.argv[2]\r\n+ load_and_export(model_path, output_path)\r\n","s":{"c":1747414240,"u": 2071}}
],"g": 790,"chitpc": 0,"ehitpc": 0,"indexed":0
,
"ab": 0, "si": 0, "db":0, "di":0, "sat":0, "lfc": "7d0a"}