{"payload":{"pageCount":2,"repositories":[{"type":"Public","name":"vllm","owner":"neuralmagic","isFork":true,"description":"A high-throughput and memory-efficient inference and serving engine for LLMs","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":8,"issueCount":0,"starsCount":3,"forksCount":3977,"license":"Apache License 2.0","participation":[22,7,16,14,2,21,9,15,26,25,19,38,31,6,12,17,19,22,32,15,22,29,31,31,42,49,66,46,48,43,57,74,51,48,36,41,70,90,75,71,78,70,94,91,92,93,81,95,74,55,84,59],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-09-21T06:03:52.174Z"}},{"type":"Public","name":"nm-vllm-certs","owner":"neuralmagic","isFork":false,"description":"General Information, model certifications, and benchmarks for nm-vllm enterprise distributions","allTopics":["vllm"],"primaryLanguage":null,"pullRequestCount":0,"issueCount":0,"starsCount":4,"forksCount":1,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-09-21T04:10:35.826Z"}},{"type":"Public","name":"compressed-tensors","owner":"neuralmagic","isFork":false,"description":"A safetensors extension to efficiently store sparse quantized tensors on disk","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":11,"issueCount":2,"starsCount":26,"forksCount":0,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-09-21T00:33:35.225Z"}},{"type":"Public","name":"OmniQuant","owner":"neuralmagic","isFork":true,"description":"[ICLR2024 spotlight] OmniQuant is a simple and powerful quantization technique for LLMs. ","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":0,"forksCount":52,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-09-20T19:48:47.263Z"}},{"type":"Public","name":"nm-actions","owner":"neuralmagic","isFork":false,"description":"Neural Magic GHA","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":4,"issueCount":0,"starsCount":0,"forksCount":0,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-09-20T19:13:03.826Z"}},{"type":"Public","name":"flash-attention","owner":"neuralmagic","isFork":true,"description":"Fast and memory-efficient exact attention","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":0,"forksCount":1230,"license":"BSD 3-Clause \"New\" or \"Revised\" License","participation":[7,5,3,0,2,0,1,2,6,6,0,0,13,9,5,3,10,19,9,4,15,6,2,2,5,1,12,1,4,0,4,0,7,0,6,3,2,0,0,0,0,0,0,1,7,0,0,0,0,0,0,0],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-09-20T16:31:02.087Z"}},{"type":"Public","name":"lm-evaluation-harness","owner":"neuralmagic","isFork":true,"description":"A framework for few-shot evaluation of language models.","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":1,"issueCount":0,"starsCount":1,"forksCount":1713,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-09-19T19:08:44.222Z"}},{"type":"Public","name":"nm-vllm","owner":"neuralmagic","isFork":true,"description":"A high-throughput and memory-efficient inference and serving engine for LLMs","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":250,"forksCount":3977,"license":"Other","participation":[22,7,16,14,2,21,9,15,26,25,19,38,31,6,12,17,19,22,38,15,32,47,22,15,15,6,11,9,8,6,1,3,3,8,5,7,14,9,13,17,4,10,0,0,0,0,0,0,0,1,0,0],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-09-18T18:41:43.102Z"}},{"type":"Public","name":"transformers","owner":"neuralmagic","isFork":true,"description":"🤗Transformers: State-of-the-art Natural Language Processing for Pytorch and TensorFlow 2.0.","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":13,"issueCount":0,"starsCount":9,"forksCount":26352,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-09-18T12:46:23.745Z"}},{"type":"Public","name":"temp-AutoGPTQ","owner":"neuralmagic","isFork":true,"description":"An easy-to-use LLMs quantization package with user-friendly apis, based on GPTQ algorithm.","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":0,"forksCount":463,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-09-16T20:22:45.265Z"}},{"type":"Public","name":"guidellm","owner":"neuralmagic","isFork":false,"description":"Evaluate and Enhance Your LLM Deployments for Real-World Inference Needs","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":8,"issueCount":8,"starsCount":130,"forksCount":7,"license":"Apache License 2.0","participation":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,2,0,1,9,3,3,5,5,0,0,2,8,2,5,0],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-09-16T07:43:32.443Z"}},{"type":"Public","name":"upstream-llm-foundry","owner":"neuralmagic","isFork":true,"description":"LLM training code for MosaicML foundation models","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":0,"forksCount":523,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-09-15T07:25:27.772Z"}},{"type":"Public","name":"upstream-transformers","owner":"neuralmagic","isFork":true,"description":"🤗 Transformers: State-of-the-art Machine Learning for Pytorch, TensorFlow, and JAX.","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":0,"forksCount":26352,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-09-05T05:53:15.976Z"}},{"type":"Public","name":"yolov5","owner":"neuralmagic","isFork":true,"description":"YOLOv5 in PyTorch &gt; ONNX &gt; CoreML &gt; TFLite","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":5,"issueCount":0,"starsCount":19,"forksCount":16120,"license":"GNU General Public License v3.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-09-02T04:31:13.372Z"}},{"type":"Public","name":"upstream-composer","owner":"neuralmagic","isFork":true,"description":"Supercharge Your Model Training","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":0,"forksCount":414,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-08-27T17:00:36.601Z"}},{"type":"Public","name":"MixEval","owner":"neuralmagic","isFork":true,"description":"NM fork of MixEval compatible with SparseAutoModel.","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":1,"issueCount":0,"starsCount":0,"forksCount":29,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-08-20T16:45:49.274Z"}},{"type":"Public","name":"mamba","owner":"neuralmagic","isFork":true,"description":"Mamba SSM architecture","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":0,"forksCount":1060,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-08-12T17:15:39.981Z"}},{"type":"Public","name":"causal-conv1d","owner":"neuralmagic","isFork":true,"description":"Causal depthwise conv1d in CUDA, with a PyTorch interface","allTopics":[],"primaryLanguage":{"name":"Cuda","color":"#3A4E3A"},"pullRequestCount":0,"issueCount":0,"starsCount":0,"forksCount":54,"license":"BSD 3-Clause \"New\" or \"Revised\" License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-08-08T16:15:12.840Z"}},{"type":"Public","name":"evalplus","owner":"neuralmagic","isFork":true,"description":"NeuralMagic fork of EvalPlus (Rigourous evaluation of LLM-synthesized code - NeurIPS 2023)","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":0,"forksCount":102,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-08-01T14:31:20.560Z"}},{"type":"Public","name":"sparseml","owner":"neuralmagic","isFork":false,"description":"Libraries for applying sparsification recipes to neural networks with a few lines of code, enabling faster and smaller models","allTopics":["sparsity","keras","deep-learning-algorithms","deep-learning-library","pruning","object-detection","computer-vision-algorithms","onnx","deep-learning-models","sparsification","pruning-algorithms","smaller-models","sparsification-recipes","nlp","tensorflow","pytorch","image-classification","transfer-learning","automl"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":61,"issueCount":5,"starsCount":2043,"forksCount":143,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-08-01T14:13:12.070Z"}},{"type":"Public","name":"inference","owner":"neuralmagic","isFork":true,"description":"Reference implementations of MLPerf™ inference benchmarks","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":1,"issueCount":0,"starsCount":1,"forksCount":519,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-07-24T18:54:00.017Z"}},{"type":"Public","name":"examples","owner":"neuralmagic","isFork":false,"description":"Notebooks using the Neural Magic libraries 📓","allTopics":[],"primaryLanguage":{"name":"Jupyter Notebook","color":"#DA5B0B"},"pullRequestCount":3,"issueCount":0,"starsCount":40,"forksCount":7,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-07-24T17:37:45.262Z"}},{"type":"Public","name":"AutoFP8","owner":"neuralmagic","isFork":false,"description":"","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":2,"issueCount":9,"starsCount":147,"forksCount":16,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-07-23T16:26:48.655Z"}},{"type":"Public","name":"sparsezoo","owner":"neuralmagic","isFork":false,"description":"Neural network model repository for highly sparse and sparse-quantized models with matching sparsification recipes","allTopics":["nlp","computer-vision","deep-learning-algorithms","yolo","resnet","pruning","transfer-learning","pretrained-models","quantization","mobilenet","deep-learning-models","object-detection-model","sparsification-recipe","smaller-models","sparse-quantized-models","models-optimized"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":6,"issueCount":1,"starsCount":364,"forksCount":24,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-07-19T18:18:21.885Z"}},{"type":"Public","name":"deepsparse","owner":"neuralmagic","isFork":false,"description":"Sparsity-aware deep learning inference runtime for CPUs","allTopics":["nlp","performance","computer-vision","inference","machinelearning","pruning","object-detection","pretrained-models","quantization","cpus","onnx","sparsification","llm-inference","deepsparse"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":21,"issueCount":10,"starsCount":2980,"forksCount":172,"license":"Other","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-07-19T18:18:07.745Z"}},{"type":"Public","name":"cutlass","owner":"neuralmagic","isFork":true,"description":"CUDA Templates for Linear Algebra Subroutines","allTopics":[],"primaryLanguage":{"name":"C++","color":"#f34b7d"},"pullRequestCount":2,"issueCount":0,"starsCount":0,"forksCount":907,"license":"Other","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-07-17T22:16:11.984Z"}},{"type":"Public","name":"llm-foundry","owner":"neuralmagic","isFork":true,"description":"NM fork of LLM foundry for compatibility with SparseAutoModel.","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":1,"issueCount":0,"starsCount":0,"forksCount":523,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-07-16T16:58:03.757Z"}},{"type":"Public","name":"nm-vllm-utils","owner":"neuralmagic","isFork":false,"description":"Various utilities for use with nm-vllm","allTopics":[],"primaryLanguage":{"name":"Makefile","color":"#427819"},"pullRequestCount":6,"issueCount":0,"starsCount":0,"forksCount":0,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-07-09T19:21:47.119Z"}},{"type":"Public","name":"alpaca_eval","owner":"neuralmagic","isFork":true,"description":"An automatic evaluator for instruction-following language models. Human-validated, high-quality, cheap, and fast.","allTopics":[],"primaryLanguage":{"name":"Jupyter Notebook","color":"#DA5B0B"},"pullRequestCount":0,"issueCount":0,"starsCount":0,"forksCount":227,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-07-09T04:20:55.623Z"}},{"type":"Public","name":"helm-charts","owner":"neuralmagic","isFork":false,"description":"Helm charts for deploying NM VLLM","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":3,"issueCount":0,"starsCount":4,"forksCount":1,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-07-06T01:49:00.678Z"}}],"repositoryCount":54,"userInfo":null,"searchable":true,"definitions":[],"typeFilters":[{"id":"all","text":"All"},{"id":"public","text":"Public"},{"id":"source","text":"Sources"},{"id":"fork","text":"Forks"},{"id":"archived","text":"Archived"},{"id":"template","text":"Templates"}],"compactMode":false},"title":"neuralmagic repositories"}