Skip to content

Commit

Permalink
Merge pull request huggingface#14 from xenova/clip
Browse files Browse the repository at this point in the history
Add CLIP model
  • Loading branch information
xenova authored Mar 15, 2023
2 parents 00273ff + 1b23229 commit 4fdbc27
Show file tree
Hide file tree
Showing 16 changed files with 522 additions and 172 deletions.
10 changes: 7 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
# Transformers.js
![https://www.npmjs.com/package/@xenova/transformers](https://img.shields.io/npm/v/@xenova/transformers) ![https://www.npmjs.com/package/@xenova/transformers](https://img.shields.io/npm/dw/@xenova/transformers)
![https://github.com/xenova/transformers.js/blob/main/LICENSE](https://img.shields.io/github/license/xenova/transformers.js)

Run 🤗 Transformers in your browser! We currently support [BERT](https://huggingface.co/docs/transformers/model_doc/bert), [ALBERT](https://huggingface.co/docs/transformers/model_doc/albert), [DistilBERT](https://huggingface.co/docs/transformers/model_doc/distilbert), [T5](https://huggingface.co/docs/transformers/model_doc/t5), [T5v1.1](https://huggingface.co/docs/transformers/model_doc/t5v1.1), [FLAN-T5](https://huggingface.co/docs/transformers/model_doc/flan-t5), [GPT2](https://huggingface.co/docs/transformers/model_doc/gpt2), [BART](https://huggingface.co/docs/transformers/model_doc/bart), [CodeGen](https://huggingface.co/docs/transformers/model_doc/codegen), [Whisper](https://huggingface.co/docs/transformers/model_doc/whisper), [Vision Transformer](https://huggingface.co/docs/transformers/model_doc/vit), and [VisionEncoderDecoder](https://huggingface.co/docs/transformers/model_doc/vision-encoder-decoder) models, for a variety of tasks including: masked language modelling, text classification, text-to-text generation, translation, summarization, question answering, text generation, automatic speech recognition, image classification, and image-to-text.

[![npm](https://img.shields.io/npm/v/@xenova/transformers)](https://www.npmjs.com/package/@xenova/transformers)
[![downloads](https://img.shields.io/npm/dw/@xenova/transformers)](https://www.npmjs.com/package/@xenova/transformers)
[![license](https://img.shields.io/github/license/xenova/transformers.js)](https://github.com/xenova/transformers.js/blob/main/LICENSE)


Run 🤗 Transformers in your browser! We currently support [BERT](https://huggingface.co/docs/transformers/model_doc/bert), [ALBERT](https://huggingface.co/docs/transformers/model_doc/albert), [DistilBERT](https://huggingface.co/docs/transformers/model_doc/distilbert), [T5](https://huggingface.co/docs/transformers/model_doc/t5), [T5v1.1](https://huggingface.co/docs/transformers/model_doc/t5v1.1), [FLAN-T5](https://huggingface.co/docs/transformers/model_doc/flan-t5), [GPT2](https://huggingface.co/docs/transformers/model_doc/gpt2), [BART](https://huggingface.co/docs/transformers/model_doc/bart), [CodeGen](https://huggingface.co/docs/transformers/model_doc/codegen), [Whisper](https://huggingface.co/docs/transformers/model_doc/whisper), [CLIP](https://huggingface.co/docs/transformers/model_doc/clip), [Vision Transformer](https://huggingface.co/docs/transformers/model_doc/vit), and [VisionEncoderDecoder](https://huggingface.co/docs/transformers/model_doc/vision-encoder-decoder) models, for a variety of tasks including: masked language modelling, text classification, text-to-text generation, translation, summarization, question answering, text generation, automatic speech recognition, image classification, zero-shot image classification, and image-to-text.

![teaser](https://user-images.githubusercontent.com/26504141/221056008-e906614e-e6f0-4e10-b0a8-7d5c99e955b4.gif)

Expand Down
47 changes: 46 additions & 1 deletion assets/js/scripts.js
Original file line number Diff line number Diff line change
Expand Up @@ -61,10 +61,19 @@ const IMAGE_CLASSIFICATION_OUTPUT_CANVAS = document.getElementById('ic-canvas');
const CODE_COMPLETION_CONTAINER = document.getElementById('code-completion-container');


// DOM handles for the zero-shot image classification (ZSIC) demo widgets.
const ZSIC_SELECT = document.getElementById('zsic-select'); // example selector (paired with input/media below)
const ZSIC_INPUT = document.getElementById('zsic-file'); // file-upload input
const ZSIC_CLASSES = document.getElementById('zsic-classes'); // comma-separated candidate-class textbox (parsed by getZSICClasses)
const ZSIC_IMG = document.getElementById('zsic-viewer'); // image element passed to getImageDataFromImage — presumably the preview; confirm in HTML
const ZSIC_OUTPUT_CANVAS = document.getElementById('zsic-canvas'); // canvas backing the 'zsic-canvas' bar chart in CHARTS



[
[SPEECH2TEXT_SELECT, SPEECH2TEXT_INPUT, SPEECH2TEXT_AUDIO],
[TEXT2IMAGE_SELECT, TEXT2IMAGE_INPUT, TEXT2IMAGE_IMG],
[IMAGE_CLASSIFICATION_SELECT, IMAGE_CLASSIFICATION_INPUT, IMAGE_CLASSIFICATION_IMG],
[ZSIC_SELECT, ZSIC_INPUT, ZSIC_IMG],
].forEach(x => {
let [select, input, media] = x;

Expand Down Expand Up @@ -214,10 +223,36 @@ const CHARTS = {
}]
},
options: CHART_OPTIONS
})
}),

'zsic-canvas': new Chart(ZSIC_OUTPUT_CANVAS, {
type: 'bar',
data: {
labels: ['football', 'airport', 'animals'],
datasets: [{
borderWidth: 1
}]
},
options: CHART_OPTIONS
}),

}


/**
 * Parse the user-entered candidate class list for zero-shot image
 * classification.
 *
 * Splits the textbox value on commas, trims surrounding whitespace from
 * each entry, and drops empty entries (e.g. from ",,", a trailing comma,
 * or a whitespace-only textbox — the original regex split left a
 * whitespace-only value as a single truthy entry).
 *
 * @returns {string[]} The cleaned class labels (possibly empty).
 */
function getZSICClasses() {
    return ZSIC_CLASSES.value
        .split(',')
        .map(x => x.trim())
        .filter(x => x !== '');
}
// Keep the ZSIC bar chart's x-axis labels in sync with the class textbox.
ZSIC_CLASSES.addEventListener('input', () => {
    const zsicChart = CHARTS[ZSIC_OUTPUT_CANVAS.id];
    zsicChart.data.labels = getZSICClasses();
    zsicChart.update();
});



function updateVisibility() {
for (let element of TASKS) {
if (element.getAttribute('task').split(',').includes(TASK_SELECTOR.value)) {
Expand All @@ -228,6 +263,7 @@ function updateVisibility() {
}
}
updateVisibility();

// Add event listeners
TASK_SELECTOR.addEventListener('input', updateVisibility);

Expand Down Expand Up @@ -323,6 +359,15 @@ GENERATE_BUTTON.addEventListener('click', async (e) => {
data.updateLabels = true
break;


case 'zero-shot-image-classification':
data.image = getImageDataFromImage(ZSIC_IMG)
data.classes = getZSICClasses()
data.elementIdToUpdate = ZSIC_OUTPUT_CANVAS.id
data.targetType = 'chart'
data.updateLabels = true
break;

default:
return;
}
Expand Down
35 changes: 33 additions & 2 deletions assets/js/worker.js
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@ const TASK_FUNCTION_MAPPING = {
'summarization': summarize,
'automatic-speech-recognition': speech_to_text,
'image-to-text': image_to_text,
'image-classification': image_classification
'image-classification': image_classification,
'zero-shot-image-classification': zero_shot_image_classification,
}

// Listen for messages from UI
Expand Down Expand Up @@ -117,6 +118,12 @@ class ImageClassificationPipelineFactory extends PipelineFactory {
static model = 'google/vit-base-patch16-224';
}


// Pipeline factory for the zero-shot image classification task, backed by
// OpenAI's CLIP ViT-B/16 checkpoint. The base PipelineFactory (defined
// elsewhere in this file) consumes these static fields via getInstance —
// see zero_shot_image_classification below for usage.
class ZeroShotImageClassificationPipelineFactory extends PipelineFactory {
    static task = 'zero-shot-image-classification';
    static model = 'openai/clip-vit-base-patch16';
}

async function translate(data) {

let pipeline = await TranslationPipelineFactory.getInstance(data => {
Expand Down Expand Up @@ -364,4 +371,28 @@ async function image_classification(data) {
data: outputs
});

}
}


/**
 * Worker handler for the zero-shot image classification task.
 *
 * Classifies `data.image` against the user-supplied `data.classes` labels
 * using the CLIP pipeline, then posts a 'complete' message back to the UI
 * thread with the outputs and the chart-update metadata echoed from `data`.
 *
 * @param {Object} data - Message payload: image, classes, elementIdToUpdate,
 *                        targetType, updateLabels.
 */
async function zero_shot_image_classification(data) {
    const pipeline = await ZeroShotImageClassificationPipelineFactory.getInstance(progress => {
        // Forward model-download progress to the UI. The original reported
        // task: 'image-classification' here — a copy-paste from the sibling
        // image_classification handler — which misattributed this task's
        // download progress. Use this task's own name instead.
        // NOTE(review): confirm the UI's download handler keys on task name.
        self.postMessage({
            type: 'download',
            task: 'zero-shot-image-classification',
            data: progress
        });
    });

    const outputs = await pipeline(data.image, data.classes);

    self.postMessage({
        type: 'complete',
        target: data.elementIdToUpdate,
        targetType: data.targetType,
        updateLabels: data.updateLabels,
        data: outputs
    });
}


122 changes: 117 additions & 5 deletions dist/transformers.js

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion dist/transformers.js.map

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions dist/transformers.min.js

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion dist/transformers.min.js.map

Large diffs are not rendered by default.

Loading

0 comments on commit 4fdbc27

Please sign in to comment.