Utilize job protocol in market item (#49)
Change the item schema to the [PAI protocol](https://github.com/microsoft/openpai-protocol):
1. Render the item detail by parsing the protocol format.
2. Submit the protocol directly to the submit job page when clicking "Use".
3. Add a code wrapper.
Showing 26 changed files with 1,385 additions and 542 deletions.
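Item detail rendering (point 1 above) starts from parsing the item's YAML into a protocol object. Below is a minimal sketch of that step, assuming `js-yaml` is available and using an illustrative `rawProtocolText` input; the exact field names and modules used by this commit are not shown in this diff.

```javascript
// Minimal sketch, not the exact code in this commit: parse an item's raw
// protocol YAML and pick out the fields the detail page would render.
const yaml = require('js-yaml');

function parseItemProtocol(rawProtocolText) {
  // js-yaml v3 API; v4 renamed this to yaml.load.
  const protocol = yaml.safeLoad(rawProtocolText);
  return {
    name: protocol.name,
    type: protocol.type,
    contributor: protocol.contributor,
    description: protocol.description, // Markdown text shown on the detail page
    prerequisites: protocol.prerequisites || [],
    taskRoles: protocol.taskRoles || {},
  };
}

module.exports = { parseItemProtocol };
```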
@@ -0,0 +1,59 @@
protocolVersion: 2
name: couplet_dataset
type: job
contributor: OpenPAI
description: |
  # Couplet Dataset
  This is the dataset of couplet.
  ## Data content
  This dataset contains processed data based on [Microsoft AI EDU project](https://github.com/microsoft/ai-edu/blob/master/B-%E5%AE%9E%E8%B7%B5%E6%A1%88%E4%BE%8B/B13-AI%E5%AF%B9%E8%81%94%E7%94%9F%E6%88%90%E6%A1%88%E4%BE%8B/docs/fairseq.md).
  The original dataset was downloaded from [Public couplet dataset](https://github.com/wb14123/couplet-dataset) and was split into ```test, train and valid``` sets in a 98:1:1 proportion. The ```.up``` and ```.down``` files contain the upper part and the down part of each couplet, respectively.
  ## The file structure
  ```
  .
  |-- test.down // down part of couplet
  |-- test.up // up part of couplet
  |-- train.down
  |-- train.up
  |-- valid.down
  |-- valid.up
  ```
  ## How to use it
  The data will be mounted at the path given by the ```DATA_DIR``` environment variable. You could use ```$DATA_DIR``` in your commands when submitting jobs in PAI.
prerequisites:
  - name: default_image
    type: dockerimage
    uri: 'openpai/standard:python_3.6-pytorch_1.2.0-gpu'
  - name: couplet_data
    type: data
    uri:
      - /mnt/confignfs/couplet_data

taskRoles:
  taskrole:
    instances: 1
    dockerImage: default_image
    data: couplet_data
    resourcePerInstance:
      cpu: 4
      memoryMB: 8192
      gpu: 1
    commands:
      - >-
        # The data path is stored in the environment variable DATA_DIR; you
        could use it in later commands as $DATA_DIR
      - export DATA_DIR=<% $data.uri[0] %>

extras:
  storages:
    - name: confignfs
      mountPath: /mnt/confignfs
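The commands in the file above only export `DATA_DIR`; the `<% $data.uri[0] %>` placeholder is filled in from the `couplet_data` prerequisite when the job is submitted. As an illustration only (this is not the OpenPAI runtime's actual implementation, and it assumes the protocol has already been parsed into a plain object), the mapping looks roughly like this:

```javascript
// Illustration only, not the OpenPAI runtime: how `<% $data.uri[0] %>` in the
// commands above maps onto the `couplet_data` prerequisite, so that
// `export DATA_DIR=...` ends up pointing at the mounted path.
function resolveDataPlaceholders(command, protocol, taskRoleName) {
  const taskRole = protocol.taskRoles[taskRoleName];
  // Find the data prerequisite referenced by this task role.
  const dataPrereq = (protocol.prerequisites || []).find(
    p => p.type === 'data' && p.name === taskRole.data,
  );
  return command.replace(/<%\s*\$data\.uri\[(\d+)\]\s*%>/g, (match, index) =>
    dataPrereq ? dataPrereq.uri[Number(index)] : match,
  );
}

// resolveDataPlaceholders('export DATA_DIR=<% $data.uri[0] %>', protocol, 'taskrole')
// => 'export DATA_DIR=/mnt/confignfs/couplet_data'
```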
@@ -0,0 +1,62 @@
protocolVersion: 2
name: couplet_inference
type: job
contributor: OpenPAI
description: |
  # Couplet Inference Job Template
  This is a model inference process. The input data is the trained model produced by the ```couplet training job```, and this job will expose a URL where users can request the down part for a given upper part of a couplet.
  ## How to use
  When using this module, you should set three environment variables:
  - ```DATA_DIR```: the trained model path in the container. By default it uses the output of the couplet training job. If you want to use your own models, first make sure they are mounted into the container, and then change ```$DATA_DIR``` to that path.
  - ```CODE_DIR```: the service code path; the job will start a server at the given port.
  - ```FLASK_PORT```: the service port the container will expose.
  ## How to check the result
  After the job finishes successfully, you could check the job detail page to get the container IP and ```flask_port``` number, then go to http://<ip>:<flask_port>/upper=<input> to test the result.
prerequisites:
  - name: default_image
    type: dockerimage
    uri: "openpai/standard:python_3.6-pytorch_1.2.0-gpu"
  - name: couplet_data
    type: data
    uri:
      - /mnt/confignfs/couplet/checkpoints
  - name: code
    type: script
    uri: /mnt/confignfs/couplet

taskRoles:
  taskrole:
    instances: 1
    dockerImage: default_image
    data: couplet_data
    script: code
    resourcePerInstance:
      cpu: 4
      memoryMB: 8192
      gpu: 1
    ports:
      FLASK_PORT: 1
    commands:
      - export DATA_DIR=<% $data.uri[0] %>
      - export CODE_DIR=<% $script.uri %>
      - export FLASK_PORT=$PAI_PORT_LIST_taskrole_0_FLASK_PORT
      - pip install fairseq
      - pip install flask
      - pip install gunicorn
      - 'cd ${CODE_DIR}'
      - 'gunicorn --bind=0.0.0.0:${FLASK_PORT} app:app'

extras:
  storages:
    - name: confignfs
      mountPath: /mnt/confignfs
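Once the inference job is running, the job detail page gives the container IP and the allocated `FLASK_PORT`, and the endpoint described above can be queried directly. A small sketch; the plain-text response is an assumption about what the Flask app actually returns:

```javascript
// Sketch of calling the inference service once the job is running. The IP and
// port come from the job detail page; the response format is an assumption.
async function queryCouplet(containerIp, flaskPort, upperPart) {
  const url = `http://${containerIp}:${flaskPort}/upper=${encodeURIComponent(upperPart)}`;
  const response = await fetch(url);
  return response.text(); // the generated down part
}

// e.g. queryCouplet(ip, port, upperPart).then(console.log);
```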
@@ -0,0 +1,76 @@
protocolVersion: 2
name: couplet_training
type: job
contributor: OpenPAI
description: |
  # Couplet Training Job Template
  This is a model training process. After training, the model will give the down part corresponding to an upper part of a couplet. Please refer to the Microsoft AI Edu Project for more details.
  ## Training Data
  You could use the Couplet Dataset data component as training data, or any dataset that follows the fairseq model requirements.
  ## How to use
  When using this module, you should set three environment variables:
  - ```DATA_DIR```: the training data path in the container. By default it uses the Couplet Dataset data component. If you want to use your own dataset, first make sure it is mounted into the container, and then change ```$DATA_DIR``` to that path.
  - ```PREPROCESSED_DATA_DIR```: the path to store intermediate results; by default it is ./preprocessed_data.
  - ```OUTPUT_DIR```: the path to store the output, i.e. the trained model files. By default it mounts an NFS storage, and you could change it to another mounted storage.
  ## How to check the result
  After the job finishes successfully, you could check the output model files in the output storage. The storage server URL is shown on the job detail page.
prerequisites:
  - name: default_image
    type: dockerimage
    uri: "openpai/standard:python_3.6-pytorch_1.2.0-gpu"
  - name: couplet_data
    type: data
    uri:
      - /mnt/confignfs/couplet_data
  - name: output
    type: output
    uri: /mnt/confignfs/output

taskRoles:
  taskrole:
    instances: 1
    dockerImage: default_image
    data: couplet_data
    output: output
    resourcePerInstance:
      cpu: 4
      memoryMB: 8192
      gpu: 1
    commands:
      - export DATA_DIR=<% $data.uri[0] %>
      - export OUTPUT_DIR=<% $output.uri %>
      - export PREPROCESSED_DATA_DIR=./preprocessed_data
      - pip install fairseq
      - fairseq-preprocess \
      - '--source-lang up \'
      - '--target-lang down \'
      - '--trainpref ${DATA_DIR}/train \'
      - '--validpref ${DATA_DIR}/valid \'
      - '--testpref ${DATA_DIR}/test \'
      - "--destdir ${PREPROCESSED_DATA_DIR}"
      - 'fairseq-train ${PREPROCESSED_DATA_DIR} \'
      - '--log-interval 100 \'
      - '--lr 0.25 \'
      - '--clip-norm 0.1 \'
      - '--dropout 0.2 \'
      - '--criterion label_smoothed_cross_entropy \'
      - '--save-dir ${OUTPUT_DIR} \'
      - '-a lstm \'
      - '--max-tokens 4000 \'
      - "--max-epoch 100"

extras:
  storages:
    - name: confignfs
      mountPath: /mnt/confignfs
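The training template expects the three environment variables listed in its description to be reviewed before reuse. A hypothetical helper, not part of this commit, that an item detail page could use to surface them by scanning the task role's `export` commands:

```javascript
// Hypothetical helper (not in this commit): scan a task role's commands for
// `export VAR=...` lines so the detail page can list the environment variables
// a user may want to adjust (DATA_DIR, PREPROCESSED_DATA_DIR, OUTPUT_DIR).
function listExportedVariables(protocol, taskRoleName) {
  const commands = protocol.taskRoles[taskRoleName].commands || [];
  const exported = {};
  for (const command of commands) {
    const match = /^export\s+([A-Z_][A-Z0-9_]*)=(.*)$/.exec(command.trim());
    if (match) {
      exported[match[1]] = match[2];
    }
  }
  return exported; // e.g. { DATA_DIR: '<% $data.uri[0] %>', OUTPUT_DIR: '<% $output.uri %>', ... }
}
```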
@@ -0,0 +1,57 @@
protocolVersion: 2
name: Couplet Dataset
type: job
contributor: OpenPAI
description: |
  # Couplet Dataset
  This is the dataset of couplet.
  ## Data content
  This dataset contains processed data based on [Microsoft AI EDU project](https://github.com/microsoft/ai-edu/blob/master/B-%E5%AE%9E%E8%B7%B5%E6%A1%88%E4%BE%8B/B13-AI%E5%AF%B9%E8%81%94%E7%94%9F%E6%88%90%E6%A1%88%E4%BE%8B/docs/fairseq.md).
  The original dataset was downloaded from [Public couplet dataset](https://github.com/wb14123/couplet-dataset) and was split into ```test, train and valid``` sets in a 98:1:1 proportion. The ```.up``` and ```.down``` files contain the upper part and the down part of each couplet, respectively.
  ## The file structure
  ```
  .
  |-- test.down // down part of couplet
  |-- test.up // up part of couplet
  |-- train.down
  |-- train.up
  |-- valid.down
  |-- valid.up
  ```
  ## How to use it
  The data will be mounted at the path given by the ```DATA_DIR``` environment variable. You could use ```$DATA_DIR``` in your commands when submitting jobs in PAI.
prerequisites:
  - name: default_image
    type: dockerimage
    uri: 'openpai/standard:python_3.6-pytorch_1.2.0-gpu'
  - name: couplet_data
    type: data
    uri: /mnt/confignfs/couplet_data

taskRoles:
  taskrole:
    instances: 1
    dockerImage: default_image
    resourcePerInstance:
      cpu: 4
      memoryMB: 8192
      gpu: 1
    commands:
      - >-
        # The data path is stored in the environment variable DATA_DIR; you
        could use it in later commands as $DATA_DIR
      - export DATA_DIR=<% $data.uri[0] %>

extras:
  storages:
    - name: confignfs
      mountPath: /mnt/confignfs
@@ -0,0 +1,10 @@
<p>Welcome to the openpaimarketplace GitHub page! You could use the built plugin file at
  <a href="https://microsoft.github.io/openpaimarketplace/publish/marketplace/plugin.js">
    https://microsoft.github.io/openpaimarketplace/publish/marketplace/plugin.js
  </a>
</p>
<p>The submit job v2 plugin file:
  <a href="https://microsoft.github.io/openpaimarketplace/publish/submit-job-v2/plugin.js">
    https://microsoft.github.io/openpaimarketplace/publish/submit-job-v2/plugin.js
  </a>
</p>
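This GitHub Pages index points at the published plugin bundles. As a rough illustration of how such a bundle ends up in a page (in practice the loading is handled by the OpenPAI webportal's plugin configuration, not hand-written code like this):

```javascript
// Rough illustration only: the published plugin.js bundles above are loaded as
// external scripts by the OpenPAI webportal plugin mechanism.
function loadMarketplacePlugin(pluginUri) {
  const script = document.createElement('script');
  script.src = pluginUri;
  script.async = true;
  document.body.appendChild(script);
}

loadMarketplacePlugin(
  'https://microsoft.github.io/openpaimarketplace/publish/marketplace/plugin.js',
);
```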
@@ -0,0 +1,22 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
import styled from 'styled-components';

// Styled <pre> block for rendering code and protocol YAML snippets on the
// market item detail page.
const CodeWrapper = styled.pre`
  background: #f4f4f4;
  border: 1px solid #ddd;
  border-left: 3px solid #f36d33;
  color: #666;
  page-break-inside: avoid;
  font-family: monospace;
  font-size: 15px;
  line-height: 1.6;
  margin-bottom: 1.6em;
  max-width: 100%;
  overflow: auto;
  padding: 1em 1.5em;
  display: block;
  word-wrap: break-word;
`;

export default CodeWrapper;
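A usage sketch for the new wrapper (the surrounding component and import path are assumptions, not code from this commit): since `CodeWrapper` is a styled `<pre>`, the item's raw protocol YAML can be dropped in verbatim on the detail page.

```jsx
// Usage sketch; ProtocolPreview and the import path are illustrative assumptions.
import React from 'react';
import CodeWrapper from './code_wrapper';

const ProtocolPreview = ({ yamlText }) => <CodeWrapper>{yamlText}</CodeWrapper>;

export default ProtocolPreview;
```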