Skip to content
This repository has been archived by the owner on Jan 24, 2024. It is now read-only.

alpha #2

Merged
merged 22 commits into from
Mar 21, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
*.xsh linguist-language=Python
39 changes: 39 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# Travis CI configuration: installs the Python requirements and runs the
# project's test script on an Ubuntu Trusty Linux worker.
language: python
cache:
  - pip
  - ccache
  # - yarn
  # - npm
sudo: required
dist: trusty

os:
  - linux

# A single build job; the `script` step below only acts when JOB is "test".
env:
  - JOB=test

addons:
  apt:
    sources:
      - ubuntu-toolchain-r-test
    packages:
      # - gcc-5
      # - g++-5
      - git
      - python3
      - python3-pip
      - ccache

install:
  - sudo pip3 install -r requirements.txt

# NOTE(review): this key has no value -- confirm it is an intentional
# placeholder rather than a forgotten step.
before_install:

script:
  # NOTE(review): tests.xsh is launched with /bin/bash although the .xsh
  # extension suggests a xonsh script -- confirm the intended interpreter.
  - if [[ "$JOB" == "test" ]]; then /bin/bash ./tests.xsh; fi

notifications:
  email:
    on_success: change
    on_failure: always
18 changes: 9 additions & 9 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,16 +23,16 @@ class Factor(object):
self.out_file = out_file
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We need to add comments for this class

self.his_file = os.path.join('history', out_file)
self.factors = []

Factor.__register__(self.__class__)

def add_record(self, r): # called when the model run, add execution details.
self.factors.append(r)

def test(): # called when test
# can be something comparing the execution details with historical data.
raise NotImplementedError

@staticmethod
def __register__(factor): # factor should be a subclass
assert isinstance(factor, Factor)
Expand All @@ -41,7 +41,7 @@ class Factor(object):
assert Factor.dic[key] is factor
else:
Factor.dic[key] = factor

def __del__(self):
if self.factors:
# write to file self.out_file
Expand All @@ -54,7 +54,7 @@ class TrainDurationFactor(Factor):
def __init__(self, threshold):
super(TrainDurationFactor, self).__init__('train.dura.txt')
self.threshold = threshold

def test(self):
cur_data = _load_nparray_from_file(self.out_file)
his_data = _load_nparray_from_file(self.his_file)
Expand Down Expand Up @@ -126,10 +126,10 @@ To make the testing logic stable, the testable model should ensure that

## Persistence of log

The log of each execution should be stored somewhere,
The log of each execution should be stored somewhere,
the simplest way is to use Git to maintain a versionable history.

After each execution, add all the logs and statistic result and commit with a comment with a
After each execution, add all the logs and statistic result and commit with a comment with a
template like

```
Expand All @@ -141,7 +141,7 @@ paddle code version: {commitid}

## Alarm

If a test failed, ring an alarm by
If a test failed, ring an alarm by

- sending email to `paddle-dev@baidu.com` including
- error type
Expand Down
Empty file added __init__.py
Empty file.
9 changes: 9 additions & 0 deletions baseline.xsh
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#!/usr/bin/env xonsh
''' Maintain the history baseline. '''
# Make a subprocess command that exits non-zero raise an error instead of
# being silently ignored.
$RAISE_SUBPROC_ERROR = True

# Put '' (the current working directory) first on the import path so the
# project-local modules below can be imported.
import sys; sys.path.insert(0, '')
import config
from baseline_strategy import GitStrategy

# Strategy instance bound to the configured baseline repo URL and the
# configured local checkout path.
strategy = GitStrategy(config.baseline_repo_url(), config.baseline_local_repo_path())
146 changes: 146 additions & 0 deletions baseline_strategy.xsh
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
#!/usr/bin/env xonsh
$RAISE_SUBPROC_ERROR = True
$XONSH_SHOW_TRACEBACK = True
'''
Some strategies that define how to update the baseline.
'''
import sys; sys.path.insert(0, '')
import config
import repo
from utils import *

class Strategy(object):
    '''
    The base class for all baseline strategies.

    Subclasses implement the four hooks below. Calling an instance checks
    the evaluation result and runs the matching hook.
    '''

    def __call__(self):
        log.warn('running baseline strategy')
        # Choose the follow-up action from the evaluation outcome, then run it.
        action = (self.update_baseline
                  if self.evaluation_passed() else self.store_failed_kpis)
        action()

    def refresh_workspace(self):
        ''' Prepare the workspace: git checkout -b develop origin/master. '''
        raise NotImplementedError

    def evaluation_passed(self):
        ''' Return whether the current version passes the evaluation. '''
        raise NotImplementedError

    def update_baseline(self):
        ''' Update the baseline to the latest evaluation records. '''
        raise NotImplementedError

    def store_failed_kpis(self):
        ''' Keep the KPIs of a failed evaluation. '''
        raise NotImplementedError


class GitStrategy(Strategy):
    '''
    Use a git repo to maintain the baseline.

    If the current KPI is better than the baseline, update the baseline:
      - overwrite the baseline files with the current KPI.
      - git add all the diff
      - git commit and push to github

    pros:
      - uses plain text files, which are easy to debug and analyze
      - git based version control is easy to maintain
      - the baseline can be manually managed by changing the baseline repo
    cons:
      - git can not maintain a big history

    details:
      - several branches
        - **master** for the stable baseline
        - **failure** for the latest failed KPI
        - **develop** a temporary branch for the working space

    Directory changes are wrapped in `PathRecover` (from utils), which is
    presumably a context manager that restores the working directory on
    exit -- confirm against utils.
    '''
    def __init__(self, repo_url, local_dst):
        '''
        repo_url: the url of the baseline repo.
        local_dst: the local directory the repo is cloned into.
        '''
        log.info('GitStrategy.repo_url', repo_url)
        log.info('GitStrategy.local_dst', local_dst)
        self.repo_url = repo_url
        self.local_dst = local_dst

    def refresh_workspace(self):
        ''' Re-clone the baseline repo and create `develop` from origin/master. '''
        log.warn('baseline refresh workspace')
        with PathRecover():
            self._init_repo()
            cd @(self.local_dst)
            # git checkout -b master origin/master
            git checkout -b develop origin/master

    def update_baseline(self):
        '''
        Copy every model's current *_factor.txt into its history/ directory,
        commit on `develop`, merge into `master`, and push `master` when
        config.mode is "production".
        '''
        log.warn('baseline update baseline')
        with PathRecover():
            cd @(self.local_dst)
            assert self.cur_branch == "develop", \
                "branch %s is should be develop" % self.cur_branch
            for model in models():
                # Restore the directory after each model so the next `cd`
                # starts from the repo root again.
                with PathRecover():
                    cd @(model)
                    # update baseline
                    cp *_factor.txt history/
            self._commit_current_kpis()
            git checkout master
            git merge develop
            # only update git repo on production mode
            if config.mode == "production":
                git push origin master

    def evaluation_passed(self):
        ''' here just use a file as success flag. '''
        return evaluation_succeed()

    def store_failed_kpis(self):
        '''
        Stage every model's *_factor.txt, commit, and force-push `develop`.

        NOTE(review): the class docstring reserves a **failure** branch for
        failed KPIs, but this pushes `develop` -- confirm which is intended.
        '''
        log.info("store the failed kpis")
        with PathRecover():
            cd @(self.local_dst)
            assert self.cur_branch == 'develop'
            # store the failed record
            for model in models():
                # Recover the directory per model (same as update_baseline);
                # a single recover around the whole loop would stack the
                # `cd`s and break on the second model for relative paths.
                with PathRecover():
                    cd @(model)
                    git add *_factor.txt
            self._commit_current_kpis()
            git push origin develop -f

    def _commit_current_kpis(self):
        '''
        Commit all tracked changes on `develop` with a message holding the
        paddle commit id and the evaluation result.
        '''
        with PathRecover():
            assert self.cur_branch == 'develop'
            title = "evaluate {commit} {status}".format(
                commit = repo.get_paddle_commit(short=True),
                status = 'passed' if self.evaluation_passed() else 'failed',)
            details = [
                "paddle commit: %s" % repo.get_paddle_commit(),
                "evaluation status:\n %s" % gstate.get_evaluation_result()
            ]
            cd @(self.local_dst)
            log.info('commit current kpi to branch[%s]' % self.cur_branch)
            comment = "{title}\n\n{details}".format(
                title = title,
                details = '\n'.join(details))
            git commit -a -m @(comment)

    def _init_repo(self):
        '''
        Remove any previous local checkout and clone the baseline repo again.

        Uses the url and destination given to the constructor, so the clone
        always lands in the directory the other methods `cd` into.
        '''
        with PathRecover():
            if os.path.isdir(self.local_dst):
                log.info('remove the old baseline: %s' % self.local_dst)
                rm -rf @(self.local_dst)
            log.warn('git clone baseline from {} to {}'.format(
                self.repo_url,
                self.local_dst))
            git clone @(self.repo_url) @(self.local_dst)

    @property
    def cur_branch(self):
        ''' Name of the branch currently checked out in the local repo. '''
        with PathRecover():
            cd @(self.local_dst)
            # `git branch` marks the current branch with a leading "* ";
            # keep that line and drop the two-character marker.
            return $(git branch | grep -e "^*").strip()[2:]
28 changes: 28 additions & 0 deletions baseline_strategy_test.xsh
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#!/usr/bin/env xonsh
$RAISE_SUBPROC_ERROR = True
import os
import sys; sys.path.insert(0, '')
import unittest
import config
from utils import *
from baseline_strategy import GitStrategy


class TestMain(unittest.TestCase):
def setUp(self):
config.switch_to_test_mode()
assert "_test" in config.workspace
self.obj = GitStrategy(config.baseline_repo_url(),
config.baseline_local_repo_path())

def test_refresh_workspace(self):
self.obj.refresh_workspace()
self.assertEqual(self.obj.cur_branch, "develop")

# def test_update_baseline(self):
# self.obj.refresh_workspace()
# TODO use some toy branch to test
# self.obj.update_baseline()


# Run the test cases of this file; the module is named explicitly instead of
# relying on unittest's default of '__main__'.
unittest.main(module='baseline_strategy_test')
85 changes: 85 additions & 0 deletions config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
import os
import logging
import shutil

# Root directory of the CI; defaults to the directory containing this file.
# switch_to_test_mode() rebinds it, so the path settings below are lambdas
# that read `workspace` at call time.
workspace = os.path.dirname(os.path.realpath(__file__)) # pwd
# Run mode: "production" by default; switch_to_test_mode() sets it to "test".
mode = "production"

############################# OFFICIAL CONFIGS #############################
pjoin = os.path.join

# repo address of PaddlePaddle
repo_url = lambda: 'https://github.com/PaddlePaddle/Paddle.git'

# the place to clone paddle source code
local_repo_path = lambda: pjoin(workspace, 'paddle_code')

# the compiled paddle whl to test
compiled_whl_path = lambda: '/usr/local/opt/paddle/share/wheels/paddlepaddle_gpu-0.11.1a1-cp27-cp27mu-linux_x86_64.whl'

# TODO change to an official repo
# NOTE make sure that the evaluator machine has the access rights.
# the repo to store history baselines, by default, the latest will be pulled as the baseline.
baseline_repo_url = lambda: 'git@github.com:Superjomn/paddle-modelci-baseline.git'

# local checkout of the baseline repo
baseline_local_repo_path = lambda: pjoin(workspace, 'models')

############################# CUSTOM CONFIGS #############################
# just do anything here
# success_flag_file = lambda: pjoin(workspace, 'success.flag') #

############################# DONT CHANGE BELOW #############################
tmp_root = lambda: pjoin(workspace, "tmp")
# the wheel's file name from compiled_whl_path(), placed under tmp_root()
whl_path = lambda: pjoin(tmp_root(), os.path.basename(compiled_whl_path()))
# same directory as baseline_local_repo_path()
models_path = lambda: pjoin(workspace, 'models')

log_path = lambda: pjoin(workspace, 'modelci.log')

# Scratch directory for tests. A plain value computed once at import, so it
# stays under the original workspace even after `workspace` is rebound.
test_root = pjoin(workspace, "_test_tmp_dir")

# keys for gstate
global_state_root = lambda: pjoin(workspace, "_states")
_state_paddle_code_commit_ = "paddle_code_commit"
_evaluation_result_ = "evaluation_result"
_model_factors_ = "model_factors.json"
_fail_commit_ = 'fail_commit'
_success_commit_ = 'success_commit'

############################# DETAILS BELOW #############################

# set logging: configure the root logger at import time to write to log_path()
_log_format_ = '[%(asctime)s %(levelname)s] %(message)s'
_log_level_ = logging.DEBUG
logging.basicConfig(
    format=_log_format_, level=_log_level_, filename=log_path())


def switch_to_test_mode():
    '''
    Switch the whole configuration to test mode.

    - set mode to 'test' and the ci's workspace to test_root
    - clear test_root (remove it and create it again, empty)
    - use the https url of the baseline repo
    - send logging to test.log inside the workspace
    '''
    global workspace, mode, baseline_repo_url
    mode = 'test'
    if '_test_' not in workspace:
        workspace = test_root

    # Guard the recursive delete below: only ever wipe the scratch directory.
    assert "_test_tmp_dir" in test_root
    if os.path.isdir(test_root):
        shutil.rmtree(test_root)
    # Recreate the directory so the workspace (and the log file below) has
    # an existing parent directory.
    os.makedirs(test_root)
    if not os.path.isdir(workspace):
        os.makedirs(workspace)

    baseline_repo_url = lambda: "https://github.com/Superjomn/paddle-modelci-baseline.git"

    # logging.basicConfig() does nothing when the root logger already has
    # handlers, and this module configures it at import time. Drop the
    # existing handlers first so the test log file really takes effect.
    root_logger = logging.getLogger()
    for handler in list(root_logger.handlers):
        root_logger.removeHandler(handler)
        handler.close()
    logging.basicConfig(
        format=_log_format_,
        level=_log_level_,
        filename=pjoin(workspace, 'test.log'))


# Make sure the test scratch directory exists.
# NOTE(review): treated as module-level code (two blank lines precede it in
# the file) -- confirm it is not meant to be part of switch_to_test_mode().
if not os.path.isdir(test_root):
    os.mkdir(test_root)

# os.mkdir(global_state_root())
# os.mkdir(test_root)
# os.mkdir(tmp_root())
Loading