Skip to content
This repository has been archived by the owner on Sep 18, 2024. It is now read-only.

change authFile to local path #1455

Merged
merged 4 commits into from
Aug 12, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/en_US/TrainingService/PaiMode.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ Compared with [LocalMode](LocalMode.md) and [RemoteMachineMode](RemoteMachineMod
* shmMB
* Optional key. Set the shmMB configuration of OpenPAI, it set the shared memory for one task in the task role.
* authFile
* Optional key, Set the auth file path for private registry while using PAI mode, [Refer](https://github.com/microsoft/pai/blob/2ea69b45faa018662bc164ed7733f6fdbb4c42b3/docs/faq.md#q-how-to-use-private-docker-registry-job-image-when-submitting-an-openpai-job).
* Optional key, Set the auth file path for private registry while using PAI mode, [Refer](https://github.com/microsoft/pai/blob/2ea69b45faa018662bc164ed7733f6fdbb4c42b3/docs/faq.md#q-how-to-use-private-docker-registry-job-image-when-submitting-an-openpai-job), you can prepare the authFile and simply provide the local path of this file, NNI will upload this file to HDFS for you.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Link to master branch?


Once complete to fill NNI experiment config file and save (for example, save as exp_pai.yml), then run the following command
```
Expand Down
2 changes: 1 addition & 1 deletion src/nni_manager/training_service/pai/hdfsClientUtility.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ export namespace HDFSClientUtility {
* Get NNI experiment root directory
* @param hdfsUserName HDFS user name
*/
function hdfsExpRootDir(hdfsUserName: string): string {
export function hdfsExpRootDir(hdfsUserName: string): string {
// tslint:disable-next-line:prefer-template
return '/' + unixPathJoin(hdfsUserName, 'nni', 'experiments', getExperimentId());
}
Expand Down
14 changes: 13 additions & 1 deletion src/nni_manager/training_service/pai/paiTrainingService.ts
Original file line number Diff line number Diff line change
Expand Up @@ -74,9 +74,11 @@ class PAITrainingService implements TrainingService {
private paiRestServerPort?: number;
private nniManagerIpConfig?: NNIManagerIpConfig;
private copyExpCodeDirPromise?: Promise<void>;
private copyAuthFilePromise?: Promise<void>;
private versionCheck: boolean = true;
private logCollection: string;
private isMultiPhase: boolean = false;
private authFileHdfsPath: string | undefined = undefined;

constructor() {
this.log = getLogger();
Expand Down Expand Up @@ -292,6 +294,12 @@ class PAITrainingService implements TrainingService {
HDFSClientUtility.getHdfsExpCodeDir(this.paiClusterConfig.userName),
this.hdfsClient
);

// Upload authFile to hdfs
if (this.paiTrialConfig.authFile) {
this.authFileHdfsPath = unixPathJoin(HDFSClientUtility.hdfsExpRootDir(this.paiClusterConfig.userName), 'authFile');
this.copyAuthFilePromise = HDFSClientUtility.copyFileToHdfs(this.paiTrialConfig.authFile, this.authFileHdfsPath, this.hdfsClient);
}

deferred.resolve();
break;
Expand Down Expand Up @@ -373,6 +381,10 @@ class PAITrainingService implements TrainingService {
await this.copyExpCodeDirPromise;
}

//Make sure authFile is copied from local to HDFS
if (this.paiTrialConfig.authFile) {
await this.copyAuthFilePromise;
}
// Step 1. Prepare PAI job configuration

const trialLocalTempFolder: string = path.join(getExperimentRootDir(), 'trials-local', trialJobId);
Expand Down Expand Up @@ -449,7 +461,7 @@ class PAITrainingService implements TrainingService {
// Add Virutal Cluster
this.paiTrialConfig.virtualCluster === undefined ? 'default' : this.paiTrialConfig.virtualCluster.toString(),
//Task auth File
this.paiTrialConfig.authFile
this.authFileHdfsPath
);

// Step 2. Upload code files in codeDir onto HDFS
Expand Down
3 changes: 1 addition & 2 deletions tools/nni_cmd/config_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,8 +233,7 @@ def setPathCheck(key):
'cpuNum': setNumberRange('cpuNum', int, 0, 99999),
'memoryMB': setType('memoryMB', int),
'image': setType('image', str),
Optional('authFile'): And(Regex(r'hdfs://(([0-9]{1,3}.){3}[0-9]{1,3})(:[0-9]{2,5})?(/.*)?'),\
error='ERROR: authFile format error, authFile format is hdfs://xxx.xxx.xxx.xxx:xxx'),
Optional('authFile'): And(os.path.exists, error=SCHEMA_PATH_ERROR % 'authFile'),
Optional('shmMB'): setType('shmMB', int),
Optional('dataDir'): And(Regex(r'hdfs://(([0-9]{1,3}.){3}[0-9]{1,3})(:[0-9]{2,5})?(/.*)?'),\
error='ERROR: dataDir format error, dataDir format is hdfs://xxx.xxx.xxx.xxx:xxx'),
Expand Down