Skip to content
This repository has been archived by the owner on Sep 18, 2024. It is now read-only.

Fix sequence id issue on resuming experiment #316

Merged
merged 1 commit into from
Nov 2, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 22 additions & 1 deletion src/nni_manager/common/experimentStartupInfo.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ class ExperimentStartupInfo {
private experimentId: string = '';
private newExperiment: boolean = true;
private initialized: boolean = false;
private initTrialSequenceID: number = 0;

public setStartupInfo(newExperiment: boolean, experimentId: string): void {
assert(!this.initialized);
Expand All @@ -48,6 +49,17 @@ class ExperimentStartupInfo {

return this.newExperiment;
}

/**
 * Stores the sequence id that is later handed back by getInitTrialSequenceId().
 *
 * @param initSequenceId value to keep as the initial trial sequence id
 */
public setInitTrialSequenceId(initSequenceId: number): void {
// Guard: the assertion fails if `initialized` is still false when this is called.
assert(this.initialized);
this.initTrialSequenceID = initSequenceId;
}

/**
 * Reads back the stored initial trial sequence id.
 *
 * @returns the current value of `initTrialSequenceID`; the field is declared with a
 *          default of 0, so that is what is returned if the setter was never called
 */
public getInitTrialSequenceId(): number {
// Guard: the assertion fails if `initialized` is still false when this is called.
assert(this.initialized);

return this.initTrialSequenceID;
}
}

function getExperimentId(): string {
Expand All @@ -58,8 +70,17 @@ function isNewExperiment(): boolean {
return component.get<ExperimentStartupInfo>(ExperimentStartupInfo).isNewExperiment();
}

/**
 * Module-level shortcut: obtains the ExperimentStartupInfo instance through
 * component.get and forwards the value to its setInitTrialSequenceId method.
 *
 * @param initSequenceId initial trial sequence id to store
 */
function setInitTrialSequenceId(initSequenceId: number): void {
    const startupInfo = component.get<ExperimentStartupInfo>(ExperimentStartupInfo);
    startupInfo.setInitTrialSequenceId(initSequenceId);
}

/**
 * Module-level shortcut: obtains the ExperimentStartupInfo instance through
 * component.get and returns what its getInitTrialSequenceId method reports.
 *
 * @returns the initial trial sequence id held by the startup info object
 */
function getInitTrialSequenceId(): number {
    const startupInfo = component.get<ExperimentStartupInfo>(ExperimentStartupInfo);

    return startupInfo.getInitTrialSequenceId();
}

/**
 * Module-level shortcut: obtains the ExperimentStartupInfo instance through
 * component.get and passes both arguments on to its setStartupInfo method.
 *
 * @param newExperiment flag forwarded unchanged to setStartupInfo
 * @param experimentId experiment identifier forwarded unchanged to setStartupInfo
 */
function setExperimentStartupInfo(newExperiment: boolean, experimentId: string): void {
    const startupInfo = component.get<ExperimentStartupInfo>(ExperimentStartupInfo);
    startupInfo.setStartupInfo(newExperiment, experimentId);
}

export { ExperimentStartupInfo, getExperimentId, isNewExperiment, setExperimentStartupInfo };
export { ExperimentStartupInfo, getExperimentId, isNewExperiment,
setExperimentStartupInfo, setInitTrialSequenceId, getInitTrialSequenceId };
1 change: 1 addition & 0 deletions src/nni_manager/common/manager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ interface ExperimentProfile {
logDir?: string;
startTime?: number;
endTime?: number;
maxSequenceId: number;
revision: number;
}

Expand Down
13 changes: 12 additions & 1 deletion src/nni_manager/core/nnimanager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ import { Deferred } from 'ts-deferred';
import * as component from '../common/component';
import { DataStore, MetricDataRecord, MetricType, TrialJobInfo } from '../common/datastore';
import { NNIError } from '../common/errors';
import { getExperimentId } from '../common/experimentStartupInfo';
import { getExperimentId, setInitTrialSequenceId } from '../common/experimentStartupInfo';
import { getLogger, Logger } from '../common/log';
import {
ExperimentParams, ExperimentProfile, Manager,
Expand Down Expand Up @@ -152,6 +152,8 @@ class NNIManager implements Manager {
this.experimentProfile = await this.dataStore.getExperimentProfile(experimentId);
const expParams: ExperimentParams = this.experimentProfile.params;

setInitTrialSequenceId(this.experimentProfile.maxSequenceId + 1);

// Set up multiphase config
if (expParams.multiPhase && this.trainingService.isMultiPhaseJobSupported) {
this.trainingService.setClusterMetadata('multiPhase', expParams.multiPhase.toString());
Expand Down Expand Up @@ -457,6 +459,7 @@ class NNIManager implements Manager {
}
};
const trialJobDetail: TrialJobDetail = await this.trainingService.submitTrialJob(trialJobAppForm);
await this.storeMaxSequenceId(trialJobDetail.sequenceId);
this.trialJobs.set(trialJobDetail.id, Object.assign({}, trialJobDetail));
const trialJobDetailSnapshot: TrialJobDetail | undefined = this.trialJobs.get(trialJobDetail.id);
if (trialJobDetailSnapshot != undefined) {
Expand Down Expand Up @@ -588,6 +591,7 @@ class NNIManager implements Manager {
revision: 0,
execDuration: 0,
logDir: getLogDir(),
maxSequenceId: 0,
params: {
authorName: '',
experimentName: '',
Expand All @@ -604,6 +608,13 @@ class NNIManager implements Manager {
}
};
}

/**
 * Raises the experiment profile's maxSequenceId to the given value when that value
 * is strictly greater than the one currently held, then calls storeExperimentProfile().
 * Does nothing otherwise.
 *
 * @param sequenceId sequence id to compare against the recorded maximum
 */
private async storeMaxSequenceId(sequenceId: number): Promise<void> {
    // Written as a negated ">" (not "<=") so that any non-numeric comparison
    // falls through exactly as a failed ">" test would.
    if (!(sequenceId > this.experimentProfile.maxSequenceId)) {
        return;
    }

    this.experimentProfile.maxSequenceId = sequenceId;
    await this.storeExperimentProfile();
}
}

export { NNIManager };
5 changes: 4 additions & 1 deletion src/nni_manager/core/sqlDatabase.ts
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ create table ExperimentProfile (
startTime integer,
endTime integer,
logDir text,
maxSequenceId integer,
revision integer);
create index ExperimentProfile_id on ExperimentProfile(id);
`;
Expand All @@ -65,6 +66,7 @@ function loadExperimentProfile(row: any): ExperimentProfile {
startTime: row.startTime === null ? undefined : row.startTime,
endTime: row.endTime === null ? undefined : row.endTime,
logDir: row.logDir === null ? undefined : row.logDir,
maxSequenceId: row.maxSequenceId,
revision: row.revision
};
}
Expand Down Expand Up @@ -131,14 +133,15 @@ class SqlDB implements Database {
}

public storeExperimentProfile(exp: ExperimentProfile): Promise<void> {
const sql: string = 'insert into ExperimentProfile values (?,?,?,?,?,?,?)';
const sql: string = 'insert into ExperimentProfile values (?,?,?,?,?,?,?,?)';
const args: any[] = [
JSON.stringify(exp.params),
exp.id,
exp.execDuration,
exp.startTime === undefined ? null : exp.startTime,
exp.endTime === undefined ? null : exp.endTime,
exp.logDir === undefined ? null : exp.logDir,
exp.maxSequenceId,
exp.revision
];

Expand Down
1 change: 1 addition & 0 deletions src/nni_manager/core/test/dataStore.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ describe('Unit test for dataStore', () => {
execDuration: 0,
startTime: Date.now(),
endTime: Date.now(),
maxSequenceId: 0,
revision: 0
}
const id: string = profile.id;
Expand Down
8 changes: 4 additions & 4 deletions src/nni_manager/core/test/sqlDatabase.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -64,10 +64,10 @@ const expParams2: ExperimentParams = {
};

const profiles: ExperimentProfile[] = [
{ params: expParams1, id: '#1', execDuration: 0, startTime: Date.now(), endTime: undefined, revision: 1 },
{ params: expParams1, id: '#1', execDuration: 0, startTime: Date.now(), endTime: Date.now(), revision: 2 },
{ params: expParams2, id: '#2', execDuration: 0, startTime: Date.now(), endTime: Date.now(), revision: 2 },
{ params: expParams2, id: '#2', execDuration: 0, startTime: Date.now(), endTime: Date.now(), revision: 3 }
{ params: expParams1, id: '#1', execDuration: 0, startTime: Date.now(), endTime: undefined, revision: 1, maxSequenceId: 0 },
{ params: expParams1, id: '#1', execDuration: 0, startTime: Date.now(), endTime: Date.now(), revision: 2, maxSequenceId: 0 },
{ params: expParams2, id: '#2', execDuration: 0, startTime: Date.now(), endTime: Date.now(), revision: 2, maxSequenceId: 0 },
{ params: expParams2, id: '#2', execDuration: 0, startTime: Date.now(), endTime: Date.now(), revision: 3, maxSequenceId: 0 }
];

const events: TrialJobEventRecord[] = [
Expand Down
1 change: 1 addition & 0 deletions src/nni_manager/rest_server/test/mockedNNIManager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,7 @@ export class MockedNNIManager extends Manager {
execDuration: 0,
startTime: Date.now(),
endTime: Date.now(),
maxSequenceId: 0,
revision: 0
};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,12 @@ import { MethodNotImplementedError, NNIError, NNIErrorNames } from '../../common
import { getLogger, Logger } from '../../common/log';
import { TrialConfig } from '../common/trialConfig';
import { TrialConfigMetadataKey } from '../common/trialConfigMetadataKey';
import { getInitTrialSequenceId } from '../../common/experimentStartupInfo';
import {
HostJobApplicationForm, JobApplicationForm, HyperParameters, TrainingService, TrialJobApplicationForm,
TrialJobDetail, TrialJobMetric, TrialJobStatus
} from '../../common/trainingService';
import { delay, generateParamFileName, getExperimentRootDir, uniqueString } from '../../common/utils';
import { file } from 'tmp';

const tkill = require('tree-kill');

Expand Down Expand Up @@ -111,7 +111,7 @@ class LocalTrainingService implements TrainingService {
this.initialized = false;
this.stopping = false;
this.log = getLogger();
this.trialSequenceId = 0;
this.trialSequenceId = -1;
}

public async run(): Promise<void> {
Expand Down Expand Up @@ -432,6 +432,10 @@ class LocalTrainingService implements TrainingService {
}

/**
 * Hands out the next trial sequence id.
 *
 * While the counter still holds -1 it is first loaded from getInitTrialSequenceId();
 * the counter's current value is then returned and the counter advances by one.
 *
 * @returns the sequence id allocated by this call
 */
private generateSequenceId(): number {
    const needsSeed: boolean = this.trialSequenceId === -1;
    if (needsSeed) {
        this.trialSequenceId = getInitTrialSequenceId();
    }

    const allocatedId: number = this.trialSequenceId;
    this.trialSequenceId += 1;

    return allocatedId;
}

Expand Down
8 changes: 6 additions & 2 deletions src/nni_manager/training_service/pai/paiTrainingService.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ import * as request from 'request';

import { Deferred } from 'ts-deferred';
import { EventEmitter } from 'events';
import { getExperimentId } from '../../common/experimentStartupInfo';
import { getExperimentId, getInitTrialSequenceId } from '../../common/experimentStartupInfo';
import { HDFSClientUtility } from './hdfsClientUtility'
import { MethodNotImplementedError } from '../../common/errors';
import { getLogger, Logger } from '../../common/log';
Expand Down Expand Up @@ -78,7 +78,7 @@ class PAITrainingService implements TrainingService {
this.experimentId = getExperimentId();
this.paiJobCollector = new PAIJobInfoCollector(this.trialJobsMap);
this.hdfsDirPattern = 'hdfs://(?<host>([0-9]{1,3}.){3}[0-9]{1,3})(:[0-9]{2,5})?(?<baseDir>/.*)?';
this.trialSequenceId = 0;
this.trialSequenceId = -1;
}

public async run(): Promise<void> {
Expand Down Expand Up @@ -454,6 +454,10 @@ class PAITrainingService implements TrainingService {
}

/**
 * Allocates a trial sequence id from the service's running counter.
 *
 * A counter value of -1 triggers a one-time load from getInitTrialSequenceId().
 * The value held after that step is returned, and the counter is bumped by one.
 *
 * @returns the sequence id allocated by this call
 */
private generateSequenceId(): number {
    if (this.trialSequenceId === -1) {
        const seed: number = getInitTrialSequenceId();
        this.trialSequenceId = seed;
    }

    const current: number = this.trialSequenceId;
    this.trialSequenceId = current + 1;

    return current;
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ import { Deferred } from 'ts-deferred';
import { String } from 'typescript-string-operations';
import * as component from '../../common/component';
import { MethodNotImplementedError, NNIError, NNIErrorNames } from '../../common/errors';
import { getExperimentId } from '../../common/experimentStartupInfo';
import { getExperimentId, getInitTrialSequenceId } from '../../common/experimentStartupInfo';
import { getLogger, Logger } from '../../common/log';
import { ObservableTimer } from '../../common/observableTimer';
import {
Expand Down Expand Up @@ -77,7 +77,7 @@ class RemoteMachineTrainingService implements TrainingService {
this.remoteExpRootDir = this.getRemoteExperimentRootDir();
this.timer = timer;
this.log = getLogger();
this.trialSequenceId = 0;
this.trialSequenceId = -1;
}

/**
Expand Down Expand Up @@ -607,6 +607,10 @@ class RemoteMachineTrainingService implements TrainingService {
}

/**
 * Produces the sequence id for the next trial.
 *
 * If the counter is still -1, it is replaced with the value from
 * getInitTrialSequenceId() before use. The method returns the counter's value
 * and leaves the counter incremented by one.
 *
 * @returns the sequence id allocated by this call
 */
private generateSequenceId(): number {
    const unseeded: boolean = this.trialSequenceId === -1;
    if (unseeded) {
        this.trialSequenceId = getInitTrialSequenceId();
    }

    const nextId: number = this.trialSequenceId;
    this.trialSequenceId = nextId + 1;

    return nextId;
}

Expand Down