Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Token indices sequence length is longer than the specified maximum sequence length for this model (2409 > 2048). Running this sequence through the model will result in indexing errors #13

Open
Baymax1520301 opened this issue Aug 9, 2024 · 3 comments

Comments

@Baymax1520301
Copy link

Token indices sequence length is longer than the specified maximum sequence length for this model (2409 > 2048). Running this sequence through the model will result in indexing errors

I got this warning when I was training the model.Does this warning have an impact on training?How to resolve this warning?

@exiawsh
Copy link
Collaborator

exiawsh commented Aug 20, 2024

This warning has no impact on training.

@Baymax1520301
Copy link
Author

此警告对训练没有影响。

OK, Thanks a lot

@Baymax1520301
Copy link
Author

Baymax1520301 commented Aug 24, 2024

此警告对训练没有影响。

My model is not good enough in 3D perception. This is my config file. May I ask if there is anything that needs to be modified?

base = [
'../../../mmdetection3d/configs/base/datasets/nus-3d.py',
'../../../mmdetection3d/configs/base/default_runtime.py'
]
backbone_norm_cfg = dict(type='LN', requires_grad=True)
plugin=True
plugin_dir='projects/mmdet3d_plugin/'

If point cloud range is changed, the models should also change their point

cloud range accordingly

point_cloud_range = [-51.2, -51.2, -5.0, 51.2, 51.2, 3.0]
voxel_size = [0.2, 0.2, 8]
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)

For nuScenes we usually do 10-class detection

class_names = [
'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',
'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
]

num_gpus = 8
batch_size = 8
num_iters_per_epoch = 28130 // (num_gpus * batch_size)
num_epochs = 50
llm_path = 'ckpts/pretrain_qformer/'

collect_keys=['lidar2img', 'intrinsics', 'extrinsics','timestamp', 'img_timestamp', 'ego_pose', 'ego_pose_inv', 'command', 'can_bus']
input_modality = dict(
use_lidar=True,
use_camera=True,
use_radar=True,
use_map=True,
use_external=True)
model = dict(
type='Petr3D',
save_path='./results_counter_only/', #save path for vlm models.
use_grid_mask=True,
frozen=False,
use_lora=True,
tokenizer=llm_path,
lm_head=llm_path, # set to None if don't use llm head
img_backbone=dict(
type='EVAViT',
img_size=640,
patch_size=16,
window_size=16,
in_chans=3,
embed_dim=1024,
depth=24,
num_heads=16,
mlp_ratio=4*2/3,
window_block_indexes = (
list(range(0, 2)) + list(range(3, 5)) + list(range(6, 8)) + list(range(9, 11)) + list(range(12, 14)) + list(range(15, 17)) + list(range(18, 20)) + list(range(21, 23))
),
qkv_bias=True,
drop_path_rate=0.3,
flash_attn=True,
with_cp=True,
frozen=False,),
map_head=dict(
type='PETRHeadM',
num_classes=1,
in_channels=1024,
out_dims=4096,
memory_len=600,
with_mask=True, # map query can't see vlm tokens
topk_proposals=300,
num_lane=1800, # 300+1500
num_lanes_one2one=300,
k_one2many=5,
lambda_one2many=1.0,
num_extra=256,
n_control=11,
pc_range=point_cloud_range,
code_weights = [1.0, 1.0],
transformer=dict(
type='PETRTemporalTransformer',
input_dimension=256,
output_dimension=256,
num_layers=6,
embed_dims=256,
num_heads=8,
feedforward_dims=2048,
dropout=0.1,
with_cp=True,
flash_attn=True,),
train_cfg=dict(
assigner=dict(
type='LaneHungarianAssigner',
cls_cost=dict(type='FocalLossCost', weight=1.5),
reg_cost=dict(type='LaneL1Cost', weight=0.02),
iou_cost=dict(type='IoUCost', weight=0.0))), # dummy
loss_cls=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.5),
loss_bbox=dict(type='L1Loss', loss_weight=0.02),
loss_dir=dict(type='PtsDirCosLoss', loss_weight=0.0)), #
pts_bbox_head=dict(
type='StreamPETRHead',
num_classes=10,
in_channels=1024,
out_dims=4096,
num_query=600,
with_mask=True,
memory_len=600,
topk_proposals=300,
num_propagated=300,
num_extra=256,
n_control=11, # align with centerline query defination
match_with_velo=False,
scalar=10, ##noise groups
noise_scale = 1.0,
dn_weight= 1.0, ##dn loss weight
split = 0.75, ###positive rate
code_weights = [2.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
transformer=dict(
type='PETRTemporalTransformer',
input_dimension=256,
output_dimension=256,
num_layers=6,
embed_dims=256,
num_heads=8,
feedforward_dims=2048,
dropout=0.1,
with_cp=True,
flash_attn=True,
),
bbox_coder=dict(
type='NMSFreeCoder',
post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],
pc_range=point_cloud_range,
max_num=300,
voxel_size=voxel_size,
num_classes=10),
loss_cls=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=2.0),
loss_bbox=dict(type='L1Loss', loss_weight=0.25),
loss_iou=dict(type='GIoULoss', loss_weight=0.0),),
# model training and testing settings
train_cfg=dict(pts=dict(
grid_size=[512, 512, 1],
voxel_size=voxel_size,
point_cloud_range=point_cloud_range,
out_size_factor=4,
assigner=dict(
type='HungarianAssigner3D',
cls_cost=dict(type='FocalLossCost', weight=2.0),
reg_cost=dict(type='BBox3DL1Cost', weight=0.25),
iou_cost=dict(type='IoUCost', weight=0.0), # Fake cost. This is just to make it compatible with DETR head.
pc_range=point_cloud_range),)
)
)

dataset_type = 'CustomNuScenesDataset'
data_root = './data/nuscenes/'

file_client_args = dict(backend='disk')

ida_aug_conf = {
"resize_lim": (0.37, 0.45),
"final_dim": (320, 640),
"bot_pct_lim": (0.0, 0.0),
"rot_lim": (0.0, 0.0),
"H": 900,
"W": 1600,
"rand_flip": False,
}

train_pipeline = [
dict(type='LoadMultiViewImageFromFiles', to_float32=True),
dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True, with_bbox=True,
with_label=True, with_bbox_depth=True),
dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
dict(type='ObjectNameFilter', classes=class_names),
dict(type='ResizeCropFlipRotImage', data_aug_conf = ida_aug_conf, training=True),
dict(type='ResizeMultiview3D', img_scale=(640, 640), keep_ratio=False, multiscale_mode='value'),
dict(type='LoadAnnoatationVQA',
base_vqa_path='./data/nuscenes/vqa/train/',
base_desc_path='./data/nuscenes/desc/train/',
base_conv_path='./data/nuscenes/conv/train/',
base_key_path='./data/nuscenes/keywords/train/',
tokenizer=llm_path,
max_length=2048,
ignore_type=[],
lane_objs_info="./data/nuscenes/lane_obj_train.pkl"),
dict(type='NormalizeMultiviewImage', **img_norm_cfg),
dict(type='PadMultiViewImage', size_divisor=32),
dict(type='PETRFormatBundle3D', class_names=class_names, collect_keys=collect_keys + ['prev_exists']),
dict(type='Collect3D', keys=['lane_pts', 'input_ids', 'vlm_labels', 'gt_bboxes_3d', 'gt_labels_3d', 'img', 'gt_bboxes', 'gt_labels', 'centers2d', 'depths', 'prev_exists'] + collect_keys,
meta_keys=('filename', 'ori_shape', 'img_shape', 'pad_shape', 'scale_factor', 'flip', 'box_mode_3d', 'box_type_3d', 'img_norm_cfg', 'scene_token', 'gt_bboxes_3d','gt_labels_3d'))
]
test_pipeline = [
dict(type='LoadMultiViewImageFromFiles', to_float32=True),
dict(type='ResizeCropFlipRotImage', data_aug_conf = ida_aug_conf, training=False),
dict(type='ResizeMultiview3D', img_scale=(640, 640), keep_ratio=False, multiscale_mode='value'),
dict(type='NormalizeMultiviewImage', **img_norm_cfg),
dict(type='PadMultiViewImage', size_divisor=32),
dict(type='LoadAnnoatationVQATest',
base_vqa_path='./data/nuscenes/vqa/val/',
base_conv_path='./data/nuscenes/conv/val/',
base_counter_path='./data/nuscenes/eval_cf/',
load_type=["planning"], # please don't test all the questions in single test, it requires quite long time
# load_type=["counter", "planning", "short", "conv"], # please don't test all the questions in single test, it requires quite long time
tokenizer=llm_path,
max_length=2048,),
dict(
type='MultiScaleFlipAug3D',
img_scale=(1333, 800),
pts_scale_ratio=1,
flip=False,
transforms=[
dict(
type='PETRFormatBundle3D',
collect_keys=collect_keys,
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['input_ids', 'img'] + collect_keys,
meta_keys=('sample_idx', 'vlm_labels', 'filename', 'ori_shape', 'img_shape','pad_shape', 'scale_factor', 'flip', 'box_mode_3d', 'box_type_3d', 'img_norm_cfg', 'scene_token'))
])
]

data = dict(
samples_per_gpu=batch_size,
workers_per_gpu=2,
train=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'nuscenes2d_ego_temporal_infos_train.pkl',
seq_split_num=1, # streaming video training
seq_mode=True, # streaming video training
pipeline=train_pipeline,
classes=class_names,
modality=input_modality,
test_mode=False,
use_valid_flag=True,
filter_empty_gt=False,
box_type_3d='LiDAR'),
val=dict(
type=dataset_type,
eval_mode=['lane', 'det'],
pipeline=test_pipeline,
ann_file=data_root + 'nuscenes2d_ego_temporal_infos_val.pkl',
classes=class_names,
modality=input_modality),
test=dict(
type=dataset_type,
eval_mode=['lane', 'det'],
pipeline=test_pipeline,
ann_file=data_root + 'nuscenes2d_ego_temporal_infos_val.pkl',
classes=class_names,
modality=input_modality),
shuffler_sampler=dict(
type='InfiniteGroupEachSampleInBatchSampler',
seq_split_num=2,
warmup_split_num=10, # lane det and vlm need short term temporal fusion in the early stage of training
num_iters_to_seq=num_iters_per_epoch,
),
nonshuffler_sampler=dict(type='DistributedSampler')
)

optimizer = dict(constructor='LearningRateDecayOptimizerConstructor', type='AdamW',
lr=1e-5, betas=(0.9, 0.999), weight_decay=1e-4,
paramwise_cfg={'decay_rate': 0.9,
'head_decay_rate': 4.0,
'lm_head_decay_rate': 0.1,
'decay_type': 'vit_wise',
'num_layers': 24,
})

optimizer_config = dict(type='Fp16OptimizerHook', loss_scale='dynamic', grad_clip=dict(max_norm=35, norm_type=2))

learning policy

lr_config = dict(
policy='CosineAnnealing',
warmup='linear',
warmup_iters=500,
warmup_ratio=1.0 / 3,
min_lr_ratio=1e-3,
)

evaluation = dict(interval=num_iters_per_epoch*num_epochs, pipeline=test_pipeline)

find_unused_parameters=False #### when use checkpoint, find_unused_parameters must be False
checkpoint_config = dict(interval=num_iters_per_epoch// 1, max_keep_ckpts=10)
runner = dict(
type='IterBasedRunner', max_iters=num_epochs * num_iters_per_epoch)

load_from='ckpts/eva02_petr_proj.pth'

resume_from=None

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

2 participants