Skip to content

Commit

Permalink
Parse and store all DDEX standard identifiers (#7818)
Browse files Browse the repository at this point in the history
  • Loading branch information
michellebrier authored Mar 14, 2024
1 parent 7c77df0 commit 751df35
Show file tree
Hide file tree
Showing 26 changed files with 190 additions and 38 deletions.
1 change: 1 addition & 0 deletions packages/common/src/models/Collection.ts
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ export type CollectionMetadata = {
local?: boolean
release_date?: string
ddex_app?: string | null
ddex_release_ids?: any | null
}

export type CollectionDownloadReason = { is_from_favorites: boolean }
Expand Down
8 changes: 6 additions & 2 deletions packages/common/src/schemas/metadata.ts
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,9 @@ const trackMetadataSchema = {
iswc: null,
download: null,
is_playlist_upload: false,
ai_attribution_user_id: null
ai_attribution_user_id: null,
ddex_release_ids: null,
ddex_app: null
}

export const newTrackMetadata = (fields, validate = false): TrackMetadata => {
Expand Down Expand Up @@ -91,7 +93,9 @@ const collectionMetadataSchema = {
save_count: null,
license: null,
upc: null,
description: null
description: null,
ddex_release_ids: null,
ddex_app: null
}

export const newCollectionMetadata = (fields?: any, validate = false) => {
Expand Down
19 changes: 19 additions & 0 deletions packages/ddex/ingester/common/sdk_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -156,13 +156,31 @@ type NullableBool = *bool
type NullableString = *string
type NullableInt = *int

// ReleaseIDs holds the identifiers a DDEX release can carry. The ERN parser
// fills one field per child element of <ReleaseId> (e.g. ReleaseId/ICPN,
// ReleaseId/GRid, ReleaseId/ProprietaryId); identifiers that are not present
// in the XML stay as the empty string.
//
// Every field is tagged omitempty, so empty identifiers are left out of the
// stored BSON document instead of being written as "".
type ReleaseIDs struct {
PartyID string `bson:"party_id,omitempty"`
CatalogNumber string `bson:"catalog_number,omitempty"`
ICPN string `bson:"icpn,omitempty"`
GRid string `bson:"grid,omitempty"`
ISAN string `bson:"isan,omitempty"`
ISBN string `bson:"isbn,omitempty"`
ISMN string `bson:"ismn,omitempty"`
// NOTE(review): for tracks the parser appears to overwrite this with the
// SoundRecording's ISRC when that differs from ReleaseId/ISRC — confirm.
ISRC string `bson:"isrc,omitempty"`
ISSN string `bson:"issn,omitempty"`
ISTC string `bson:"istc,omitempty"`
ISWC string `bson:"iswc,omitempty"`
MWLI string `bson:"mwli,omitempty"`
SICI string `bson:"sici,omitempty"`
ProprietaryID string `bson:"proprietary_id,omitempty"`
}

type TrackMetadata struct {
Title string `bson:"title"`
ReleaseDate time.Time `bson:"release_date"`
Genre Genre `bson:"genre"`
Duration int `bson:"duration"`
PreviewStartSeconds NullableInt `bson:"preview_start_seconds,omitempty"`
ISRC NullableString `bson:"isrc,omitempty"`
DDEXReleaseIDs ReleaseIDs `bson:"ddex_release_ids"`

// TODO: Handle License from PLineText?
License NullableString `bson:"license,omitempty"`
Expand Down Expand Up @@ -205,6 +223,7 @@ type CollectionMetadata struct {
Genre Genre `bson:"genre"`
Mood Mood `bson:"mood,omitempty"`
ReleaseDate time.Time `bson:"release_date"`
DDEXReleaseIDs ReleaseIDs `bson:"ddex_release_ids"`

// TODO: Handle these fields
License NullableString `bson:"license,omitempty"`
Expand Down
100 changes: 67 additions & 33 deletions packages/ddex/ingester/e2e_test/e2e_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -141,13 +141,19 @@ func (e *e2eTest) runERN382Batched(t *testing.T) {
IsPrivate: false,
Genre: "Metal",
ReleaseDate: publishDate,
CoverArtURL: "s3://audius-test-crawled/721620118165/resources/721620118165_T7_007.jpg",
DDEXReleaseIDs: common.ReleaseIDs{
ICPN: "721620118165",
},
CoverArtURL: "s3://audius-test-crawled/721620118165/resources/721620118165_T7_007.jpg",
},
DDEXReleaseRef: "R0",
Tracks: []common.TrackMetadata{
{
Title: "Can you feel ...the Monkey Claw!",
ReleaseDate: time.Date(1, time.January, 1, 0, 0, 0, 0, time.UTC),
Title: "Can you feel ...the Monkey Claw!",
ReleaseDate: time.Date(1, time.January, 1, 0, 0, 0, 0, time.UTC),
DDEXReleaseIDs: common.ReleaseIDs{
ISRC: "CASE00000001",
},
Genre: "Metal",
Duration: 811,
PreviewStartSeconds: intPtr(0),
Expand All @@ -161,8 +167,11 @@ func (e *e2eTest) runERN382Batched(t *testing.T) {
AudioFileURL: "s3://audius-test-crawled/721620118165/resources/721620118165_T1_001.wav",
},
{
Title: "Red top mountain, blown sky high",
ReleaseDate: time.Date(1, time.January, 1, 0, 0, 0, 0, time.UTC),
Title: "Red top mountain, blown sky high",
ReleaseDate: time.Date(1, time.January, 1, 0, 0, 0, 0, time.UTC),
DDEXReleaseIDs: common.ReleaseIDs{
ISRC: "CASE00000002",
},
Genre: "Metal",
Duration: 366,
PreviewStartSeconds: intPtr(0),
Expand All @@ -176,8 +185,11 @@ func (e *e2eTest) runERN382Batched(t *testing.T) {
AudioFileURL: "s3://audius-test-crawled/721620118165/resources/721620118165_T2_002.wav",
},
{
Title: "Seige of Antioch",
ReleaseDate: time.Date(1, time.January, 1, 0, 0, 0, 0, time.UTC),
Title: "Seige of Antioch",
ReleaseDate: time.Date(1, time.January, 1, 0, 0, 0, 0, time.UTC),
DDEXReleaseIDs: common.ReleaseIDs{
ISRC: "CASE00000003",
},
Genre: "Metal",
Duration: 1269,
PreviewStartSeconds: intPtr(0),
Expand All @@ -191,8 +203,11 @@ func (e *e2eTest) runERN382Batched(t *testing.T) {
AudioFileURL: "s3://audius-test-crawled/721620118165/resources/721620118165_T3_003.wav",
},
{
Title: "Warhammer",
ReleaseDate: time.Date(1, time.January, 1, 0, 0, 0, 0, time.UTC),
Title: "Warhammer",
ReleaseDate: time.Date(1, time.January, 1, 0, 0, 0, 0, time.UTC),
DDEXReleaseIDs: common.ReleaseIDs{
ISRC: "CASE00000004",
},
Genre: "Metal",
Duration: 165,
PreviewStartSeconds: intPtr(0),
Expand All @@ -206,8 +221,11 @@ func (e *e2eTest) runERN382Batched(t *testing.T) {
AudioFileURL: "s3://audius-test-crawled/721620118165/resources/721620118165_T4_004.wav",
},
{
Title: "Iron Horse",
ReleaseDate: time.Date(1, time.January, 1, 0, 0, 0, 0, time.UTC),
Title: "Iron Horse",
ReleaseDate: time.Date(1, time.January, 1, 0, 0, 0, 0, time.UTC),
DDEXReleaseIDs: common.ReleaseIDs{
ISRC: "CASE00000005",
},
Genre: "Metal",
Duration: 294,
PreviewStartSeconds: intPtr(0),
Expand All @@ -221,8 +239,11 @@ func (e *e2eTest) runERN382Batched(t *testing.T) {
AudioFileURL: "s3://audius-test-crawled/721620118165/resources/721620118165_T5_005.wav",
},
{
Title: "Yes... I can feel the Monkey Claw!",
ReleaseDate: time.Date(1, time.January, 1, 0, 0, 0, 0, time.UTC),
Title: "Yes... I can feel the Monkey Claw!",
ReleaseDate: time.Date(1, time.January, 1, 0, 0, 0, 0, time.UTC),
DDEXReleaseIDs: common.ReleaseIDs{
ISRC: "CASE00000006",
},
Genre: "Metal",
Duration: 741,
PreviewStartSeconds: intPtr(0),
Expand Down Expand Up @@ -286,10 +307,15 @@ func (e *e2eTest) runERN381ReleaseByRelease(t *testing.T) {
assert.Equal(t, common.CreateAlbumRelease{
DDEXReleaseRef: "R0",
Metadata: common.CollectionMetadata{
PlaylistName: "Present.",
PlaylistOwnerID: "Bmv3bJ",
PlaylistOwnerName: "Theo Random",
ReleaseDate: publishDate,
PlaylistName: "Present.",
PlaylistOwnerID: "Bmv3bJ",
PlaylistOwnerName: "Theo Random",
ReleaseDate: publishDate,
DDEXReleaseIDs: common.ReleaseIDs{
CatalogNumber: "G010005108088N",
GRid: "A10301A0005108088N",
ICPN: "196871335584",
},
IsAlbum: true,
IsPrivate: false,
Genre: common.HipHopRap,
Expand All @@ -299,29 +325,37 @@ func (e *e2eTest) runERN381ReleaseByRelease(t *testing.T) {
},
Tracks: []common.TrackMetadata{
{
Title: "Playing With Fire.",
ReleaseDate: time.Time{},
Genre: common.HipHopRap,
Duration: 279,
Artists: []common.Artist{{Name: "Theo Random", Roles: []string{"AssociatedPerformer", "MainArtist"}}},
ArtistID: "",
ArtistName: "",
ISRC: stringPtr("ZAA012300131"),
Title: "Playing With Fire.",
ReleaseDate: time.Time{},
Genre: common.HipHopRap,
Duration: 279,
Artists: []common.Artist{{Name: "Theo Random", Roles: []string{"AssociatedPerformer", "MainArtist"}}},
ArtistID: "",
ArtistName: "",
ISRC: stringPtr("ZAA012300131"),
DDEXReleaseIDs: common.ReleaseIDs{
GRid: "A10328E0010879163O",
ISRC: "ZAA012300131",
},
PreviewStartSeconds: intPtr(48),
PreviewAudioFileURL: fmt.Sprintf("s3://audius-test-crawled/%s/", pendingRelease.ReleaseID),
AudioFileURL: fmt.Sprintf("s3://audius-test-crawled/%s/resources/A10301A0005108088N_T-1096524256352_SoundRecording_001-001.m4a", pendingRelease.ReleaseID),
AudioFileURLHash: "8bb2ce119257314a8fcb215a49f14b33",
AudioFileURLHashAlgo: "MD5",
},
{
Title: "No Comment.",
ReleaseDate: time.Time{},
Genre: common.HipHopRap,
Duration: 142,
ArtistID: "",
ArtistName: "",
Artists: []common.Artist{{Name: "Theo Random", Roles: []string{"AssociatedPerformer", "MainArtist"}}, {Name: "Thato Saul", Roles: []string{"AssociatedPerformer", "MainArtist"}}},
ISRC: stringPtr("ZAA012300128"),
Title: "No Comment.",
ReleaseDate: time.Time{},
Genre: common.HipHopRap,
Duration: 142,
ArtistID: "",
ArtistName: "",
Artists: []common.Artist{{Name: "Theo Random", Roles: []string{"AssociatedPerformer", "MainArtist"}}, {Name: "Thato Saul", Roles: []string{"AssociatedPerformer", "MainArtist"}}},
ISRC: stringPtr("ZAA012300128"),
DDEXReleaseIDs: common.ReleaseIDs{
GRid: "A10328E0010879164M",
ISRC: "ZAA012300128",
},
PreviewStartSeconds: intPtr(48),
PreviewAudioFileURL: fmt.Sprintf("s3://audius-test-crawled/%s/", pendingRelease.ReleaseID),
AudioFileURL: fmt.Sprintf("s3://audius-test-crawled/%s/resources/A10301A0005108088N_T-1096524142976_SoundRecording_001-002.m4a", pendingRelease.ReleaseID),
Expand Down
27 changes: 26 additions & 1 deletion packages/ddex/ingester/parser/ern38x.go
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,24 @@ func processReleaseNode(rNode *xmlquery.Node, soundRecordings *[]SoundRecording,
isrc := safeInnerText(rNode.SelectElement("ReleaseId/ISRC"))
releaseType := safeInnerText(rNode.SelectElement("ReleaseType"))

// Release IDs
ddexReleaseIDs := &common.ReleaseIDs{
PartyID: safeInnerText(rNode.SelectElement("ReleaseId/PartyId")),
CatalogNumber: safeInnerText(rNode.SelectElement("ReleaseId/CatalogNumber")),
ICPN: safeInnerText(rNode.SelectElement("ReleaseId/ICPN")),
GRid: safeInnerText(rNode.SelectElement("ReleaseId/GRid")),
ISAN: safeInnerText(rNode.SelectElement("ReleaseId/ISAN")),
ISBN: safeInnerText(rNode.SelectElement("ReleaseId/ISBN")),
ISMN: safeInnerText(rNode.SelectElement("ReleaseId/ISMN")),
ISRC: isrc,
ISSN: safeInnerText(rNode.SelectElement("ReleaseId/ISSN")),
ISTC: safeInnerText(rNode.SelectElement("ReleaseId/ISTC")),
ISWC: safeInnerText(rNode.SelectElement("ReleaseId/ISWC")),
MWLI: safeInnerText(rNode.SelectElement("ReleaseId/MWLI")),
SICI: safeInnerText(rNode.SelectElement("ReleaseId/SICI")),
ProprietaryID: safeInnerText(rNode.SelectElement("ReleaseId/ProprietaryId")),
}

// Convert releaseDate from string of format YYYY-MM-DD to time.Time
if releaseDateStr == "" {
err = fmt.Errorf("missing release date for <ReleaseReference>%s</ReleaseReference>", releaseRef)
Expand Down Expand Up @@ -286,6 +304,7 @@ func processReleaseNode(rNode *xmlquery.Node, soundRecordings *[]SoundRecording,
PlaylistName: title,
PlaylistOwnerName: artistName,
ReleaseDate: releaseDate,
DDEXReleaseIDs: *ddexReleaseIDs,
Genre: genre,
IsAlbum: true,
IsPrivate: false, // TODO: Use DealList to determine this. Same with releaseDate because I think the XML element it's reading is deprecated
Expand Down Expand Up @@ -340,12 +359,17 @@ func processReleaseNode(rNode *xmlquery.Node, soundRecordings *[]SoundRecording,
trackMetadata.Title = title
}

if *trackMetadata.ISRC == "" {
if trackMetadata.ISRC == nil || *trackMetadata.ISRC == "" {
if isrc == "" {
err = fmt.Errorf("missing isrc for <ReleaseReference>%s</ReleaseReference>", releaseRef)
return
}
*trackMetadata.ISRC = isrc
} else {
if *trackMetadata.ISRC != isrc {
// Use the ISRC from the SoundRecording if it differs from the Release ISRC
(*ddexReleaseIDs).ISRC = *trackMetadata.ISRC
}
}

if trackMetadata.Genre == "" {
Expand All @@ -368,6 +392,7 @@ func processReleaseNode(rNode *xmlquery.Node, soundRecordings *[]SoundRecording,

trackMetadata.ArtistName = artistName
trackMetadata.ReleaseDate = releaseDate
trackMetadata.DDEXReleaseIDs = *ddexReleaseIDs
trackMetadata.Copyright = copyright
trackMetadata.CoverArtURL = coverArtURL
trackMetadata.CoverArtURLHash = coverArtURLHash
Expand Down
22 changes: 20 additions & 2 deletions packages/ddex/ingester/parser/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,23 @@ func (p *Parser) parseRelease(release *common.UnprocessedRelease, deliveryZipFil

// If there's an album release, the tracks we parsed out are actually part of the album release
if len(createAlbumRelease) > 0 {
// Copy release IDs from individual track releases to the album's tracks
isrcToReleaseIDsMap := make(map[string]common.ReleaseIDs)
for _, trackRelease := range createTrackRelease {
if trackRelease.Metadata.ISRC != nil {
isrcToReleaseIDsMap[*trackRelease.Metadata.ISRC] = trackRelease.Metadata.DDEXReleaseIDs
}
}
for i, album := range createAlbumRelease {
for j, trackMetadata := range album.Tracks {
if trackMetadata.ISRC != nil {
if releaseIDs, exists := isrcToReleaseIDsMap[*trackMetadata.ISRC]; exists {
createAlbumRelease[i].Tracks[j].DDEXReleaseIDs = releaseIDs
}
}
}
}
// Clear the individual track releases
createTrackRelease = []common.CreateTrackRelease{}
}

Expand Down Expand Up @@ -302,10 +319,11 @@ func (p *Parser) parseBatch(batch *common.UnprocessedBatch, deliveryZipFileETag
// TODO: Support more ID types (GRid is preferred) as we get more examples
var releaseID string
releaseICPN := safeInnerText(messageInBatch.SelectElement("IncludedReleaseId/ICPN"))
releaseGRid := safeInnerText(messageInBatch.SelectElement("IncludedReleaseId/GRid"))
if releaseICPN != "" {
releaseID = releaseICPN
// } else if releaseGRid != "" {
// releaseID = releaseGRid
} else if releaseGRid != "" {
releaseID = releaseGRid
} else {
err := fmt.Errorf("no valid IncludedReleaseId found")
batch.ValidationErrors = append(batch.ValidationErrors, err.Error())
Expand Down
2 changes: 2 additions & 0 deletions packages/ddex/publisher/src/models/pendingReleases.ts
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ const artistSchema = new mongoose.Schema({
const trackMetadataSchema = new mongoose.Schema({
title: { type: String, required: true },
release_date: { type: Date, required: true },
ddex_release_ids: mongoose.Schema.Types.Mixed,
genre: { type: String, enum: genres, required: true },
duration: { type: Number, required: true },
preview_start_seconds: Number,
Expand Down Expand Up @@ -120,6 +121,7 @@ const collectionMetadataSchema = new mongoose.Schema({
playlist_owner_id: { type: String, required: true },
genre: { type: String, enum: genres, required: true },
release_date: { type: Date, required: true },
ddex_release_ids: mongoose.Schema.Types.Mixed,
description: String,
is_album: Boolean,
is_private: Boolean,
Expand Down
2 changes: 2 additions & 0 deletions packages/ddex/publisher/src/services/publisherService.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ const formatTrackMetadata = (
isrc: metadata.isrc,
license: metadata.license,
releaseDate: new Date(metadata.release_date),
ddexReleaseIds: metadata.ddex_release_ids,
previewStartSeconds: metadata.preview_start_seconds ?? undefined,
// isUnlisted: // TODO: set visibility
// iswc:
Expand All @@ -56,6 +57,7 @@ const formatAlbumMetadata = (
license: metadata.license || '',
mood: (metadata.mood || 'Other') as Mood, // TODO: SDK requires mood, but XML doesn't provide one
releaseDate: new Date(metadata.release_date),
ddexReleaseIds: metadata.ddex_release_ids,
tags: metadata.tags || '',
upc: metadata.upc || '',
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
-- Adds a jsonb ddex_release_ids column to tracks and playlists, plus a GIN
-- index on each so lookups by any DDEX identifier can use an index.
-- Every statement is idempotent (IF NOT EXISTS), so re-running is safe.
begin;

alter table tracks add column if not exists ddex_release_ids jsonb;
create index if not exists idx_ddex_release_ids on tracks using GIN (ddex_release_ids);

alter table playlists add column if not exists ddex_release_ids jsonb;
-- Index names are unique per schema in PostgreSQL, not per table. Reusing
-- idx_ddex_release_ids here would make IF NOT EXISTS skip this statement
-- silently (the tracks index already owns that name), leaving playlists
-- unindexed. The tracks index keeps its original name so databases that have
-- already applied this migration do not end up with a duplicate index.
create index if not exists idx_playlists_ddex_release_ids on playlists using GIN (ddex_release_ids);

commit;
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,7 @@ def test_valid_parse_metadata(app):
"license": "All rights reserved",
"isrc": None,
"iswc": None,
"ddex_release_ids": None,
"track_segments": [
{
"duration": 6.016,
Expand Down
Loading

0 comments on commit 751df35

Please sign in to comment.