diff --git a/lib/marcel/magic.rb b/lib/marcel/magic.rb index 6797ee4..2554034 100644 --- a/lib/marcel/magic.rb +++ b/lib/marcel/magic.rb @@ -25,25 +25,34 @@ def initialize(type) # Option keys: # * :extensions: String list or single string of file extensions # * :parents: String list or single string of parent mime types + # * :aliases: String list or single string of aliased mime types # * :magic: Mime magic specification # * :comment: Comment string def self.add(type, options) extensions = [options[:extensions]].flatten.compact + extensions.each {|ext| EXTENSIONS[ext] = type } TYPE_EXTS[type] = extensions + + TYPE_ALIASES.delete(type) + [options[:aliases]].flatten.compact.each do |aliased| + TYPE_ALIASES[aliased] = type + end + parents = [options[:parents]].flatten.compact TYPE_PARENTS[type] = parents unless parents.empty? - extensions.each {|ext| EXTENSIONS[ext] = type } + MAGIC.unshift [type, options[:magic]] if options[:magic] end - # Removes a mime type from the dictionary. You might want to do this if + # Removes a mime type from the dictionary. You might want to do this if # you're seeing impossible conflicts (for instance, application/x-gmc-link). - # * type: The mime type to remove. All associated extensions and magic are removed too. + # * type: The mime type to remove. def self.remove(type) - EXTENSIONS.delete_if {|ext, t| t == type } - MAGIC.delete_if {|t, m| t == type } + EXTENSIONS.delete_if { |ext, t| t == type } + MAGIC.delete_if { |t, m| t == type } TYPE_EXTS.delete(type) TYPE_PARENTS.delete(type) + TYPE_ALIASES.delete_if { |aliased, canonical| aliased == type || canonical == type } end # Returns true if type is a text format @@ -64,11 +73,24 @@ def extensions TYPE_EXTS[type] || [] end + def canonical + if to = TYPE_ALIASES[type] + self.class.new(to) + else + self + end + end + # Get mime comment def comment nil # deprecated end + # Lookup canonical mime type by mime type string + def self.by_type(type) + new(type.downcase).canonical if type + end + # Lookup mime type by file extension def self.by_extension(ext) ext = ext.to_s.downcase @@ -111,9 +133,14 @@ def hash alias == eql? def self.child?(child, parent) + child, parent = canonical(child), canonical(parent) child == parent || TYPE_PARENTS[child]&.any? {|p| child?(p, parent) } end + def self.canonical(aliased_type) + by_type(aliased_type)&.type + end + def self.magic_match(io, method) return magic_match(StringIO.new(io.to_s), method) unless io.respond_to?(:read) diff --git a/lib/marcel/mime_type.rb b/lib/marcel/mime_type.rb index 23da698..b73bfbe 100644 --- a/lib/marcel/mime_type.rb +++ b/lib/marcel/mime_type.rb @@ -5,10 +5,16 @@ class MimeType BINARY = "application/octet-stream" class << self - def extend(type, extensions: [], parents: [], magic: nil) + def extend(type, extensions: [], aliases: [], parents: [], magic: nil) + if canonical = Marcel::TYPE_ALIASES[type] + warn "#{type} was already aliased to #{canonical}" + end + extensions = (Array(extensions) + Array(Marcel::TYPE_EXTS[type])).uniq + existing_aliases = Marcel::TYPE_ALIASES.select { |_, t| t == type }.keys + aliases = (Array(aliases) + existing_aliases).uniq parents = (Array(parents) + Array(Marcel::TYPE_PARENTS[type])).uniq - Magic.add(type, extensions: extensions, magic: magic, parents: parents) + Magic.add(type, extensions: extensions, magic: magic, aliases: aliases, parents: parents) end # Returns the most appropriate content type for the given file. @@ -32,7 +38,6 @@ def for(pathname_or_io = nil, name: nil, extension: nil, declared_type: nil) end private - def for_data(pathname_or_io) if pathname_or_io with_io(pathname_or_io) do |io| @@ -60,7 +65,7 @@ def for_extension(extension) end def for_declared_type(declared_type) - type = parse_media_type(declared_type) + type = Marcel::Magic.canonical(parse_media_type(declared_type)) # application/octet-stream is treated as an undeclared/missing type, # allowing the type to be inferred from the filename. If there's no diff --git a/lib/marcel/mime_type/definitions.rb b/lib/marcel/mime_type/definitions.rb index 65db8bb..cde6eef 100644 --- a/lib/marcel/mime_type/definitions.rb +++ b/lib/marcel/mime_type/definitions.rb @@ -32,13 +32,14 @@ Marcel::MimeType.extend "application/vnd.apple.numbers", extensions: %w( numbers ), parents: "application/zip" Marcel::MimeType.extend "application/vnd.apple.keynote", extensions: %w( key ), parents: "application/zip" +# Upstream aliases to application/x-x509-cert. Override with a ;format=pem subtype. +Marcel::MimeType.extend "application/x-x509-ca-cert", magic: [[0, '-----BEGIN CERTIFICATE-----']], extensions: %w( pem ), parents: "application/x-x509-cert;format=pem" + Marcel::MimeType.extend "audio/aac", extensions: %w( aac ), parents: "audio/x-aac" Marcel::MimeType.extend("audio/ogg", extensions: %w( ogg oga ), magic: [[0, 'OggS', [[29, 'vorbis']]]]) Marcel::MimeType.extend "image/vnd.dwg", magic: [[0, "AC10"]] -Marcel::MimeType.extend "application/x-x509-ca-cert", magic: [[0, '-----BEGIN CERTIFICATE-----']], extensions: %w( pem ), parents: "application/x-x509-cert;format=pem" - Marcel::MimeType.extend "image/avif", magic: [[4, "ftypavif"]], extensions: %w( avif ) Marcel::MimeType.extend "image/heif", magic: [[4, "ftypmif1"]], extensions: %w( heif ) Marcel::MimeType.extend "image/heic", magic: [[4, "ftypheic"]], extensions: %w( heic ) @@ -49,7 +50,6 @@ Marcel::MimeType.extend "video/mp4", magic: [[4, "ftypisom"], [4, "ftypM4V "]], extensions: %w( mp4 m4v ) Marcel::MimeType.extend "audio/flac", magic: [[0, 'fLaC']], extensions: %w( flac ), parents: "audio/x-flac" -Marcel::MimeType.extend "audio/x-wav", magic: [[0, 'RIFF', [[8, 'WAVE']]]], extensions: %w( wav ), parents: "audio/vnd.wav" Marcel::MimeType.extend "audio/mpc", magic: [[0, "MPCKSH"]], extensions: %w( mpc ) Marcel::MimeType.extend "font/ttf", magic: [[0, "\x00\x01\x00\x00"]], extensions: %w( ttf ttc ) diff --git a/lib/marcel/tables.rb b/lib/marcel/tables.rb index 52e0290..2a3de0d 100644 --- a/lib/marcel/tables.rb +++ b/lib/marcel/tables.rb @@ -2148,6 +2148,149 @@ module Marcel 'video/x-sgi-movie' => %w(movie), 'x-conference/x-cooltalk' => %w(ice), # Cooltalk Audio } + TYPE_ALIASES = { + 'application/bat' => 'application/x-bat', + 'application/x-coreldraw' => 'application/coreldraw', + 'application/x-cdr' => 'application/coreldraw', + 'application/cdr' => 'application/coreldraw', + 'image/x-cdr' => 'application/coreldraw', + 'image/cdr' => 'application/coreldraw', + 'application/x-setupscript' => 'application/inf', + 'application/x-wine-extension-inf' => 'application/inf', + 'application/x-javascript' => 'application/javascript', + 'text/javascript' => 'application/javascript', + 'application/x-java-vm' => 'application/java-vm', + 'application/x-java' => 'application/java-vm', + 'application/mac-binhex' => 'application/mac-binhex40', + 'application/binhex' => 'application/mac-binhex40', + 'application/vnd.ms-word' => 'application/msword', + 'application/x-ogg' => 'audio/vorbis', + 'application/msonenote' => 'application/onenote', + 'application/x-pdf' => 'application/pdf', + 'application/pgp' => 'application/pgp-encrypted', + 'text/rss' => 'application/rss+xml', + 'text/rtf' => 'application/rtf', + 'application/smil' => 'application/smil+xml', + 'application/x-kchart' => 'application/vnd.kde.kchart', + 'application/x-kpresenter' => 'application/vnd.kde.kpresenter', + 'application/x-kspread' => 'application/vnd.kde.kspread', + 'application/x-kword' => 'application/vnd.kde.kword', + 'application/x-koan' => 'application/vnd.koan', + 'application/x-123' => 'application/vnd.lotus-1-2-3', + 'application/x-mif' => 'application/vnd.mif', + 'application/x-frame' => 'application/vnd.mif', + 'application/msexcel' => 'application/vnd.ms-excel', + 'application/mspowerpoint' => 'application/vnd.ms-powerpoint', + 'application/ms-tnef' => 'application/vnd.ms-tnef', + 'application/oxps' => 'application/vnd.ms-xpsdocument', + 'application/x-vnd.oasis.opendocument.chart' => 'application/vnd.oasis.opendocument.chart', + 'application/x-vnd.oasis.opendocument.chart-template' => 'application/vnd.oasis.opendocument.chart-template', + 'application/vnd.oasis.opendocument.database' => 'application/vnd.oasis.opendocument.base', + 'application/x-vnd.oasis.opendocument.formula' => 'application/vnd.oasis.opendocument.formula', + 'application/x-vnd.oasis.opendocument.formula-template' => 'application/vnd.oasis.opendocument.formula-template', + 'application/x-vnd.oasis.opendocument.graphics' => 'application/vnd.oasis.opendocument.graphics', + 'application/x-vnd.oasis.opendocument.graphics-template' => 'application/vnd.oasis.opendocument.graphics-template', + 'application/x-vnd.oasis.opendocument.image' => 'application/vnd.oasis.opendocument.image', + 'application/x-vnd.oasis.opendocument.image-template' => 'application/vnd.oasis.opendocument.image-template', + 'application/x-vnd.oasis.opendocument.presentation' => 'application/vnd.oasis.opendocument.presentation', + 'application/x-vnd.oasis.opendocument.presentation-template' => 'application/vnd.oasis.opendocument.presentation-template', + 'application/x-vnd.oasis.opendocument.spreadsheet' => 'application/vnd.oasis.opendocument.spreadsheet', + 'application/x-vnd.oasis.opendocument.spreadsheet-template' => 'application/vnd.oasis.opendocument.spreadsheet-template', + 'application/x-vnd.oasis.opendocument.text' => 'application/vnd.oasis.opendocument.text', + 'application/x-vnd.oasis.opendocument.text-master' => 'application/vnd.oasis.opendocument.text-master', + 'application/x-vnd.oasis.opendocument.text-template' => 'application/vnd.oasis.opendocument.text-template', + 'application/x-vnd.oasis.opendocument.text-web' => 'application/vnd.oasis.opendocument.text-web', + 'application/x-vnd.sun.xml.writer' => 'application/vnd.sun.xml.writer', + 'application/vnd.ms-visio' => 'application/vnd.visio', + 'image/x-targa' => 'image/x-tga', + 'application/x-unix-archive' => 'application/x-archive', + 'application/x-arj-compressed' => 'application/x-arj', + 'application/x-dbm' => 'application/x-berkeley-db', + 'application/vnd.debian.binary-package' => 'application/x-debian-package', + 'application/x-Gnumeric-spreadsheet' => 'application/x-gnumeric', + 'application/x-gzip' => 'application/gzip', + 'application/x-gunzip' => 'application/gzip', + 'application/gzipped' => 'application/gzip', + 'application/gzip-compressed' => 'application/gzip', + 'application/x-gzip-compressed' => 'application/gzip', + 'gzip/document' => 'application/gzip', + 'application/x-windows-installer' => 'application/x-ms-installer', + 'application/x-msi' => 'application/x-ms-installer', + 'application/x-rar' => 'application/x-rar-compressed', + 'text/x-tex' => 'application/x-tex', + 'text/x-texinfo' => 'application/x-texinfo', + 'application/x-x509-ca-cert' => 'application/x-x509-cert', + 'application/x-x509-user-cert' => 'application/x-x509-cert', + 'text/xml' => 'application/xml', + 'application/x-xml' => 'application/xml', + 'text/x-dtd' => 'application/xml-dtd', + 'text/xml-external-parsed-entity' => 'application/xml-external-parsed-entity', + 'text/xsl' => 'application/xslt+xml', + 'application/x-zip-compressed' => 'application/zip', + 'application/x-deflate' => 'application/zlib', + 'audio/x-m4a' => 'audio/mp4', + 'audio/x-mp4a' => 'audio/mp4', + 'audio/x-mpeg' => 'audio/mpeg', + 'audio/x-ogg-flac' => 'audio/x-oggflac', + 'audio/x-ogg-pcm' => 'audio/x-oggpcm', + 'application/x-speex' => 'audio/speex', + 'audio/aiff' => 'audio/x-aiff', + 'audio/x-realaudio' => 'audio/x-pn-realaudio', + 'audio/x-wav' => 'audio/vnd.wave', + 'audio/wave' => 'audio/vnd.wave', + 'audio/wav' => 'audio/vnd.wave', + 'image/x-bmp' => 'image/bmp', + 'image/x-ms-bmp' => 'image/bmp', + 'image/x-emf' => 'image/emf', + 'application/x-emf' => 'image/emf', + 'application/x-ms-emz' => 'image/x-emf-compressed', + 'image/hevc' => 'image/heic', + 'image/hevc-sequence' => 'image/heic-sequence', + 'video/jpm' => 'image/jpm', + 'image/ntf' => 'image/nitf', + 'image/x-psd' => 'image/vnd.adobe.photoshop', + 'application/photoshop' => 'image/vnd.adobe.photoshop', + 'image/x-dwg' => 'image/vnd.dwg', + 'application/acad' => 'image/vnd.dwg', + 'application/x-acad' => 'image/vnd.dwg', + 'application/autocad_dwg' => 'image/vnd.dwg', + 'application/dwg' => 'image/vnd.dwg', + 'application/x-dwg' => 'image/vnd.dwg', + 'application/x-autocad' => 'image/vnd.dwg', + 'drawing/dwg' => 'image/vnd.dwg', + 'image/x-icon' => 'image/vnd.microsoft.icon', + 'image/x-dcx' => 'image/vnd.zbrush.dcx', + 'image/x-pcx' => 'image/vnd.zbrush.pcx', + 'image/x-pc-paintbrush' => 'image/vnd.zbrush.pcx', + 'image/x-wmf' => 'image/wmf', + 'application/x-msmetafile' => 'image/wmf', + 'image/x-jb2' => 'image/x-jbig2', + 'image/xcf' => 'image/x-xcf', + 'application/x-mimearchive' => 'multipart/related', + 'message/rfc2557' => 'multipart/related', + 'drawing/x-dwf' => 'model/vnd.dwf', + 'text/x-asm' => 'text/x-assembly', + 'application/x-troff' => 'text/troff', + 'application/x-troff-man' => 'text/troff', + 'application/x-troff-me' => 'text/troff', + 'application/x-troff-ms' => 'text/troff', + 'text/x-c' => 'text/x-csrc', + 'text/x-java' => 'text/x-java-source', + 'text/x-properties' => 'text/x-java-properties', + 'text/properties' => 'text/x-java-properties', + 'application/x-httpd-jsp' => 'text/x-jsp', + 'application/matlab-mat' => 'application/x-matlab-data', + 'application/x-tcl' => 'text/x-tcl', + 'video/x-daala' => 'video/daala', + 'video/x-theora' => 'video/theora', + 'video/x-ogg-uvs' => 'video/x-ogguvs', + 'video/x-ogg-yuv' => 'video/x-oggyuv', + 'video/x-ogg-rgb' => 'video/x-oggrgb', + 'video/avi' => 'video/x-msvideo', + 'video/msvideo' => 'video/x-msvideo', + 'application/font-woff' => 'font/woff', + 'application/font-woff2' => 'font/woff2', + } TYPE_PARENTS = { 'application/bizagi-modeler' => %w(application/zip), 'application/dash+xml' => %w(application/xml), diff --git a/script/generate_tables.rb b/script/generate_tables.rb index 5ff10e3..27a632c 100755 --- a/script/generate_tables.rb +++ b/script/generate_tables.rb @@ -128,6 +128,7 @@ def get_matches(mime, parent) extensions = {} types = {} +aliases = {} magics = [] ARGV.each do |path| @@ -137,6 +138,7 @@ def get_matches(mime, parent) (doc/'mime-info/mime-type').each do |mime| comments = Hash[*(mime/'_comment').map {|comment| [comment['xml:lang'], comment.inner_text] }.flatten] type = mime['type'] + (mime/'alias').each { |x| aliases[x['type']] = type } subclass = (mime/'sub-class-of').map{|x| x['type']} exts = (mime/'glob').map{|x| x['pattern'] =~ /^\*\.([^\[\]]+)$/ ? $1.downcase : nil }.compact (mime/'magic').each do |magic| @@ -222,6 +224,11 @@ def get_matches(mime, parent) puts " '#{key}' => %w(#{exts}),#{comment}" end puts " }" +puts " TYPE_ALIASES = {" +aliases.each do |aliased, type| + puts " '#{aliased}' => '#{type}'," +end +puts " }" puts " TYPE_PARENTS = {" types.keys.sort.each do |key| parents = types[key][1].sort.join(' ') diff --git a/test/declared_type_test.rb b/test/declared_type_test.rb index d1feeac..0cf607e 100644 --- a/test/declared_type_test.rb +++ b/test/declared_type_test.rb @@ -19,4 +19,9 @@ class Marcel::MimeType::DeclaredTypeTest < Marcel::TestCase test "ignores charset declarations" do assert_equal "text/html", Marcel::MimeType.for(declared_type: "text/html; charset=utf-8") end + + test "resolves declared type to a canonical MIME type" do + aliased, canonical = Marcel::TYPE_ALIASES.first + assert_equal canonical, Marcel::MimeType.for(declared_type: aliased) + end end diff --git a/test/fixtures/magic/audio/x-wav.wav b/test/fixtures/magic/audio/vnd.wave.wav similarity index 100% rename from test/fixtures/magic/audio/x-wav.wav rename to test/fixtures/magic/audio/vnd.wave.wav diff --git a/test/fixtures/name/audio/x-wav.wav b/test/fixtures/name/audio/vnd.wave.wav similarity index 100% rename from test/fixtures/name/audio/x-wav.wav rename to test/fixtures/name/audio/vnd.wave.wav diff --git a/test/illustrator_test.rb b/test/illustrator_test.rb index 1f75805..bb96cff 100644 --- a/test/illustrator_test.rb +++ b/test/illustrator_test.rb @@ -2,6 +2,11 @@ require 'rack' class Marcel::MimeType::IllustratorTest < Marcel::TestCase + test ".ai uploaded as application/illustrator" do + file = files("name/application/illustrator/illustrator.ai") + assert_equal "application/illustrator", Marcel::MimeType.for(file, name: "illustrator.ai", declared_type: "application/illustrator") + end + test ".ai uploaded as application/postscript" do file = files("name/application/illustrator/illustrator.ai") assert_equal "application/illustrator", Marcel::MimeType.for(file, name: "illustrator.ai", declared_type: "application/postscript") diff --git a/test/magic_and_declared_type_test.rb b/test/magic_and_declared_type_test.rb index de3a189..b9efa9a 100644 --- a/test/magic_and_declared_type_test.rb +++ b/test/magic_and_declared_type_test.rb @@ -3,8 +3,14 @@ class Marcel::MimeType::MagicAndDeclaredTypeTest < Marcel::TestCase each_content_type_fixture('name') do |file, name, content_type| - test "correctly returns #{content_type} for #{name} given both file and declared type" do + test "detects #{content_type} given magic bytes from #{name} and declared type" do assert_equal content_type, Marcel::MimeType.for(file, declared_type: content_type) end + + ALIASED[content_type].each do |aliased| + test "detects #{content_type} given magic bytes from #{name} and aliased type #{aliased}" do + assert_equal content_type, Marcel::MimeType.for(file, declared_type: aliased) + end + end end end diff --git a/test/magic_and_name_test.rb b/test/magic_and_name_test.rb index a686a1a..8cae5f4 100644 --- a/test/magic_and_name_test.rb +++ b/test/magic_and_name_test.rb @@ -6,7 +6,7 @@ class Marcel::MimeType::MagicAndNameTest < Marcel::TestCase # the file contents and the name. In some cases, the file contents will point to a # generic type, while the name will choose a more specific subclass each_content_type_fixture('name') do |file, name, content_type| - test "correctly returns #{content_type} for #{name} given both file and name" do + test "detects #{content_type} given filename #{name} and its magic bytes" do assert_equal content_type, Marcel::MimeType.for(file, name: name) end end diff --git a/test/magic_test.rb b/test/magic_test.rb index 14d11e2..1d9e277 100644 --- a/test/magic_test.rb +++ b/test/magic_test.rb @@ -6,7 +6,7 @@ class Marcel::MimeType::MagicTest < Marcel::TestCase # has more specific subclasses (such as application/zip), these subclasses cannot usually # be recognised by magic alone; their name is also needed to correctly identify them. each_content_type_fixture('magic') do |file, name, content_type| - test "gets type for #{content_type} by using only magic bytes #{name}" do + test "detects #{content_type} given magic bytes from #{name}" do assert_equal content_type, Marcel::MimeType.for(file) end end @@ -16,6 +16,32 @@ class Marcel::MimeType::MagicTest < Marcel::TestCase Marcel::Magic.remove('application/x-my-thing') end + test "removing alias" do + Marcel::Magic.add('canonical/type', aliases: 'alias/type') + assert_equal 'canonical/type', Marcel::Magic.canonical('alias/type') + + Marcel::Magic.remove('alias/type') + assert_equal 'alias/type', Marcel::Magic.canonical('alias/type') + end + + test "removing canonical removes aliases" do + Marcel::Magic.add('canonical/type', aliases: %w[ alias/one alias/two ]) + assert_equal 'canonical/type', Marcel::Magic.canonical('alias/one') + assert_equal 'canonical/type', Marcel::Magic.canonical('alias/two') + + Marcel::Magic.remove('canonical/type') + assert_equal 'alias/one', Marcel::Magic.canonical('alias/one') + assert_equal 'alias/two', Marcel::Magic.canonical('alias/two') + end + + test "adding type removes existing alias" do + Marcel::Magic.add('canonical/type', aliases: 'alias/type') + assert_equal 'canonical/type', Marcel::Magic.canonical('alias/type') + + Marcel::Magic.add('alias/type', comment: "overrides old alias") + assert_equal 'alias/type', Marcel::Magic.canonical('alias/type') + end + test "#extensions" do json = Marcel::Magic.by_extension('json') assert_equal ['json'], json.extensions @@ -25,4 +51,11 @@ class Marcel::MimeType::MagicTest < Marcel::TestCase assert Marcel::Magic.child?('text/csv', 'text/plain') refute Marcel::Magic.child?('text/plain', 'text/csv') end + + test "child? with aliases" do + Marcel::Magic.add('canonical/parent', aliases: 'alias/parent') + Marcel::Magic.add('canonical/child', aliases: 'alias/child', parents: 'canonical/parent') + + assert Marcel::Magic.child?('alias/child', 'alias/parent') + end end diff --git a/test/name_test.rb b/test/name_test.rb index 030046e..287e35f 100644 --- a/test/name_test.rb +++ b/test/name_test.rb @@ -3,7 +3,7 @@ class Marcel::MimeType::NameTest < Marcel::TestCase each_content_type_fixture('name') do |file, name, content_type| - test "gets type for #{content_type} by filename from #{name}" do + test "detects #{content_type} given filename #{name}" do assert_equal content_type, Marcel::MimeType.for(name: name) end end diff --git a/test/test_helper.rb b/test/test_helper.rb index 557ecd7..94b940e 100644 --- a/test/test_helper.rb +++ b/test/test_helper.rb @@ -7,6 +7,12 @@ rescue LoadError end +# Simplify testing. No need for reverse mapping at runtime. +ALIASED = Hash.new { |h, k| h[k] = [] } +Marcel::TYPE_ALIASES.each do |aliased, type| + ALIASED[type] << aliased +end + class Marcel::TestCase < Minitest::Test class << self def setup(&block)