diff --git a/docs/.gitignore b/docs/.gitignore new file mode 100644 index 000000000..f40fbd8ba --- /dev/null +++ b/docs/.gitignore @@ -0,0 +1,5 @@ +_site +.sass-cache +.jekyll-cache +.jekyll-metadata +vendor diff --git a/docs/404.html b/docs/404.html new file mode 100644 index 000000000..086a5c9ea --- /dev/null +++ b/docs/404.html @@ -0,0 +1,25 @@ +--- +permalink: /404.html +layout: default +--- + + + +
+

404

+ +

Page not found :(

+

The requested page could not be found.

+
diff --git a/docs/CNAME b/docs/CNAME new file mode 100644 index 000000000..340c8b3de --- /dev/null +++ b/docs/CNAME @@ -0,0 +1 @@ +fitstool.org \ No newline at end of file diff --git a/docs/Gemfile b/docs/Gemfile new file mode 100644 index 000000000..9391d4204 --- /dev/null +++ b/docs/Gemfile @@ -0,0 +1,4 @@ +source 'https://rubygems.org' +gem 'github-pages' +gem 'jekyll' +gem 'webrick' \ No newline at end of file diff --git a/docs/Gemfile.lock b/docs/Gemfile.lock new file mode 100644 index 000000000..57cfc453b --- /dev/null +++ b/docs/Gemfile.lock @@ -0,0 +1,284 @@ +GEM + remote: https://rubygems.org/ + specs: + activesupport (6.0.4.7) + concurrent-ruby (~> 1.0, >= 1.0.2) + i18n (>= 0.7, < 2) + minitest (~> 5.1) + tzinfo (~> 1.1) + zeitwerk (~> 2.2, >= 2.2.2) + addressable (2.8.0) + public_suffix (>= 2.0.2, < 5.0) + coffee-script (2.4.1) + coffee-script-source + execjs + coffee-script-source (1.11.1) + colorator (1.1.0) + commonmarker (0.23.4) + concurrent-ruby (1.1.9) + dnsruby (1.61.9) + simpleidn (~> 0.1) + em-websocket (0.5.3) + eventmachine (>= 0.12.9) + http_parser.rb (~> 0) + ethon (0.15.0) + ffi (>= 1.15.0) + eventmachine (1.2.7) + execjs (2.8.1) + faraday (1.10.0) + faraday-em_http (~> 1.0) + faraday-em_synchrony (~> 1.0) + faraday-excon (~> 1.1) + faraday-httpclient (~> 1.0) + faraday-multipart (~> 1.0) + faraday-net_http (~> 1.0) + faraday-net_http_persistent (~> 1.0) + faraday-patron (~> 1.0) + faraday-rack (~> 1.0) + faraday-retry (~> 1.0) + ruby2_keywords (>= 0.0.4) + faraday-em_http (1.0.0) + faraday-em_synchrony (1.0.0) + faraday-excon (1.1.0) + faraday-httpclient (1.0.1) + faraday-multipart (1.0.3) + multipart-post (>= 1.2, < 3) + faraday-net_http (1.0.1) + faraday-net_http_persistent (1.2.0) + faraday-patron (1.0.0) + faraday-rack (1.0.0) + faraday-retry (1.0.3) + ffi (1.15.5) + forwardable-extended (2.6.0) + gemoji (3.0.1) + github-pages (225) + github-pages-health-check (= 1.17.9) + jekyll (= 3.9.0) + jekyll-avatar (= 0.7.0) + 
jekyll-coffeescript (= 1.1.1) + jekyll-commonmark-ghpages (= 0.2.0) + jekyll-default-layout (= 0.1.4) + jekyll-feed (= 0.15.1) + jekyll-gist (= 1.5.0) + jekyll-github-metadata (= 2.13.0) + jekyll-include-cache (= 0.2.1) + jekyll-mentions (= 1.6.0) + jekyll-optional-front-matter (= 0.3.2) + jekyll-paginate (= 1.1.0) + jekyll-readme-index (= 0.3.0) + jekyll-redirect-from (= 0.16.0) + jekyll-relative-links (= 0.6.1) + jekyll-remote-theme (= 0.4.3) + jekyll-sass-converter (= 1.5.2) + jekyll-seo-tag (= 2.8.0) + jekyll-sitemap (= 1.4.0) + jekyll-swiss (= 1.0.0) + jekyll-theme-architect (= 0.2.0) + jekyll-theme-cayman (= 0.2.0) + jekyll-theme-dinky (= 0.2.0) + jekyll-theme-hacker (= 0.2.0) + jekyll-theme-leap-day (= 0.2.0) + jekyll-theme-merlot (= 0.2.0) + jekyll-theme-midnight (= 0.2.0) + jekyll-theme-minimal (= 0.2.0) + jekyll-theme-modernist (= 0.2.0) + jekyll-theme-primer (= 0.6.0) + jekyll-theme-slate (= 0.2.0) + jekyll-theme-tactile (= 0.2.0) + jekyll-theme-time-machine (= 0.2.0) + jekyll-titles-from-headings (= 0.5.3) + jemoji (= 0.12.0) + kramdown (= 2.3.1) + kramdown-parser-gfm (= 1.1.0) + liquid (= 4.0.3) + mercenary (~> 0.3) + minima (= 2.5.1) + nokogiri (>= 1.12.5, < 2.0) + rouge (= 3.26.0) + terminal-table (~> 1.4) + github-pages-health-check (1.17.9) + addressable (~> 2.3) + dnsruby (~> 1.60) + octokit (~> 4.0) + public_suffix (>= 3.0, < 5.0) + typhoeus (~> 1.3) + html-pipeline (2.14.0) + activesupport (>= 2) + nokogiri (>= 1.4) + http_parser.rb (0.8.0) + i18n (0.9.5) + concurrent-ruby (~> 1.0) + jekyll (3.9.0) + addressable (~> 2.4) + colorator (~> 1.0) + em-websocket (~> 0.5) + i18n (~> 0.7) + jekyll-sass-converter (~> 1.0) + jekyll-watch (~> 2.0) + kramdown (>= 1.17, < 3) + liquid (~> 4.0) + mercenary (~> 0.3.3) + pathutil (~> 0.9) + rouge (>= 1.7, < 4) + safe_yaml (~> 1.0) + jekyll-avatar (0.7.0) + jekyll (>= 3.0, < 5.0) + jekyll-coffeescript (1.1.1) + coffee-script (~> 2.2) + coffee-script-source (~> 1.11.1) + jekyll-commonmark (1.4.0) + commonmarker 
(~> 0.22) + jekyll-commonmark-ghpages (0.2.0) + commonmarker (~> 0.23.4) + jekyll (~> 3.9.0) + jekyll-commonmark (~> 1.4.0) + rouge (>= 2.0, < 4.0) + jekyll-default-layout (0.1.4) + jekyll (~> 3.0) + jekyll-feed (0.15.1) + jekyll (>= 3.7, < 5.0) + jekyll-gist (1.5.0) + octokit (~> 4.2) + jekyll-github-metadata (2.13.0) + jekyll (>= 3.4, < 5.0) + octokit (~> 4.0, != 4.4.0) + jekyll-include-cache (0.2.1) + jekyll (>= 3.7, < 5.0) + jekyll-mentions (1.6.0) + html-pipeline (~> 2.3) + jekyll (>= 3.7, < 5.0) + jekyll-optional-front-matter (0.3.2) + jekyll (>= 3.0, < 5.0) + jekyll-paginate (1.1.0) + jekyll-readme-index (0.3.0) + jekyll (>= 3.0, < 5.0) + jekyll-redirect-from (0.16.0) + jekyll (>= 3.3, < 5.0) + jekyll-relative-links (0.6.1) + jekyll (>= 3.3, < 5.0) + jekyll-remote-theme (0.4.3) + addressable (~> 2.0) + jekyll (>= 3.5, < 5.0) + jekyll-sass-converter (>= 1.0, <= 3.0.0, != 2.0.0) + rubyzip (>= 1.3.0, < 3.0) + jekyll-sass-converter (1.5.2) + sass (~> 3.4) + jekyll-seo-tag (2.8.0) + jekyll (>= 3.8, < 5.0) + jekyll-sitemap (1.4.0) + jekyll (>= 3.7, < 5.0) + jekyll-swiss (1.0.0) + jekyll-theme-architect (0.2.0) + jekyll (> 3.5, < 5.0) + jekyll-seo-tag (~> 2.0) + jekyll-theme-cayman (0.2.0) + jekyll (> 3.5, < 5.0) + jekyll-seo-tag (~> 2.0) + jekyll-theme-dinky (0.2.0) + jekyll (> 3.5, < 5.0) + jekyll-seo-tag (~> 2.0) + jekyll-theme-hacker (0.2.0) + jekyll (> 3.5, < 5.0) + jekyll-seo-tag (~> 2.0) + jekyll-theme-leap-day (0.2.0) + jekyll (> 3.5, < 5.0) + jekyll-seo-tag (~> 2.0) + jekyll-theme-merlot (0.2.0) + jekyll (> 3.5, < 5.0) + jekyll-seo-tag (~> 2.0) + jekyll-theme-midnight (0.2.0) + jekyll (> 3.5, < 5.0) + jekyll-seo-tag (~> 2.0) + jekyll-theme-minimal (0.2.0) + jekyll (> 3.5, < 5.0) + jekyll-seo-tag (~> 2.0) + jekyll-theme-modernist (0.2.0) + jekyll (> 3.5, < 5.0) + jekyll-seo-tag (~> 2.0) + jekyll-theme-primer (0.6.0) + jekyll (> 3.5, < 5.0) + jekyll-github-metadata (~> 2.9) + jekyll-seo-tag (~> 2.0) + jekyll-theme-slate (0.2.0) + jekyll (> 3.5, < 5.0) + 
jekyll-seo-tag (~> 2.0) + jekyll-theme-tactile (0.2.0) + jekyll (> 3.5, < 5.0) + jekyll-seo-tag (~> 2.0) + jekyll-theme-time-machine (0.2.0) + jekyll (> 3.5, < 5.0) + jekyll-seo-tag (~> 2.0) + jekyll-titles-from-headings (0.5.3) + jekyll (>= 3.3, < 5.0) + jekyll-watch (2.2.1) + listen (~> 3.0) + jemoji (0.12.0) + gemoji (~> 3.0) + html-pipeline (~> 2.2) + jekyll (>= 3.0, < 5.0) + kramdown (2.3.1) + rexml + kramdown-parser-gfm (1.1.0) + kramdown (~> 2.0) + liquid (4.0.3) + listen (3.7.1) + rb-fsevent (~> 0.10, >= 0.10.3) + rb-inotify (~> 0.9, >= 0.9.10) + mercenary (0.3.6) + minima (2.5.1) + jekyll (>= 3.5, < 5.0) + jekyll-feed (~> 0.9) + jekyll-seo-tag (~> 2.1) + minitest (5.15.0) + multipart-post (2.1.1) + nokogiri (1.13.3-x86_64-darwin) + racc (~> 1.4) + octokit (4.22.0) + faraday (>= 0.9) + sawyer (~> 0.8.0, >= 0.5.3) + pathutil (0.16.2) + forwardable-extended (~> 2.6) + public_suffix (4.0.6) + racc (1.6.0) + rb-fsevent (0.11.1) + rb-inotify (0.10.1) + ffi (~> 1.0) + rexml (3.2.5) + rouge (3.26.0) + ruby2_keywords (0.0.5) + rubyzip (2.3.2) + safe_yaml (1.0.5) + sass (3.7.4) + sass-listen (~> 4.0.0) + sass-listen (4.0.0) + rb-fsevent (~> 0.9, >= 0.9.4) + rb-inotify (~> 0.9, >= 0.9.7) + sawyer (0.8.2) + addressable (>= 2.3.5) + faraday (> 0.8, < 2.0) + simpleidn (0.2.1) + unf (~> 0.1.4) + terminal-table (1.8.0) + unicode-display_width (~> 1.1, >= 1.1.1) + thread_safe (0.3.6) + typhoeus (1.4.0) + ethon (>= 0.9.0) + tzinfo (1.2.9) + thread_safe (~> 0.1) + unf (0.1.4) + unf_ext + unf_ext (0.0.8.1) + unicode-display_width (1.8.0) + webrick (1.7.0) + zeitwerk (2.5.4) + +PLATFORMS + x86_64-darwin-20 + +DEPENDENCIES + github-pages + jekyll + webrick + +BUNDLED WITH + 2.3.9 diff --git a/docs/_config.yml b/docs/_config.yml new file mode 100644 index 000000000..f19c21de6 --- /dev/null +++ b/docs/_config.yml @@ -0,0 +1,55 @@ +# Welcome to Jekyll! 
+# +# This config file is meant for settings that affect your whole blog, values +# which you are expected to set up once and rarely edit after that. If you find +# yourself editing this file very often, consider using Jekyll's data files +# feature for the data you need to update frequently. +# +# For technical reasons, this file is *NOT* reloaded automatically when you use +# 'bundle exec jekyll serve'. If you change this file, please restart the server process. +# +# If you need help with YAML syntax, here are some quick references for you: +# https://learn-the-web.algonquindesign.ca/topics/markdown-yaml-cheat-sheet/#yaml +# https://learnxinyminutes.com/docs/yaml/ +# +# Site settings +# These are used to personalize your new site. If you look in the HTML files, +# you will see them accessed via {{ site.title }}, {{ site.email }}, and so on. +# You can create any custom variable you would like, and they will be accessible +# in the templates via {{ site.myvariable }}. + +title: File Information Tool Set (FITS) +email: fits-users@googlegroups.com +description: >- # this means to ignore newlines until "baseurl:" + Documentation and official code release of the FITS and the FITS Web Service projects +baseurl: "/fits" # the subpath of your site, e.g. /blog +url: "https://harvard-lts.github.io" # the base hostname & protocol for your site, e.g. http://example.com +github_reponame_fits: FITS +github_url_fits: "https://github.com/harvard-lts/fits" +github_reponame_fits-servlet: FITSservlet +github_url_fits-servlet: "https://github.com/harvard-lts/FITSservlet" + +# Build settings +theme: minima +plugins: + - jekyll-feed + +# Exclude from processing. +# The following items will not be processed, by default. +# Any item listed under the `exclude:` key here will be automatically added to +# the internal "default list". +# +# Excluded items can be processed by explicitly listing the directories or +# their entries' file path in the `include:` list. 
+# +# exclude: +# - .sass-cache/ +# - .jekyll-cache/ +# - gemfiles/ +# - Gemfile +# - Gemfile.lock +# - node_modules/ +# - vendor/bundle/ +# - vendor/cache/ +# - vendor/gems/ +# - vendor/ruby/ diff --git a/docs/_includes/data-dictionary-unit.md b/docs/_includes/data-dictionary-unit.md new file mode 100644 index 000000000..0e777c9af --- /dev/null +++ b/docs/_includes/data-dictionary-unit.md @@ -0,0 +1,27 @@ +{% assign sorted_posts = site.categories.data-dictionary | sort_natural: 'semantic-unit' %} + +{% for post in sorted_posts %} + +
+ +#### {{ post.semantic-unit }} + +| --------- | ----------- | +| **Semantic unit** | {% if post.semantic-unit %} {{ post.semantic-unit }} {% endif %} | +| **Semantic components** | {% if post.semantic-components %} {{ post.semantic-components }} {% endif %} | +| **Definition** | {% if post.definition %} {{ post.definition }} {% endif %} | +| **Rationale** | {% if post.rationale %} {{ post.rationale }} {% endif %} | +| **Data constraint** | {% if post.data-constraint %} {{ post.data-constraint }} {% endif %} | +| **Repeatability** | {% if post.repeatability %} {{ post.repeatability }} {% endif %} | +| **Obligation** | {% if post.obligation %} {{ post.obligation }} {% endif %} | +| **Usage notes** | {% if post.usage-notes %}{{ post.usage-notes }}{% endif %} | +{% if post.metadata-standard %}| **Metadata standard** | {{ post.metadata-standard }} | +| **Short name** | {{ post.short-name }} | +| **Maintenance organization** | {{ post.maintenance-organization }} | +| **Website** | [{{ post.website }}]({{ post.website }}) |{% endif %} + +{% if post.content %}{{ post.content }}{% endif %} + +
+ +{% endfor %} \ No newline at end of file diff --git a/docs/_includes/footer.html b/docs/_includes/footer.html new file mode 100644 index 000000000..f03a611c3 --- /dev/null +++ b/docs/_includes/footer.html @@ -0,0 +1,48 @@ + \ No newline at end of file diff --git a/docs/_includes/head.html b/docs/_includes/head.html new file mode 100644 index 000000000..52685e2ed --- /dev/null +++ b/docs/_includes/head.html @@ -0,0 +1,12 @@ + + + + + {%- seo -%} + + + {%- feed_meta -%} + {%- if jekyll.environment == 'production' and site.google_analytics -%} + {%- include google-analytics.html -%} + {%- endif -%} + \ No newline at end of file diff --git a/docs/_includes/header.html b/docs/_includes/header.html new file mode 100644 index 000000000..97019fc87 --- /dev/null +++ b/docs/_includes/header.html @@ -0,0 +1,32 @@ + diff --git a/docs/_includes/tool.md b/docs/_includes/tool.md new file mode 100644 index 000000000..f0a068f64 --- /dev/null +++ b/docs/_includes/tool.md @@ -0,0 +1,26 @@ +{% assign sorted_posts = site.categories.tools | sort_natural: 'title' %} + +{% for post in sorted_posts %} + +
+ +{% if post.more-info-url %} + +#### [{{ post.title }}]({{ post.more-info-url }}) + +{% else %} + +#### {{ post.title }} + +{% endif %} + +| --------- | ----------- | +| **Maintenance organization** | {% if post.maintenance-organization %} {{ post.maintenance-organization }} {% endif %} | +| **Capabilities** | {% if post.capabilities %} {{ post.capabilities }} {% endif %} | +| **Formats supported** | {% if post.formats %} {{ post.formats }} {% endif %} | +| **Description** | {% if post.description %} {{ post.description }} {% endif %} | +| **Usage notes** | {% if post.usage-note %}{{post.usage-note}}{% endif %} | + +
+ +{% endfor %} \ No newline at end of file diff --git a/docs/_layouts/documentation.html b/docs/_layouts/documentation.html new file mode 100644 index 000000000..052d7e9de --- /dev/null +++ b/docs/_layouts/documentation.html @@ -0,0 +1,18 @@ +--- +layout: default +--- + +
+ +
+

{{ page.title }}

+ {% if page.subtitle %}

{{ page.subtitle }}

{% endif %} +
+ +
+
+ {{ content }} +
+
+ +
\ No newline at end of file diff --git a/docs/_layouts/home.html b/docs/_layouts/home.html new file mode 100644 index 000000000..675676770 --- /dev/null +++ b/docs/_layouts/home.html @@ -0,0 +1,34 @@ +--- +layout: default +--- + +
+ {%- if page.title -%} +

{{ page.title }}

+ {%- endif -%} + + {{ content }} + + + +
\ No newline at end of file diff --git a/docs/_layouts/page.html b/docs/_layouts/page.html new file mode 100644 index 000000000..4ba8cdeb9 --- /dev/null +++ b/docs/_layouts/page.html @@ -0,0 +1,16 @@ +--- +layout: default +--- + +
+ +
+

{{ page.title }}

+ {% if page.subtitle %}

{{ page.subtitle }}

{% endif %} +
+ +
+ {{ content }} +
+ +
\ No newline at end of file diff --git a/docs/_layouts/presentation.html b/docs/_layouts/presentation.html new file mode 100644 index 000000000..161185383 --- /dev/null +++ b/docs/_layouts/presentation.html @@ -0,0 +1,16 @@ +--- +layout: default +--- + +
+

{{ page.title | escape }}

+
+ +
+ {% if page.date %}

Presentation Date: {{ page.date }}

{% endif %} + {% if page.location %}

Location: {{ page.location }}

{% endif %} + {% if page.presentors %}

Presenter(s): {{ page.presentors }}

{% endif %} + {% if page.link %}

Presentation Slides

{% endif %} +
+ +{{ content }} diff --git a/docs/_posts/2009-05-18-fits-poster.md b/docs/_posts/2009-05-18-fits-poster.md new file mode 100644 index 000000000..efc805078 --- /dev/null +++ b/docs/_posts/2009-05-18-fits-poster.md @@ -0,0 +1,13 @@ +--- +layout: post +categories: blog +title: Video support in FITS +date: 2015-06-01 +# author: author +--- + +Harvard is in the process of integrating the [MediaInfo](https://mediaarea.net/en/MediaInfo) tool into FITS! When complete, the tool will provide format identification and technical metadata extraction for video file formats, allow FITS XML to represent technical metadata for video file formats, and support the creation of [EBUCore](https://tech.ebu.ch/MetadataEbuCore) XML metadata through the use of the embedded OTS package. + +It is expected that the first public release will be made in summer 2016, in support of a Harvard project to add video preservation and access services to the Harvard Digital Repository Service, a project that has been made possible through the generous support of the Arcadia Fund. + +MediaInfo and EBUCore were selected for video format analysis and technical metadata delivery in consultation with [AVPreserve](http://www.avpreserve.com/). \ No newline at end of file diff --git a/docs/_posts/2009-09-01-new-tool.md b/docs/_posts/2009-09-01-new-tool.md new file mode 100644 index 000000000..dc71a9a51 --- /dev/null +++ b/docs/_posts/2009-09-01-new-tool.md @@ -0,0 +1,9 @@ +--- +layout: post +categories: blog +title: A New Tool for Digital Preservation Repositories +date: 2009-09-01 +# author: author +--- + +It was announced in the [September/October 2009 D-Lib In Brief](http://www.dlib.org/dlib/september09/09inbrief.html) that FITS was made available as an open source tool. 
\ No newline at end of file diff --git a/docs/_posts/2010-10-01-fits-bpe-presentation.md b/docs/_posts/2010-10-01-fits-bpe-presentation.md new file mode 100644 index 000000000..13b08f8b1 --- /dev/null +++ b/docs/_posts/2010-10-01-fits-bpe-presentation.md @@ -0,0 +1,9 @@ +--- +layout: presentation +categories: blog +title: FITS - The File Information Tool Set, Presentation +date: 2010-10-01 +location: Best Practices Exchange, Phoenix, Arizona +presentors: Spencer McEwen +# link: https://projects.iq.harvard.edu/files/fits/files/fits_poster_final.pdf +--- \ No newline at end of file diff --git a/docs/_posts/2011-06-09-fits-tool-belt-presentation.md b/docs/_posts/2011-06-09-fits-tool-belt-presentation.md new file mode 100644 index 000000000..89bebc5e6 --- /dev/null +++ b/docs/_posts/2011-06-09-fits-tool-belt-presentation.md @@ -0,0 +1,9 @@ +--- +layout: presentation +categories: blog +title: Free Tools for your Preservation Tool Belt, Presentation +date: 2011-06-09 +location: Open Repositories 2011, Austin, Texas +presentors: Randy Stern +link: https://projects.iq.harvard.edu/files/fits/files/drs2_preservation_tools-3.pdf +--- \ No newline at end of file diff --git a/docs/_posts/2012-07-25-fits-demo-presentation.md b/docs/_posts/2012-07-25-fits-demo-presentation.md new file mode 100644 index 000000000..4a18d4f01 --- /dev/null +++ b/docs/_posts/2012-07-25-fits-demo-presentation.md @@ -0,0 +1,9 @@ +--- +layout: presentation +categories: blog +title: FITS Demo, Presentation +date: 2012-07-25 +location: Digital Preservation 2012, Washington DC +presentors: Andrea Goethals +# link: https://projects.iq.harvard.edu/files/fits/files/drs2_preservation_tools-3.pdf +--- \ No newline at end of file diff --git a/docs/_posts/2013-04-29-fits-webinar.md b/docs/_posts/2013-04-29-fits-webinar.md new file mode 100644 index 000000000..aa8d9e777 --- /dev/null +++ b/docs/_posts/2013-04-29-fits-webinar.md @@ -0,0 +1,9 @@ +--- +layout: presentation +categories: blog +title: Using FITS to 
Identify File Formats and Extract Metadata, Webinar +date: 2013-04-29 +location: ASERL-sponsored webinar +presentors: Andrea Goethals +# link: https://projects.iq.harvard.edu/files/fits/files/drs2_preservation_tools-3.pdf +--- \ No newline at end of file diff --git a/docs/_posts/2013-11-20-fits-blitz.md b/docs/_posts/2013-11-20-fits-blitz.md new file mode 100644 index 000000000..d814874c2 --- /dev/null +++ b/docs/_posts/2013-11-20-fits-blitz.md @@ -0,0 +1,12 @@ +--- +layout: post +categories: blog +title: FITS Blitz +date: 2013-11-20 +# author: author +--- + +We're almost finished with the 2-week project to merge the different forks of FITS on GitHub and to make it easier for others to contribute code to the FITS test base. This was done as a joint project between Harvard Library and the Open Planets Foundation with funding by the SPRUCE project. Learn more about the FITS blitz at: + +- [Paul Wheatley's blog post on the Open Planets blog](http://www.openplanetsfoundation.org/blogs/2013-11-06-fits-blitz) +- [The announcement in the Harvard Library newsletter](http://library.harvard.edu/11152013-1810/fits-blitz) \ No newline at end of file diff --git a/docs/_posts/2015-06-01-video-support copy.md b/docs/_posts/2015-06-01-video-support copy.md new file mode 100644 index 000000000..1c8a739af --- /dev/null +++ b/docs/_posts/2015-06-01-video-support copy.md @@ -0,0 +1,9 @@ +--- +layout: presentation +categories: blog +title: FITS – The File Information Tool Set, Poster +date: 2009-05-18 +location: Open Repositories 2009, Georgia Institute of Technology +presentors: Spencer McEwen and Randy Stern +link: https://projects.iq.harvard.edu/files/fits/files/fits_poster_final.pdf +--- \ No newline at end of file diff --git a/docs/_posts/2022-03-20-adl-tool.md b/docs/_posts/2022-03-20-adl-tool.md new file mode 100644 index 000000000..9d3a0dfdb --- /dev/null +++ b/docs/_posts/2022-03-20-adl-tool.md @@ -0,0 +1,11 @@ +--- +layout: post +categories: tools +title: ADL Tool 
+maintenance-organization: Harvard Library +capabilities: Identifies and extracts edit decision lists from audio files. +formats: Audio Decision List files +description: The ADL tool acts on ADL files and applies the Audio Engineering Society (AES) standard for transfer and exchange of edit data. +usage-note: Audio Decision List files support interchange of audio files and projects and the ADL Tool extracts this data from audio files. +# more-info-url: more information at website +--- \ No newline at end of file diff --git a/docs/_posts/2022-03-20-apache-tika.md b/docs/_posts/2022-03-20-apache-tika.md new file mode 100644 index 000000000..39b40831c --- /dev/null +++ b/docs/_posts/2022-03-20-apache-tika.md @@ -0,0 +1,11 @@ +--- +layout: post +categories: tools +title: Apache Tika +maintenance-organization: Apache +capabilities: Identifies file formats +formats: See full list of [supported formats](https://tika.apache.org/1.19/formats.html) +description: Tika extracts text and metadata from hundreds of file formats. +usage-note: While Tika can parse hundreds of formats, FITS uses Tika primarily to extract technical metadata from document-type files. +more-info-url: http://tika.apache.org/ +--- \ No newline at end of file diff --git a/docs/_posts/2022-03-20-droid.md b/docs/_posts/2022-03-20-droid.md new file mode 100644 index 000000000..f0a8048aa --- /dev/null +++ b/docs/_posts/2022-03-20-droid.md @@ -0,0 +1,11 @@ +--- +layout: post +categories: tools +title: DROID +maintenance-organization: UK National Archives +capabilities: Profiles a range of file formats and identifies version, age, size, and date of last modification. +formats: Supports over 1,000 formats, which are listed in the DROID signature file +description: The core function of DROID is accurate file format identification, even if the files are missing extensions or if they are in a container file. DROID is written in Java. +usage-note: The FITS tool wrapper uses the provided API. 
The output is converted into a simple XML document and then converted to FITS XML using xml/droid/droid_to_fits.xslt. The DROID configuration file and signature file are located in the tools/droid directory. +more-info-url: https://www.nationalarchives.gov.uk/information-management/manage-information/policy-process/digital-continuity/file-profiling-tool-droid/ +--- \ No newline at end of file diff --git a/docs/_posts/2022-03-20-embARC.md b/docs/_posts/2022-03-20-embARC.md new file mode 100644 index 000000000..7b2d0c652 --- /dev/null +++ b/docs/_posts/2022-03-20-embARC.md @@ -0,0 +1,11 @@ +--- +layout: post +categories: tools +title: embARC +maintenance-organization: Library of Congress +capabilities: Identifies and extracts complete metadata from SMPTE DPX image files. +formats: dpx +description: embARC is written in Java. The FITS tool wrapper uses the provided API. Raw tool output is provided in XML format. +usage-note: Although the standalone embARC application processes DPX sequences natively, this integration with FITS only supports the processing of DPX files individually and not as a sequence. +more-info-url: https://www.digitizationguidelines.gov/guidelines/embARC.html +--- \ No newline at end of file diff --git a/docs/_posts/2022-03-20-exifTool.md b/docs/_posts/2022-03-20-exifTool.md new file mode 100644 index 000000000..c1b3f5010 --- /dev/null +++ b/docs/_posts/2022-03-20-exifTool.md @@ -0,0 +1,11 @@ +--- +layout: post +categories: tools +title: ExifTool +maintenance-organization: Phil Harvey +capabilities: Identifies and extracts technical metadata. +formats: jpg, tiff, jp2, gif, bmp, png, psd, dng, wav, mp3, mp4, m4a, aiff, rm, ogg, flac, xml, html, pdf, doc +description: ExifTool is written in Perl. A Windows executable is also provided. The ExifTool tool wrapper detects the operating system type and calls the appropriate version of the tool. 
+usage-note: The tab-delimited output is captured, converted to a simple XML structure, and then converted to FITS XML using XSLT. xml/exiftool/exiftool_xslt_map.xml is used to determine which XSLT to apply for the given identified format. +more-info-url: https://exiftool.org/ +--- \ No newline at end of file diff --git a/docs/_posts/2022-03-20-ffident.md b/docs/_posts/2022-03-20-ffident.md new file mode 100644 index 000000000..c9f1bbf87 --- /dev/null +++ b/docs/_posts/2022-03-20-ffident.md @@ -0,0 +1,11 @@ +--- +layout: post +categories: tools +title: ffident ([archived site](http://web.archive.org/web/20061106114156/http://schmidt.devlib.org/ffident/index.html)) +maintenance-organization: no longer maintained +capabilities: Identifies file formats. +formats: Listed in the configuration file [tools/ffident/formats.txt](https://github.com/harvard-lts/fits/blob/dev/tools/ffident/formats.txt) +description: FFIdent is written in Java. +usage-note: The FITS tool wrapper uses the provided API. Output is converted into a simple XML document and then converted to FITS XML using xml/ffident/ffident_to_fits.xslt. +# more-info-url: more information at website +--- \ No newline at end of file diff --git a/docs/_posts/2022-03-20-file-utility.md b/docs/_posts/2022-03-20-file-utility.md new file mode 100644 index 000000000..5b5ce33b8 --- /dev/null +++ b/docs/_posts/2022-03-20-file-utility.md @@ -0,0 +1,11 @@ +--- +layout: post +categories: tools +title: File utility ([Windows port](http://gnuwin32.sourceforge.net/)) +maintenance-organization: GnuWin project +capabilities: Identifies files. +formats: many (> 1,000) +description: File Utility is a package that is usually bundled with Linux, UNIX and OS X. The GnuWin32 port is provided for use on Windows. Due to variations in versions this may cause different output when run on different platforms. File Utility is called in its default mode (no arguments), and also with -i to determine the MIME type. 
+usage-note: The output is converted into a simple XML document and then converted to FITS XML using xml/fileutility/fileutility_to_fits.xslt +# more-info-url: more information at website +--- \ No newline at end of file diff --git a/docs/_posts/2022-03-20-jhove.md b/docs/_posts/2022-03-20-jhove.md new file mode 100644 index 000000000..80342fe4f --- /dev/null +++ b/docs/_posts/2022-03-20-jhove.md @@ -0,0 +1,11 @@ +--- +layout: post +categories: tools +title: Jhove +maintenance-organization: Open Preservation Foundation +capabilities: Identifies, extracts technical metadata, and validates files. +formats: jpg, tiff, jp2, gif, wave, aiff, xml, html, ascii, utf-8, pdf +description: JHOVE is a software framework for format identification, validation, and characterization of digital objects. JHOVE is written in Java. JHOVE does not validate the codestream but it checks the file structure. +usage-note: The FITS tool wrapper uses the provided API. The JHOVE XML output is converted to FITS XML using XSLT. xml/jhove/jhove_xslt_map.xml is used to determine which XSLT to apply for the given identified format.

Note - For JP2 files the JHOVE output element Transformation indicates whether the compression is lossy or lossless. The transformation values are described in Table A-20 of the JPEG2000 part 1 specification. A value of 0 maps to the 9-7 irreversible (lossy) filter. A value of 1 maps to 5-3 reversible (lossless) filter. This JHOVE element is used by FITS when it outputs the compressionScheme in the image metadata, writing it as JPEG 2000 Lossy or JPEG 2000 Lossless. +more-info-url: https://github.com/openpreserve/jhove +--- \ No newline at end of file diff --git a/docs/_posts/2022-03-20-mediaInfo.md b/docs/_posts/2022-03-20-mediaInfo.md new file mode 100644 index 000000000..c5b94ab0f --- /dev/null +++ b/docs/_posts/2022-03-20-mediaInfo.md @@ -0,0 +1,11 @@ +--- +layout: post +categories: tools +title: MediaInfo +maintenance-organization: MediaArea.net +capabilities: Identifies and extracts technical metadata for video files. +formats: Although MediaInfo supports many video formats, FITS will only support the following video formats and wrappers out of the box - avi, mov, mpg, mpeg, mkv, mp4, mxf, ogv, mj2, divx, dv, m4v, m2v, ism. +description: The MediaInfo API is written in C++ and is called via Java by using the [JNA library](https://github.com/java-native-access/jna). +usage-note: The FITS tool wrapper uses the MediaInfo API. The MediaInfo XML output is converted to FITS XML using XSLT. +more-info-url: https://mediaarea.net/en/MediaInfo +--- \ No newline at end of file diff --git a/docs/_posts/2022-03-20-metadata-extractor.md b/docs/_posts/2022-03-20-metadata-extractor.md new file mode 100644 index 000000000..9d79a1684 --- /dev/null +++ b/docs/_posts/2022-03-20-metadata-extractor.md @@ -0,0 +1,11 @@ +--- +layout: post +categories: tools +title: Metadata Extractor Tool +maintenance-organization: National Library of New Zealand +capabilities: Identifies and extracts technical metadata. 
+formats: jpg, tiff, gif, bmp, wav, mp3, xml, html, pdf, doc, wordperfect, msworks, odt +description: The FITS NLNZ tool wrapper uses the provided Java API. +usage-note: The NLNZ native XML output is converted to FITS XML using XSLT. xml/nlnz/fits/nlnz_xslt_map.xml is used to determine which XSLT to apply to the given identified format. +more-info-url: https://github.com/DIA-NZ/Metadata-Extraction-Tool +--- \ No newline at end of file diff --git a/docs/_posts/2022-03-20-ois-audio.md b/docs/_posts/2022-03-20-ois-audio.md new file mode 100644 index 000000000..040cd9748 --- /dev/null +++ b/docs/_posts/2022-03-20-ois-audio.md @@ -0,0 +1,11 @@ +--- +layout: post +categories: tools +title: OIS Audio Information +maintenance-organization: Harvard Library +capabilities: Identifies and extracts technical metadata +formats: audio +description: The OIS Audio Information tool identifies audio formats and applies AES metadata standards to extracted technical metadata. +usage-note: OIS Audio Information creates FITS XML without further normalization. +# more-info-url: more information at website +--- \ No newline at end of file diff --git a/docs/_posts/2022-03-20-ois-file.md b/docs/_posts/2022-03-20-ois-file.md new file mode 100644 index 000000000..1d31ce6f5 --- /dev/null +++ b/docs/_posts/2022-03-20-ois-file.md @@ -0,0 +1,11 @@ +--- +layout: post +categories: tools +title: OIS File Information +maintenance-organization: Harvard Library +capabilities: Extracts technical metadata. +formats: any +description: FileInfo creates FITS XML without further normalization. It determines basic file information like file name, size, file system last modified date, and md5 checksums. It uses the fast md5 jar from [http://www.twmacinta.com/myjava/fast_md5.php](http://www.twmacinta.com/myjava/fast_md5.php). +usage-note: OIS FileInfo creates FITS XML without further normalization. 
+# more-info-url: more information at website +--- \ No newline at end of file diff --git a/docs/_posts/2022-03-20-ois-xml.md b/docs/_posts/2022-03-20-ois-xml.md new file mode 100644 index 000000000..268dfef84 --- /dev/null +++ b/docs/_posts/2022-03-20-ois-xml.md @@ -0,0 +1,11 @@ +--- +layout: post +categories: tools +title: OIS XML Information +maintenance-organization: Harvard Library +capabilities: Identifies and extracts technical metadata. +formats: XML +description: The OIS XML Information tool identifies XML and parses out the default namespace and schema location. +usage-note: XmlMetadata creates FITS XML without further normalization. Used for FITS text metadata. +# more-info-url: more information at website +--- \ No newline at end of file diff --git a/docs/_posts/2022-03-20-vtt.md b/docs/_posts/2022-03-20-vtt.md new file mode 100644 index 000000000..a527f1273 --- /dev/null +++ b/docs/_posts/2022-03-20-vtt.md @@ -0,0 +1,11 @@ +--- +layout: post +categories: tools +title: VTT Tool +maintenance-organization: Harvard Library +capabilities: Extracts and validates VTT video caption files +formats: video +description: VTT Tool is written in Java. +usage-note: VTT Tool creates FITS XML without further normalization. +# more-info-url: more information at website +--- \ No newline at end of file diff --git a/docs/_posts/2022-03-22-audio.md b/docs/_posts/2022-03-22-audio.md new file mode 100644 index 000000000..6593ba751 --- /dev/null +++ b/docs/_posts/2022-03-22-audio.md @@ -0,0 +1,106 @@ +--- +layout: post +categories: data-dictionary +semantic-unit: 4.01 Audio elements +semantic-components: See [dropdown](#semantic-audio) below table for list of components +definition: This section contains technical metadata for audio files. The tools will extract the raw output of the file, and FITS normalizes and consolidates the output into the standard elements. +rationale: Each file processed with FITS should have unique technical properties to support use and rendering. 
+data-constraint: Container +repeatability: Not repeatable +obligation: Automatic +usage-notes: The list of audio elements represents the potential properties of a given file. The actual elements depend on what the tools are able to determine for the file. +metadata-standard: AES standard for audio metadata – Audio object structures for preservation and restoration +short-name: AES Audio Object +maintenance-organization: Audio Engineering Society, Inc. (AES) +website: http://www.aes.org/standards/blog/2011/9/aes57-2011-metadata-audio-object +--- + +
+4.01 Audio semantic components + +4.01.01 +: audioDataEncoding + +4.01.02 +: avgBitRate + +4.01.03 +: avgPacketSize + +4.01.04 +: bitDepth + +4.01.05 +: bitRate + +4.01.06 +: blockAlign + +4.01.07 +: blockSizeMax + +4.01.08 +: blockSizeMin + +4.01.09 +: byteOrder + +4.01.10 +: channels + +4.01.11 +: duration + +4.01.12 +: maxBitRate + +4.01.13 +: maxPacketSize + +4.01.14 +: numPackets + +4.01.15 +: numSamples + +4.01.16 +: offset + +4.01.17 +: sampleRate + +4.01.18 +: software + +4.01.19 +: soundField + +4.01.20 +: time + +4.01.21 +: wordSize + +
+ +##### Example + +``` + + + +``` \ No newline at end of file diff --git a/docs/_posts/2022-03-22-container.md b/docs/_posts/2022-03-22-container.md new file mode 100644 index 000000000..991c42e15 --- /dev/null +++ b/docs/_posts/2022-03-22-container.md @@ -0,0 +1,45 @@ +--- +layout: post +categories: data-dictionary +semantic-unit: 4.06 Container elements +semantic-components: N/A +definition: This section identifies the standard used to wrap file-specific sections. +rationale: The container element can extend established schemas or wrap the output of a characterization tool. +data-constraint: Container +repeatability: Repeatable +obligation: Automatic +usage-notes: The containerMD standard contains a description of the container and two levels of verbosity. it can be used to extend certain container formats, and the container element can include format-specific technical metadata. +metadata-standard: ContainerMD +short-name: ContainerMD +maintenance-organization: Bibliothèque Nationale de France +website: http://bibnum.bnf.fr/containerMD-v1_1/index.html +--- + +##### Example + +``` + + + 34318329 + deflate + + + + + + + + + + + + + + + + + + + + +``` \ No newline at end of file diff --git a/docs/_posts/2022-03-22-document.md b/docs/_posts/2022-03-22-document.md new file mode 100644 index 000000000..757a35c13 --- /dev/null +++ b/docs/_posts/2022-03-22-document.md @@ -0,0 +1,72 @@ +--- +layout: post +categories: data-dictionary +semantic-unit: 4.02 Document elements +semantic-components: See [dropdown](#semantic-document) below table for list of components +definition: This section contains technical metadata for document files. The tools will extract the raw output of the file, and FITS normalizes and consolidates the output into the standard elements. +rationale: Each file processed with FITS should have unique technical properties to support use and rendering. 
+data-constraint: Container +repeatability: Not repeatable +obligation: Automatic +usage-notes: The list of document elements represents the potential properties of a given file. The actual elements depend on what the tools are able to determine for the file. +metadata-standard: Document Metadata - document technical metadata for digital preservation +short-name: DocumentMD +maintenance-organization: Florida Virtual Campus and Harvard Library +website: http://www.fcla.edu/dls/md/docmd +--- + +
+4.02 Document semantic components + +4.02.01 +: author + +4.02.02 +: hasAnnotations + +4.02.03 +: hasOutline + +4.02.04 +: isProtected + +4.02.05 +: isRightsManaged + +4.02.06 +: isTagged + +4.02.07 +: language + +4.02.08 +: pageCount + +4.02.09 +: title + +
+ +##### Example + +``` + + + 2 + 141 + 805 + Zakuta, Vitaly + 6 + 1 + + + 2 + 141 + 805 + 1 + 6 + + + + +``` \ No newline at end of file diff --git a/docs/_posts/2022-03-22-fileInfo.md b/docs/_posts/2022-03-22-fileInfo.md new file mode 100644 index 000000000..f4396f6eb --- /dev/null +++ b/docs/_posts/2022-03-22-fileInfo.md @@ -0,0 +1,30 @@ +--- +layout: post +categories: data-dictionary +semantic-unit: 2.0 fileInfo +semantic-components: 2.01filepath
2.02filename
2.03size
2.04md5checksum
2.05lastmodified
2.06fslastmodified
2.07created
2.08creatingApplicationName
2.09creatingApplicationversion
2.10inhibitorType
2.11rightsBasis
2.12copyrightBasis
2.13copyrightNote
2.14creatingos +definition: This section contains basic technical metadata not specific to any format +rationale: Each file processed with FITS should have unique file properties to support use and rendering. +data-constraint: Container +repeatability: Repeatable +obligation: Automatic +usage-notes: Each of the above elements will carry toolname and toolversion attributes to record the name of the tool that is the source of the information. In most cases there will also be a status attribute value equal to SINGLE_RESULT which means that there wasn't any conflicting information output by a tool. In some cases, for example if tools reported different file creation dates there will be a status value of CONFLICT. +--- + + + +##### Example + +``` + + 43152124 + Adobe Photoshop CS6 (Windows) + 2013:12:16 13:24:36 + 2013-12-16T07:24:36 + 2013:12:16 13:23:37-06:00 + /Users/UserID/Desktop/Sample_0001.tif + 0010_Adams_0001-006-1857-02-21_001.tif + 0c8c66bcc9614cd280f44a0ab8181811 + 1387221878000 + +``` \ No newline at end of file diff --git a/docs/_posts/2022-03-22-fileStatus.md b/docs/_posts/2022-03-22-fileStatus.md new file mode 100644 index 000000000..49514e248 --- /dev/null +++ b/docs/_posts/2022-03-22-fileStatus.md @@ -0,0 +1,21 @@ +--- +layout: post +categories: data-dictionary +semantic-unit: 3.0 fileStatus +semantic-components: 3.01messageElements
3.01.1well-formed
3.01.2valid
3.01.3message +definition: This section contains validity information if the tools are able to identify a valid format. +rationale: Each file processed with FITS should have a declaration of validity. +data-constraint: Container +repeatability: Not repeatable +obligation: Automatic +usage-notes: Well-formed and valid elements will indicate a boolean value (true or false) depending on the validation status of the file. +--- + +##### Example + +``` + + true + true + +``` \ No newline at end of file diff --git a/docs/_posts/2022-03-22-identification.md b/docs/_posts/2022-03-22-identification.md new file mode 100644 index 000000000..4c480f210 --- /dev/null +++ b/docs/_posts/2022-03-22-identification.md @@ -0,0 +1,40 @@ +--- +layout: post +categories: data-dictionary +semantic-unit: 1.0 identification +semantic-components: 1.1identity
1.2tool
1.3version
1.4externalIdentifier +definition: This section contains the file format in one or more identity blocks. +rationale: Each file processed with FITS should have a unique format to support use and rendering. +data-constraint: Container +repeatability: Not repeatable +obligation: Automatic +usage-notes: If all the tools that processed the file and could identify it came up with the same format, there will only be one identity block. If there were tools that processed the file that came up with an alternative format, there will be multiple identity blocks. The tools that identified the format will be nested within the identity elements.

If multiple tools disagree on a format identity or other metadata values, a status attribute is added to the element with a value of "CONFLICT". If only a single tool reports a format identity or other metadata value, a status attribute is added to the element with a value of "SINGLE_RESULT". If multiple tools agree on an identity or value, and none disagree, the status attribute is omitted. A "PARTIAL" value is written when the format can only be partially identified, for example a format name is identified but not a MIME media type. +--- + +##### Example 1: Successful format identification + +In this example, two tools (Jhove 1.5 and file utility 5.04) identified the format as Plain text with a MIME media type of text/plain. + +``` + + + + + + +``` + +##### Example 2: Format conflict + +In this example, there is a "format conflict". The tool Exiftool 9.13 identified the format as PCD with MIME media type image/x-photo-cd, but the tool Tika 1.3 identified the format as MPEG-1 Audio Layer 3. Notice in this case that the identification element will carry an attribute status value of CONFLICT. + +``` + + + + + + + + +``` \ No newline at end of file diff --git a/docs/_posts/2022-03-22-image.md b/docs/_posts/2022-03-22-image.md new file mode 100644 index 000000000..be9831b28 --- /dev/null +++ b/docs/_posts/2022-03-22-image.md @@ -0,0 +1,309 @@ +--- +layout: post +categories: data-dictionary +semantic-unit: 4.03 Image elements +semantic-components: See [dropdown](#semantic-image) below table for list of components +definition: This section contains technical metadata for image files. The tools will extract the raw output of the file, and FITS normalizes and consolidates the output into the standard elements. +rationale: Each file processed with FITS should have unique technical properties to support use and rendering. 
+data-constraint: Container +repeatability: Repeatable +obligation: Automatic +usage-notes: The list of image elements represents the potential properties of a given file. The actual elements depend on what the tools are able to determine for the file. +metadata-standard: NISO Metadata for Images in XML Schema +short-name: MIX +maintenance-organization: Library of Congress (for NISO) +website: http://www.loc.gov/standards/mix/ +--- + +
+4.03 Image semantic components + +4.03.01 +: apertureValue + +4.03.02 +: bitsPerSample + +4.03.03 +: brightnessValue + +4.03.04 +: byteOrder + +4.03.05 +: captureDevice + +4.03.06 +: cfaPattern + +4.03.07 +: cfaPattern2 + +4.03.08 +: colorMap + +4.03.09 +: colorSpace + +4.03.10 +: compressionScheme + +4.03.11 +: digitalCameraManufacturer + +4.03.12 +: digitalCameraModelName + +4.03.13 +: digitalCameraSerialNo + +4.03.14 +: exifVersion + +4.03.15 +: exposureBiasValue + +4.03.16 +: exposureIndex + +4.03.17 +: exposureProgram + +4.03.18 +: exposureTime + +4.03.19 +: extraSamples + +4.03.20 +: flash + +4.03.21 +: flashEnergy + +4.03.22 +: fNumber + +4.03.23 +: focalLength + +4.03.24 +: gpsAltitudeRef + +4.03.25 +: gpsAltitude + +4.03.26 +: gpsAreaInformation + +4.03.27 +: gpsDateStamp + +4.03.28 +: gpsDestBearing + +4.03.29 +: gpsDestBearingRef + +4.03.30 +: gpsDestDistance + +4.03.31 +: gpsDestDistanceRef + +4.03.32 +: gpsDestLatitude + +4.03.33 +: gpsDestLatitudeRef + +4.03.34 +: gpsDestLongitude + +4.03.35 +: gpsDestLongitudeRef + +4.03.36 +: gpsDifferential + +4.03.37 +: gpsDOP + +4.03.38 +: gpsImgDirection + +4.03.39 +: gpsImgDirectionRef + +4.03.40 +: gpsLatitude + +4.03.41 +: gpsLatitudeRef + +4.03.42 +: gpsLongitude + +4.03.43 +: gpsLongitudeRef + +4.03.44 +: gpsMapDatum + +4.03.45 +: gpsMeasureMode + +4.03.46 +: gpsProcessingMethod + +4.03.47 +: gpsSatellites + +4.03.48 +: gpsSpeed + +4.03.49 +: gpsSpeedRef + +4.03.50 +: gpsStatus + +4.03.51 +: gpsTimeStamp + +4.03.52 +: gpsTrack + +4.03.53 +: gpsTrackRef + +4.03.54 +: gpsVersionID + +4.03.55 +: grayResponseUnit + +4.03.56 +: iccProfileName + +4.03.57 +: iccProfileVersion + +4.03.58 +: imageHeight + +4.03.59 +: imageProducer + +4.03.60 +: imageWidth + +4.03.61 +: isoSpeedRating + +4.03.62 +: lightSource + +4.03.63 +: maxApertureValue + +4.03.64 +: meteringMode + +4.03.65 +: oECF + +4.03.66 +: orientation + +4.03.67 +: primaryChromaticitiesBlueX + +4.03.68 +: primaryChromaticitiesBlueY + +4.03.69 +: 
primaryChromaticitiesGreenX + +4.03.70 +: primaryChromaticitiesGreenY + +4.03.71 +: primaryChromaticitiesRedX + +4.03.72 +: primaryChromaticitiesRedY + +4.03.73 +: qualityLayers + +4.03.74 +: referenceBlackWhite + +4.03.75 +: resolutionLevels + +4.03.76 +: samplesPerPixel + +4.03.77 +: samplingFrequencyUnit + +4.03.78 +: scannerManufacturer + +4.03.79 +: scannerModelName + +4.03.80 +: scannerModelNumber + +4.03.81 +: scannerModelSerialNo + +4.03.82 +: scanningSoftwareName + +4.03.83 +: scanningSoftwareVersionNo + +4.03.84 +: sensingMethod + +4.03.85 +: shutterSpeedValue + +4.03.86 +: spectralSensitivity + +4.03.87 +: subjectDistance + +4.03.88 +: tileHeight + +4.03.89 +: tileWidth + +4.03.90 +: whitePointXValue + +4.03.91 +: whitePointYValue + +4.03.92 +: xSamplingFrequency + +4.03.93 +: ySamplingFrequency + +4.03.94 +: YCbCrCoefficients + +4.03.95 +: YCbCrPositioning + +4.03.96 +: YCbCrSubSampling + +
\ No newline at end of file diff --git a/docs/_posts/2022-03-22-metadata.md b/docs/_posts/2022-03-22-metadata.md new file mode 100644 index 000000000..86c43902c --- /dev/null +++ b/docs/_posts/2022-03-22-metadata.md @@ -0,0 +1,34 @@ +--- +layout: post +categories: data-dictionary +semantic-unit: 4.0 metadata +semantic-components: 4.01Audio
4.02Document
4.03Image
4.04Text
4.05Video
4.06Container +definition: This section contains the format-specific technical metadata after normalization and consolidation of each tool's raw output. +rationale: Each file processed with FITS should have unique technical properties to support use and rendering. +data-constraint: Container +repeatability: Not repeatable +obligation: Automatic +usage-notes: The elements in this section differ depending on the type of the file format (audio, document, image, text, video). Each type-specific section lists the potential elements that can appear. The actual elements depend on what the tools are able to determine for the file. +--- + +##### Example + +``` + + little endian + Uncompressed + 2982 + 4821 + RGB + 0 255 0 255 0 255 + EPSON Standard RGB - Gamma 1.8 + normal* + in. + 600 + 600 + 8 8 8 + 3 + Adobe Photoshop CS6 (Windows) + 2.2.0 + +``` \ No newline at end of file diff --git a/docs/_posts/2022-03-22-text.md b/docs/_posts/2022-03-22-text.md new file mode 100644 index 000000000..2a79e2edd --- /dev/null +++ b/docs/_posts/2022-03-22-text.md @@ -0,0 +1,55 @@ +--- +layout: post +categories: data-dictionary +semantic-unit: 4.04 Text elements +semantic-components: See [dropdown](#semantic-text) below table for list of components +definition: This section contains technical metadata for text files. The tools will extract the raw output of the file, and FITS normalizes and consolidates the output into the standard elements. +rationale: Each file processed with FITS should have unique technical properties to support use and rendering. +data-constraint: Container +repeatability: Repeatable +obligation: Automatic +usage-notes: The list of text elements represents the potential properties of a given file. The actual elements depend on what the tools are able to determine for the file. +metadata-standard: Technical Metadata for Text +short-name: TextMD +maintenance-organization: Library of Congress +website: http://www.loc.gov/standards/textMD/ +--- + +
+4.04 Text semantic components + +4.04.01 +: charset + +4.04.02 +: linebreak + +4.04.03 +: markupBasis + +4.04.04 +: markupBasisVersion + +4.04.05 +: markupLanguage + +
+ +##### Example + +``` + + + us-ascii + CR + + + + US-ASCII + CR + + + + + +``` \ No newline at end of file diff --git a/docs/_posts/2022-03-22-video.md b/docs/_posts/2022-03-22-video.md new file mode 100644 index 000000000..56a7c0263 --- /dev/null +++ b/docs/_posts/2022-03-22-video.md @@ -0,0 +1,249 @@ +--- +layout: post +categories: data-dictionary +semantic-unit: 4.05 Video elements +semantic-components: See [dropdown](#semantic-video) below table for list of components +definition: This section contains technical metadata for video files. The tools will extract the raw output of the file, and FITS normalizes and consolidates the output into the standard elements. +rationale: Each file processed with FITS should have unique technical properties to support use and rendering. +data-constraint: Container +repeatability: Repeatable +obligation: Automatic +usage-notes: The list of video elements represents the potential properties of a given file. The actual elements depend on what the tools are able to determine for the file. +metadata-standard: EBUCore metadata +short-name: EBUCore +maintenance-organization: European Broadcasting Union +website: https://tech.ebu.ch/MetadataEbuCore +--- + +
+4.05 Video semantic components + +4.05.01 +: apertureSetting + +4.05.02 +: bitDepth + +4.05.03 +: bitRate + +4.05.04 +: blockSizeMax + +4.05.05 +: blockSizeMin + +4.05.06 +: channels + +4.05.07 +: creatingApplicationName + +4.05.08 +: dataFormatType + +4.05.09 +: digitalCameraManufacturer + +4.05.10 +: digitalCameraModelName + +4.05.11 +: duration + +4.05.12 +: exposureTime + +4.05.13 +: exposureProgram + +4.05.14 +: fNumber + +4.05.15 +: focus + +4.05.16 +: frameRate + +4.05.17 +: gain + +4.05.18 +: gpsAltitude + +4.05.19 +: gpsAltitudeRef + +4.05.20 +: gpsAreaInformation + +4.05.21 +: gpsDateStamp + +4.05.22 +: gpsDestBearing + +4.05.23 +: gpsDestBearingRef + +4.05.24 +: gpsDestDistance + +4.05.25 +: gpsDestDistanceRef + +4.05.26 +: gpsDestLatitude + +4.05.27 +: gpsDestLatitudeRef + +4.05.28 +: gpsDestLongitude + +4.05.29 +: gpsDestLongitudeRef + +4.05.30 +: gpsDifferential + +4.05.31 +: gpsDOP + +4.05.32 +: gpsImgDirection + +4.05.33 +: gpsImgDirectionRef + +4.05.34 +: gpsLatitude + +4.05.35 +: gpsLatitudeRef + +4.05.36 +: gpsLongitude + +4.05.37 +: gpsLongitudeRef + +4.05.38 +: gpsMapDatum + +4.05.39 +: gpsMeasureMode + +4.05.40 +: gpsProcessingMethod + +4.05.41 +: gpsSatellites + +4.05.42 +: gpsSpeed + +4.05.43 +: gpsSpeedRef + +4.05.44 +: gpsStatus + +4.05.45 +: gpsTimeStamp + +4.05.46 +: gpsTrack + +4.05.47 +: gpsTrackRef + +4.05.48 +: gpsVersionID + +4.05.49 +: imageHeight + +4.05.50 +: imageStabilization + +4.05.51 +: imageWidth + +4.05.52 +: sampleRate + +4.05.53 +: shutterSpeedValue + +4.05.54 +: videoStreamType + +4.05.55 +: whiteBalance + +4.05.56 +: xSamplingFrequency + +4.05.57 +: ySamplingFrequency + +
+ +##### Example + +``` + + + +``` \ No newline at end of file diff --git a/docs/_posts/2022-03-28-community-sprint.md b/docs/_posts/2022-03-28-community-sprint.md new file mode 100644 index 000000000..12bffe611 --- /dev/null +++ b/docs/_posts/2022-03-28-community-sprint.md @@ -0,0 +1,17 @@ +--- +layout: post +categories: blog +title: FITS Community Sprint Recap, Winter 2022 +date: 2022-03-28 +author: author +--- + +## Overview and outcomes + +## Highlights + +## Future plans + +
+ +For more details, visit our [GitHub project board](https://github.com/orgs/harvard-lts/projects/3/views/1) \ No newline at end of file diff --git a/docs/about.md b/docs/about.md new file mode 100644 index 000000000..523850b71 --- /dev/null +++ b/docs/about.md @@ -0,0 +1,27 @@ +--- +layout: page +title: About +permalink: /about +--- + +
+

FITS

+ +The File Information Tool Set (FITS) identifies, validates and extracts technical metadata for a wide range of file formats. It acts as a wrapper, invoking and managing the output from several other open source tools. Output from these tools is converted into a common format, compared to one another and consolidated into a single XML output file. + +Note: FITS is written in Java and is **compatible with Java 1.8 or higher**. + +

Release Notes & Source Code

+
+ +--- + +
+

FITS Web Service

+ +The FITS Web Service is a project that allows FITS to be deployed as a service on either Tomcat or JBoss. The code has been built and tested using Java 7 and Java 8, and has been tested on Tomcat 7, Tomcat 8, and minimally on JBoss 7.1. The path to the service will be the WAR file name plus the service name. (For example, for release 1.1.1, which provides the release artifact fits-1.1.1.war, use http://localhost:8080/fits-1.1.1/examine/ as the base URL.) This can be adjusted by either changing the WAR filename or using server-specific settings. + +Note: The latest and future versions of this project are built and tested using Java 8. + +

Release Notes & Source Code

+
\ No newline at end of file diff --git a/docs/connect.md b/docs/connect.md new file mode 100644 index 000000000..9a105a58a --- /dev/null +++ b/docs/connect.md @@ -0,0 +1,25 @@ +--- +layout: page +title: Connect +subtitle: Connect with the FITS community +permalink: /connect +--- + +
+
+

Join the Google Group

+

Join the FITS Google Group to follow general project discussions and feature announcements.

+
+
+

Chat on Slack

+

Join the FITS Slack channel #fits-community-sprint in the code4lib workspace to chat with others in the community.

+
+
+

Participate in a Community Sprint

+

Get involved in the community's next 2-week sprint (date TBD). Sign up for the FITS Google Group to learn when the next sprint will be.

+
+
+

Discuss on GitHub

+

Report bugs, suggest feature enhancements, or request documentation updates by creating an issue on FITS's GitHub issue tracker.

+
+
\ No newline at end of file diff --git a/docs/css/main.css b/docs/css/main.css new file mode 100644 index 000000000..a7d57e838 --- /dev/null +++ b/docs/css/main.css @@ -0,0 +1,316 @@ +.wrapper { + max-width: calc(1170px - (20px * 2)); + margin-right: auto; + margin-left: auto; + padding-right: 20px; + padding-left: 20px; +} + +hr { + margin-top: 40px; + margin-bottom: 40px; +} + +.svg-icon { + width: 20px; + height: 20px; + vertical-align: middle; +} + +a, a:visited, a:hover, a:focus { + color: #1756a9; +} + +.site-header { + background: #426ea8; +} +.site-title, .site-title:visited { + color: white; +} +.site-nav .page-link { + color: white; +} + +/* anchor links need an offset to clear the fixed header */ +:target::before { + content: ""; + display: block; + height: 80px; + /* fixed header height*/ + margin: -80px 0 0; + /* negative fixed header height */ } + +.documentation-post .post-header { + position: sticky; + top: 0; + background: white; + padding-top: 10px; + border-bottom: .55px solid; + z-index: 2; + } + +/* 48em = 768px */ +@media (min-width: 48em) { + .documentation { + display: flex; + justify-content: space-between; + } + + .sidebar { + overflow-y: auto; + height: calc(100vh - 80px); + /* flex: 0 0 25%; + max-width: 25%; */ + flex: 0 0 250px; + max-width: 250px; + } + .sidebar { + border-right: 1px solid black; + padding-right: 20px; + position: -webkit-sticky; + position: sticky; + top: 80px; + } + + .main { + padding-left: 20px; + /* flex: 0 0 calc(75% - 40px); + max-width: calc(75% - 40px); */ + flex: 0 0 calc(100% - 290px); + max-width: calc(100% - 290px); + } +} + +h1, h2, h3, h4, h5, h6 { + font-weight: 600; +} +h5 { + font-size: 16px; +} +.post-content h4, .post-content h5 { + margin-top: 30px; +} + +/* Table of Contents styles */ +#markdown-toc, +#markdown-toc > li > ul { + list-style: none; + margin-left: 0; +} +#markdown-toc > li { + margin: 15px 0; +} +#markdown-toc > li > a { + font-size: 18px; + font-weight: 600; + color: #111; +} 
+#markdown-toc > li > ul { + margin-left: 15px; +} + +#markdown-toc-references + ul > li { + padding: 5px 0; +} + +/* Homepage */ +.site-title { + font-size: 30px; + font-weight: 700; + letter-spacing: 2px; +} +.home h1, .home h1 + p { + text-align: center; +} +.home h1 { + font-size: 42px; + letter-spacing: 1.2px; +} +.home h1 + p, h1.post-title + p { + font-size: 19px; + color: #424242; +} + + +.post-title { + letter-spacing: 1.2px; +} +a { + font-weight: 500; +} + +.quick-cards { + display: flex; + justify-content: space-between; + flex-wrap: wrap; + margin: 40px 0; + gap: 40px; +} +.quick-cards a { + flex: 1 1 200px; + background: #f5f5f5; + border: 1px solid #e8e8e8; + color: black; + display: flex; + justify-content: center; + align-items: center; +} +.quick-cards a > div { + text-align: center; + padding: 20px; +} +.quick-cards h2 { + margin-bottom: 0; +} + +.fits-contributors { + column-gap: 20px; +} +@media(min-width:480px){ + .fits-contributors { + column-count: 2; + } +} +@media(min-width:780px){ + .fits-contributors { + column-count: 4; + } +} + + +.connect-cards > div { + margin-bottom: 30px; +} +.connect-cards h2 { + font-size: 24px; + line-height: 32px; + margin-bottom: 5px; +} +@media(min-width:620px){ + .connect-cards { + display: grid; + grid-template-columns: calc(50% - 15px) calc(50% - 15px); + grid-gap: 30px; + } + .connect-cards > div { + margin-bottom: 0; + } +} + + +.news-grid { + list-style: none; + margin-left: 0; +} +.news-grid h2 { + font-size: 20px; + line-height: 30px; +} + +/* styling code boxes */ +code { + font-size: 90%; +} +.highlighter-rouge .highlight { + background: #f5f5f5; + border-left: 3px solid #426ea8; + border-radius: 0; +} +.highlight code { + background: #f5f5f5; +} + + +/* semantic components within table */ +table td span { + display: inline-block; + min-width: 43px; +} +#identification + table td span { + min-width: 30px; +} +#filestatus + table td span { + min-width: 55px; +} + +/* semantic components dropdowns */ 
+#audio-elements + table, #document-elements + table, #text-elements + table, #image-elements + table, #video-elements + table { + margin-bottom: -50px; +} +#semantic-audio, #semantic-document, #semantic-text, #semantic-image, #semantic-video { + padding-top: 80px; +} + +summary { + font-weight: 600; + padding: 10px; + background: #f5f5f5; +} +summary::marker { + color: #426ea8 +} +summary:hover { + cursor: pointer; +} +details dl { + font-size: 90%; + column-count: 3; + column-gap: 40px; + column-rule: 2px solid #426ea8; + padding: 5px 15px 15px; + background: #f5f5f5; + +} +details dt { + display: inline-block; + width: 60px; +} +details dd { + display: inline-block; + width: calc(100% - 65px); +} +@media(max-width:1170px){ + details dl { + column-count: 2; + } +} +@media(max-width:480px){ + details dl { + column-count: 1; + } +} + +/* normalize table rows */ +table tr td:first-child { + width: 150px; +} +/* exclude command line table */ +#command-line-options + p + table tr td:first-child { + width: auto; +} + + +/* ALERT BOX */ +a.alert-box { + padding: 20px; + margin-bottom: 40px; + background: #f5f5f5; + color: black; + display: block; + font-size: 19px; + line-height: 21px; + border: 1px solid #e8e8e8; + border-left: 15px solid #426ea8; + display: flex; +} +a.alert-box svg { + padding-right: 10px; +} + +/* FOOTER */ +.contact-list li + li { + padding-top: 5px; +} +.footer-col-wrapper { + color: #111; +} diff --git a/docs/guides.md b/docs/guides.md new file mode 100644 index 000000000..8e3f4d6cb --- /dev/null +++ b/docs/guides.md @@ -0,0 +1,30 @@ +--- +layout: documentation +title: User manual +permalink: /user-manual +--- + + + +
+ +## Getting Started + +{% include_relative guides/getting-started.md %} + +## Using FITS + +{% include_relative guides/overview-of-fits-processing.md %} +{% include_relative guides/understanding-the-output.md %} + +## References + +{% include_relative guides/command-line-options.md %} +{% include_relative guides/data-dictionary.md %} +{% include_relative guides/fits-configuration-files.md %} +{% include_relative guides/tools.md %} + +
diff --git a/docs/guides/command-line-options.md b/docs/guides/command-line-options.md new file mode 100644 index 000000000..b0c8a9fa3 --- /dev/null +++ b/docs/guides/command-line-options.md @@ -0,0 +1,27 @@ +### Command-line options + +When you run FITS on the command-line, the following options are available: + +| Option | Description | +| :---------: | ----------- | +| -h | Prints a help message to the screen. | +| -i | Indicates that a file or directory to process will follow. (required) | +| -o | Directs the FITS output to a file (if -i is a directory) rather than console. (optional) | +| -r | Causes FITS to recursively process all files when the input is a directory. All output files are placed in the same directory as configured in -o unless -n is set. (optional) | +| -n | When -r is set and -i is a directory, output files are placed in nested directories in the same way the input directories are nested. (optional) | +| -v | Outputs tool version information. | +| -x | Transforms the FITS output into standard XML schemas. (Only standard schema metadata is output.) | +| -xc | Outputs the FITS output plus the FITS output transformed into standard XML schemas. | +| -f | Path to an alternate fits.xml configuration file rather than using the default within FITS. (optional) | + + + +Many of the options can be used together. 
For example: + +```.\fits.bat -i myFileToProcess.pdf -o theOutput.txt``` + +When processing multiple files contained in a single directory whose output goes to another directory while using an alternate FITS configuration file: + +```./fits.sh -i /input-files-directory -o /output-directory -xc -f /alternate-fits-config/fits.xml``` + +--- \ No newline at end of file diff --git a/docs/guides/data-dictionary.md b/docs/guides/data-dictionary.md new file mode 100644 index 000000000..4fe9517ce --- /dev/null +++ b/docs/guides/data-dictionary.md @@ -0,0 +1,9 @@ +### Data dictionary + +FITS converts the raw output of each wrapped tool to a structure called FITS XML. [FITS XML schema](http://hul.harvard.edu/ois/xml/xsd/fits/fits_output.xsd) is maintained by Harvard Library. + +The FITS Schema also includes additional top-level elements: technicalMetadata, toolOutputType, and statistics. + +{% include data-dictionary-unit.md %} + +--- \ No newline at end of file diff --git a/docs/guides/fits-configuration-files.md b/docs/guides/fits-configuration-files.md new file mode 100644 index 000000000..1eb42c87d --- /dev/null +++ b/docs/guides/fits-configuration-files.md @@ -0,0 +1,103 @@ +### FITS configuration files + +The FITS configuration files are located in the [xml directory](https://github.com/harvard-lts/fits/tree/dev/xml). + +**The FITS XML output is highly affected by how FITS is configured. In particular, the order of tools near the top of the fits.xml configuration file specifies which tools FITS should prefer when they give conflicting information and if FITS should ignore tool output for particular formats. FITS comes pre-configured based on testing different tools with different formats and the default configuration should only be changed with a great deal of care and testing.** + +#### [fits.xml](https://github.com/harvard-lts/fits/blob/dev/xml/fits.xml) +This is the main configuration file for FITS. 
The key pieces are described here: + +##### tool element +{:.no_toc} + +Lists all the tools that FITS should know about. The order of these elements determines the preference in favoring one tool over another, for example when there are multiple tools reporting formats or technical metadata for a file. + +The following are attributes of the tool element: +- **class** (required) - specifies the fully qualified name of the Java class that implements the Tool interface +- **exclude-exts** (optional) - specifies, by file extension, the files that the tool should not process. This is useful if you know a tool misidentifies or generates inaccurate metadata for specific types of files +- **include-exts** (optional) - indicates to FITS to use the information reported by the tool for particular file extensions +- **classpath-dirs** (optional) - for Java-based tools when there is a need to provide class isolation via a custom class loader. By convention, any tool-specific JAR files, including any 3rd-party dependencies, should be put into a ```lib/``` directory. Additional directories can be added for configuration files that need to be discovered via the tool’s class loader. These files might go in, for example, ```xml/```. This custom class loader will load classes from the bottom up (rather than the standard Java top down scheme). The value for this attribute is the name of the sub-directory containing any JAR files for this tool. + +##### output element +{:.no_toc} + +Contains elements that control FITS metadata output: +- **data-consolidator** - specifies the class to use for consolidating the tool output. It's possible to use custom logic to control the tool output consolidation processes by creating a class implementing the ToolOutputConsolidator interface. 
+- **display-tool-output** - whether or not to append the native output of each tool to the final consolidated FITS XML output, can be set to either true or false +- **report-conflicts** - whether or not to report when there is conflicting tool information about formats or metadata, can be set to either true or false. If set to true, conflicts will be shown in the final FITS XML output. If set to false, only the output from the most preferred tool (controlled by the ordering of the tool elements) will be displayed. +- **validate-tool-output** - whether or not to validate tool output, can be set to either true or false. Generally this should be set to true. Setting it to false will disable schema validation of the output from each tool. + - NOTE: The local copy of the schema provided with FITS is used for validation during the file processing. As each tool has its output converted to the FITS format it is validated using the local schema. This can be disabled by setting ```validate-tool-output``` in xml/fits.xml to false. +- **internal-output-schema** - the location of the local copy of the XML schema specifying the FITS XML output, used during FITS execution +- **external-output-schema** - the location of the remote XML schema controlling the FITS XML output, written to the output file +- **fits-xml-namespace** - the XML namespace to use in the FITS XML output +- **enable-statistics** - whether or not to output the statistics block containing performance metrics about each tool that processed the file, can be set to either true or false +- **enable-checksum** - whether or not to compute the MD5 checksum for the file, can be set to either true or false +- **checksum-exclusions** - file extensions to be excluded in the checksum calculation. + - NOTE: This configuration parameter will only be enforced if the above enable-checksum is set to true. 
+ + +##### process/maxThreads element +{:.no_toc} + +The maximum number of threads to use + +##### droid_sigfile element +{:.no_toc} + +The signature file to use with the Droid tool. [Get the list of all previously released signature files](https://www.nationalarchives.gov.uk/aboutapps/pronom/droid-signature-files.htm) + + +##### droid_read_limit element +{:.no_toc} + +This allows for limiting the amount of a file (from its beginning) that is to be examined by the DROID tool (in order to increase processing speed). For example, for some types of large video and audio files, only the first 64K bytes need to be examined to extract relevant metadata. The attribute **include-exts** sets the file extension that this limiter should be applied to, and the attribute **read-limit-kb** sets the limit, in kilobytes, of how much of the beginning of the designated file types should be examined. The default behavior (when this element remains commented-out) is for DROID to examine all files in their entirety. + + +#### [fits_format_tree.xml](https://github.com/harvard-lts/fits/blob/dev/xml/fits_format_tree.xml) +Certain formats are a more specific subset of a more general format. The format tree in this file specifies these relationships. Nested formats are more specific versions of the formats they are nested under. FITS uses this to know when to report format conflicts and when it should report a more specific format. + +During output consolidation the format tree is consulted, and any less specific format identities are thrown out. For example, OpenOffice text document formats are ZIP-based. Some tools identify these files as ZIP, and others as ODT. Any tools identifying the file as a ZIP would be discarded according to the rules set by the format tree. 
+ +An example follows using a snippet of the format tree: + +``` + + + + + + +``` + +The above snippet of the format tree should be interpreted as: JPEG 2000 JP2 and JPEG 2000 JPX are more specific forms of the JPEG 2000 format. If one FITS-wrapped tool were to report the format of a file as JPEG 2000 and another reported it as JPEG 2000 JP2, FITS would report the more specific format (JPEG 2000 JP2) and would not report that there was a format conflict (because both tools were technically correct). + + +#### [fits_output.xsd](https://github.com/harvard-lts/fits/blob/dev/xml/fits_output.xsd) +Schema for the output of FITS XML files. + + +#### [fits_xml_map.xml](https://github.com/harvard-lts/fits/blob/dev/xml/fits_xml_map.xml) +This mapping file is used to normalize the values output by some of the tools that FITS wraps, for example to change Jhove's Greyscale value to Grayscale. It allows substitution of one value for another on a tool by tool, element by element basis. + +For example, if a tool outputs the value "2" as the sampling frequency unit for an image, but you want to use the text string "inches" instead, you could add an entry to fits_xml_map.xml. Mappings are applied automatically when a tool creates its FITS output, prior to output consolidation. You must specify the tool name, version, and element name that you want mapped. Currently all mapping-related needs are handled in the tool's XSLT. + + +#### [format_map.txt](https://github.com/harvard-lts/fits/blob/dev/xml/format_map.txt) +The file is used to normalize format names output by some of the tools that FITS wraps. + + +#### [mime_map.txt](https://github.com/harvard-lts/fits/blob/dev/xml/mime_map.txt) +The file is used to normalize MIME media type values output by some of the tools that FITS wraps. + + +#### [mime_to_format_map.txt](https://github.com/harvard-lts/fits/blob/dev/xml/mime_to_format_map.txt) +Used to map format names to MIME media types for some of the tools that FITS wraps. 
+ + +#### [prettyprint.xslt](https://github.com/harvard-lts/fits/blob/dev/xml/prettyprint.xslt) +Transforms the standard FITS output into “pretty print” XML formatting for easier human readability. + + +#### [xslt_map.xsd](https://github.com/harvard-lts/fits/blob/dev/xml/xslt_map.xsd) +Schema for transformation maps for these tools: exiftool_xslt_map.xml, jhove_xslt_map.xml, nlnx_xslt_map.xml. + +--- \ No newline at end of file diff --git a/docs/guides/fits-xml.md b/docs/guides/fits-xml.md new file mode 100644 index 000000000..b41adb5e6 --- /dev/null +++ b/docs/guides/fits-xml.md @@ -0,0 +1,750 @@ +### FITS XML schema + +FITS converts the raw output of each wrapped tool to a structure called FITS XML. [FITS XML schema](http://hul.harvard.edu/ois/xml/xsd/fits/fits_output.xsd) is maintained by Harvard Library. + +#### identification +This section contains the file format in one or more identity blocks. If all the tools that processed the file and could identify it came up with the same format, there will only be one identity block. If there were tools that processed the file that came up with an alternative format, there will be multiple identity blocks. The tools that identified the format will be nested within the identity elements. Some examples follow. + +##### EXAMPLE: SUCCESSFUL FORMAT IDENTIFICATION +{:.no_toc} + + + +In this example, two tools (Jhove 1.5 and file utility 5.04) identified the format as Plain text with a MIME media type of text/plain. + +``` + + + + + + +``` + +##### EXAMPLE: FORMAT CONFLICT +{:.no_toc} + + + +In this example, there is a "format conflict". The tool Exiftool 9.13 identified the format as PCD with MIME media type image/x-photo-cd, but the tool Tika 1.3 identified the format as MPEG-1 Audio Layer 3. Notice in this case that the identification element will carry an attribute status value of CONFLICT. 
+ +```` + + + + + + + + + + +```` + +#### fileinfo +This section contains basic technical metadata that isn't specific to any format: + +- copyrightBasis element +- copyrightNote element +- created element (file creation date) +- creatingApplicationName element (name of the software used to create the file) +- creatingApplicationVersion element (version of the software used to create the file) +- creatingos element (Operating system used to create the file) +- filepath element (full filepath to the file) +- filename element (name of the file) +- fslastmodified element (last modified date based on file system metadata) +- inhibitorType element (type of file inhibitor) +- inhibitorTarget element (what is being inhibited) +- lastmodified element (last modified date based on metadata embedded in the file) +- md5checksum element (MD5 value for the file) +- rightsBasis element +- size element (size of the file in bytes) + +Each of the above elements will carry toolname and toolversion attributes to record the name of the tool that is the source of the information. In most cases there will also be a status attribute value equal to ```SINGLE_RESULT``` which means that there wasn't any conflicting information output by a tool. In some cases, for example if tools reported different file creation dates there will be a status value of ```CONFLICT```. + +#### filestatus +If any of the tools are able to validate files in this format, this section will contain validity information: + +- message element (more information from tools about what was found) +- valid element (whether or not the file was found to be valid) +- well-formed element (whether or not the file was found to be well-formed) + +#### metadata +This section contains the format-specific technical metadata after each tool's native output has been normalized and consolidated by FITS. The elements in this section differ depending on the genre of the file format (audio, document, image, text, video). 
Each genre-specific section below lists the potential elements that can appear; the actual elements depend on what the tools are able to determine for the file. + +
+AUDIO ELEMENTS + +
+ + audioDataEncoding + + + avgBitRate + + + avgPacketSize + + + bitDepth + + + bitRate + + + blockAlign + + + blockSizeMax + + + blockSizeMin + + + byteOrder + + + channels + + + duration + + + maxBitRate + + + maxPacketSize + + + numPackets + + + numSamples + + + offset + + + sampleRate + + + software + + + soundField + + + time + + + wordSize + +
+ +
+ +
+DOCUMENT ELEMENTS + +
+ + author + + + hasAnnotations + + + hasOutline + + + isProtected + + + isRightsManaged + + + isTagged + + + language + + + pageCount + + + title + +
+ +
+ +
+IMAGE ELEMENTS + +
+ + apertureValue + + + bitsPerSample + + + brightnessValue + + + byteOrder + + + captureDevice + + + cfaPattern + + + cfaPattern2 + + + colorMap + + + colorSpace + + + compressionScheme + + + digitalCameraManufacturer + + + digitalCameraModelName + + + digitalCameraSerialNo + + + exifVersion + + + exposureBiasValue + + + exposureIndex + + + exposureProgram + + + exposureTime + + + extraSamples + + + flash + + + flashEnergy + + + fNumber + + + focalLength + + + gpsAltitudeRef + + + gpsAltitude + + + gpsAreaInformation + + + gpsDateStamp + + + gpsDestBearing + + + gpsDestBearingRef + + + gpsDestDistance + + + gpsDestDistanceRef + + + gpsDestLatitude + + + gpsDestLatitudeRef + + + gpsDestLongitude + + + gpsDestLongitudeRef + + + gpsDifferential + + + gpsDOP + + + gpsImgDirection + + + gpsImgDirectionRef + + + gpsLatitude + + + gpsLatitudeRef + + + gpsLongitude + + + gpsLongitudeRef + + + gpsMapDatum + + + gpsMeasureMode + + + gpsProcessingMethod + + + gpsSatellites + + + gpsSpeed + + + gpsSpeedRef + + + gpsStatus + + + gpsTimeStamp + + + gpsTrack + + + gpsTrackRef + + + gpsVersionID + + + grayResponseUnit + + + iccProfileName + + + iccProfileVersion + + + imageHeight + + + imageProducer + + + imageWidth + + + isoSpeedRating + + + lightSource + + + maxApertureValue + + + meteringMode + + + oECF + + + orientation + + + primaryChromaticitiesBlueX + + + primaryChromaticitiesBlueY + + + primaryChromaticitiesGreenX + + + primaryChromaticitiesGreenY + + + primaryChromaticitiesRedX + + + primaryChromaticitiesRedY + + + qualityLayers + + + referenceBlackWhite + + + resolutionLevels + + + samplesPerPixel + + + samplingFrequencyUnit + + + scannerManufacturer + + + scannerModelName + + + scannerModelNumber + + + scannerModelSerialNo + + + scanningSoftwareName + + + scanningSoftwareVersionNo + + + sensingMethod + + + shutterSpeedValue + + + spectralSensitivity + + + subjectDistance + + + tileHeight + + + tileWidth + + + whitePointXValue + + + whitePointYValue + + + 
xSamplingFrequency + + + ySamplingFrequency + + + YCbCrCoefficients + + + YCbCrPositioning + + + YCbCrSubSampling + +
+ +
+ +
+TEXT ELEMENTS + +
+ + charset + + + linebreak + + + markupBasis + + + markupBasisVersion + + + markupLanguage + +
+ +
+ +
+VIDEO ELEMENTS + +
+ + apertureSetting + + + bitDepth + + + bitRate + + + blockSizeMax + + + blockSizeMin + + + channels + + + creatingApplicationName + + + dataFormatType + + + digitalCameraManufacturer + + + digitalCameraModelName + + + duration + + + exposureTime + + + exposureProgram + + + fNumber + + + focus + + + frameRate + + + gain + + + gpsAltitude + + + gpsAltitudeRef + + + gpsAreaInformation + + + gpsDateStamp + + + gpsDestBearing + + + gpsDestBearingRef + + + gpsDestDistance + + + gpsDestDistanceRef + + + gpsDestLatitude + + + gpsDestLatitudeRef + + + gpsDestLongitude + + + gpsDestLongitudeRef + + + gpsDifferential + + + gpsDOP + + + gpsImgDirection + + + gpsImgDirectionRef + + + gpsLatitude + + + gpsLatitudeRef + + + gpsLongitude + + + gpsLongitudeRef + + + gpsMapDatum + + + gpsMeasureMode + + + gpsProcessingMethod + + + gpsSatellites + + + gpsSpeed + + + gpsSpeedRef + + + gpsStatus + + + gpsTimeStamp + + + gpsTrack + + + gpsTrackRef + + + gpsVersionID + + + imageHeight + + + imageStabilization + + + imageWidth + + + sampleRate + + + shutterSpeedValue + + + videoStreamType + + + whiteBalance + + + xSamplingFrequency + + + ySamplingFrequency + +
+ +
+ +#### toolOutput +When the fits.xml file is configured to also output the native tool output, this section will contain the output from each tool that ran against the file, each surrounded by tool elements like this example: + +``` + + + [Jhove's native output] + + + [file utility's native output] + + + [ExifTool's native output] + + + [Droid's native output] + + + [NLNZ Metadata Extractor's native output] + + + [OIS File Information's native output] + + + [ffident's native output] + + + [Tika's native output] + + +``` + +#### statistics +In later versions of FITS this section was added to record how much time each wrapped tool spent processing the file. As shown in this example, when a tool isn't run against a file, a status attribute value of "did not run" is output: + +``` + + + + + + + + + + + + + +``` + +#### Additional things to understand about the FITS XML schema **(this should be reworded, maybe move above the schema part)** + +##### STATUS ATTRIBUTE +{:.no_toc} +If multiple tools disagree on a format identity or other metadata values, a status attribute is added to the element with a value of ```CONFLICT```. If only a single tool reports a format identity or other metadata value, a status attribute is added to the element with a value of ```SINGLE_RESULT```. If multiple tools agree on an identity or value, and none disagree, the status attribute is omitted. A ```PARTIAL``` value is written when the format can only be partially identified, for example a format name is identified but not a MIME media type. + +##### TOOL ORDERING PREFERENCE +{:.no_toc} +The ordering preference of the tools in xml/fits.xml determines the ordering of conflicting values. If the report-conflicts configuration option is set to false then only the value from the tool that first reported the element is displayed and the other conflicting values are discarded. 
+ +##### RELATIONSHIP BETWEEN FORMAT IDENTIFICATION AND TECHNICAL METADATA +{:.no_toc} +All tools that agree on a format identity are consolidated into a single ```<identity>``` section. + +**Technical metadata is only output (and a part of the consolidation process) for tools that were able to identify the file and that are listed in the first ```<identity>``` section. All other output is discarded**. + +##### TOOL OUTPUT NORMALIZATION +{:.no_toc} +It’s possible for tools to output conflicting data when they actually mean the same thing. For example, one tool could report the format of a PNG image as “Portable Network Graphics”, while another may report “PNG”. A tool could report a sampling frequency unit of “2”, while another may report the text string “inches”. If left alone, these would cause false positive conflicts to appear in the FITS consolidated output. These differences are normalized in the XSLT that converts the native tool output into FITS XML. In general, FITS prefers text strings to numeric values (“inches” instead of “2”), and complete format names to abbreviations (“Portable Network Graphics” instead of “PNG”). If new tools or formats are being added to FITS then thorough testing should be done to ensure that any false positive conflicts are resolved. + +--- \ No newline at end of file diff --git a/docs/guides/getting-started.md b/docs/guides/getting-started.md new file mode 100644 index 000000000..70a407044 --- /dev/null +++ b/docs/guides/getting-started.md @@ -0,0 +1,24 @@ +### What is FITS? + +The File Information Tool Set (FITS) identifies, validates and extracts technical metadata for a wide range of file formats. It acts as a wrapper, invoking and managing the output from several other open source tools. Output from these tools is converted into a common format, compared to one another and consolidated into a single XML output file. FITS is written in Java and is **compatible with Java 1.8 or higher**. 
+ +The external tools currently used are: + +- [Apache Tika](http://tika.apache.org/) +- [Jhove](https://github.com/openpreserve/jhove) +- [MediaInfo](https://mediaarea.net/en/MediaInfo) +- [Exiftool](http://www.sno.phy.queensu.ca/~phil/exiftool/) +- [National Library of New Zealand Metadata Extractor](http://meta-extractor.sourceforge.net/) +- [DROID](https://github.com/digital-preservation/droid) +- [FFIdent](http://schmidt.devlib.org/ffident/index.html) +- [File Utility (windows)](http://gnuwin32.sourceforge.net/) + +### Why use FITS? + +Preservationists and digital curators who are concerned with long-term access and use of digital files might extract technical metadata from digital files to troubleshoot problems and to create a record of the file’s properties. A number of tools can reliably extract technical metadata, but the File Information Tool Set combines these tools and compares the results of their output; this saves time and effort. + +### Installing FITS + +See our Quick Start guide to get started. Then, come back here to learn more about using FITS. + +--- \ No newline at end of file diff --git a/docs/guides/overview-of-fits-processing.md b/docs/guides/overview-of-fits-processing.md new file mode 100644 index 000000000..ae0073238 --- /dev/null +++ b/docs/guides/overview-of-fits-processing.md @@ -0,0 +1,16 @@ +### How FITS processes + +FITS works in different stages as shown in the image below. + + + +The steps are described in more detail here. + +1. First the configuration files are read. This determines which tools are called and can affect the output. +2. Each tool (JHOVE etc.) is called in parallel to process the file or directory of files (depending on the option used). Each tool's native output is converted to FITS XML. +3. All of the FITS XML is consolidated into a single instance of FITS XML. +4. The FITS XML is converted to standard XML (e.g. MIX) (if this option was requested for example by using the -x parameter on the command line). 
+ +For a more technical description of FITS processing - see the [Developer Manual](https://github.com/harvard-lts/fits/wiki/Developer-Manual#fits-processing). + +--- \ No newline at end of file diff --git a/docs/guides/standard-metadata-schemas.md b/docs/guides/standard-metadata-schemas.md new file mode 100644 index 000000000..2456ef093 --- /dev/null +++ b/docs/guides/standard-metadata-schemas.md @@ -0,0 +1,57 @@ +### Standard metadata schemas +When FITS is told to output standard XML metadata (e.g. by use of the -x or -xc options on the command-line), the following community-standard XML schemas for technical metadata are used: + +#### Audio + +| --------- | ----------- | +| Short name | AES Audio Object | +| Full name | AES standard for audio metadata – Audio object structures for preservation and restoration | +| Maintenance organization | Audio Engineering Society, Inc. (AES) | +| Website | [http://www.aes.org/publications/standards/search.cfm?docID=84](http://www.aes.org/publications/standards/search.cfm?docID=84) [http://www.aes.org/standards/blog/2011/9/aes57-2011-metadata-audio-object](http://www.aes.org/standards/blog/2011/9/aes57-2011-metadata-audio-object) | + + +#### Documents **(has this been deprecated?)** + +| --------- | ----------- | +| Short name | DocumentMD | +| Full name | Document Metadata: document technical metadata for digital preservation | +| Maintenance organization | Florida Virtual Campus / Harvard Library | +| Website | [http://fclaweb.fcla.edu/content/format-specific-metadata](http://fclaweb.fcla.edu/content/format-specific-metadata) | + + +#### Images + +| --------- | ----------- | +| Short name | MIX | +| Full name | NISO Metadata for Images in XML Schema | +| Maintenance organization | Library of Congress (for NISO) | +| Website | [http://www.loc.gov/standards/mix/](http://www.loc.gov/standards/mix/) | + + +#### Text + +| --------- | ----------- | +| Short name | TextMD | +| Full name | Technical Metadata for Text | +| 
Maintenance organization | Library of Congress | +| Website | [http://www.loc.gov/standards/textMD/](http://www.loc.gov/standards/textMD/) | + + +#### Video + +| --------- | ----------- | +| Short name | EBUCore | +| Full name | EBUCore metadata | +| Maintenance organization | European Broadcasting Union | +| Website | [https://tech.ebu.ch/MetadataEbuCore](https://tech.ebu.ch/MetadataEbuCore) | + + +#### Containers + +| --------- | ----------- | +| Short name | ContainerMD | +| Full name | ContainerMD | +| Maintenance organization | Bibliothèque Nationale de France | +| Website | [http://bibnum.bnf.fr/containerMD-v1_1/index.html](http://bibnum.bnf.fr/containerMD-v1_1/index.html) | + +--- \ No newline at end of file diff --git a/docs/guides/tools.md b/docs/guides/tools.md new file mode 100644 index 000000000..44d623673 --- /dev/null +++ b/docs/guides/tools.md @@ -0,0 +1,7 @@ +### Tools & Libraries + +The latest version of FITS is configured to use a number of open source projects. [All project licenses are available in our GitHub repository](https://github.com/harvard-lts/fits/tree/dev/Licenses). + +{% include tool.md %} + +--- \ No newline at end of file diff --git a/docs/guides/understanding-the-output.md b/docs/guides/understanding-the-output.md new file mode 100644 index 000000000..35400c97f --- /dev/null +++ b/docs/guides/understanding-the-output.md @@ -0,0 +1,36 @@ +### Reading the output + +#### Output format +{:.no_toc} +The output format of FITS is controlled by the options used when executing FITS, how FITS is configured and the genre of the format. 
+ +The format of the output will include one or more of the following: + +##### FITS XML +{:.no_toc} +- This is the default output described in detail [here](https://projects.iq.harvard.edu/fits/fits-xml) + +##### Standardized Metadata +{:.no_toc} +- This is format genre-specific technical metadata in community-standard XML schemas +- When using the command-line, use the -x parameter (to just get the output in standard metadata), or -xc (to get FITS XML in addition to standard metadata) +- The specific XML schema used is determined by the format genre - for more information see the [standard metadata schemas](https://projects.iq.harvard.edu/fits/standard-metadata-schemas) + +##### Raw output +{:.no_toc} +- This is the pre-normalized output of each tool run against the file +- This is specified by the display-tool-output configuration property in the [fits.xml configuration file](https://projects.iq.harvard.edu/fits/fits-configuration-files#fits_xml_config) + +#### Output destination +{:.no_toc} + +##### Terminal +{:.no_toc} +- This is the default unless an output file is specified + +##### File +{:.no_toc} +- When using the command-line, use the -o parameter +- When using the Java API, use the FitsOutput.saveToDisk method + +--- \ No newline at end of file diff --git a/docs/images/fits-logo.png b/docs/images/fits-logo.png new file mode 100644 index 000000000..9b11e8d8a Binary files /dev/null and b/docs/images/fits-logo.png differ diff --git a/docs/images/fits_process_current.jpg b/docs/images/fits_process_current.jpg new file mode 100644 index 000000000..eb1840500 Binary files /dev/null and b/docs/images/fits_process_current.jpg differ diff --git a/docs/images/fits_process_old.jpg b/docs/images/fits_process_old.jpg new file mode 100644 index 000000000..0cb0209df Binary files /dev/null and b/docs/images/fits_process_old.jpg differ diff --git a/docs/index.html b/docs/index.html new file mode 100644 index 000000000..6d9512461 --- /dev/null +++ b/docs/index.html @@ 
-0,0 +1,59 @@ +--- +# Feel free to add content and custom Front Matter to this file. +# To modify the layout, see https://jekyllrb.com/docs/themes/#overriding-theme-defaults + +layout: home +--- + +

File Information Tool Set

+

Identify, validate and extract technical metadata for a wide range of file formats

+ + + + + + +

FITS Contributors

+ \ No newline at end of file diff --git a/docs/news.md b/docs/news.md new file mode 100644 index 000000000..8a2425852 --- /dev/null +++ b/docs/news.md @@ -0,0 +1,24 @@ +--- +layout: page +title: News +subtitle: The latest news, presentations, and blog posts +permalink: /news +--- + +
    + {% for post in site.categories.blog %} +
  • +

    + {{ post.title }} +

    +

    {{ post.snippet }}

    + +
  • +
    + {% endfor %} +
\ No newline at end of file diff --git a/docs/quick-start.md b/docs/quick-start.md new file mode 100644 index 000000000..419b3fe18 --- /dev/null +++ b/docs/quick-start.md @@ -0,0 +1,75 @@ +--- +layout: documentation +title: Quick start +# subtitle: brief description of page +permalink: /quick-start +--- + + + +
+ +## 1. System Requirements + +FITS is a Java program and requires Java version 1.8 or higher. To find out your Java version, type java -version in a command-line window. + +## 2. Installation + +Download the [latest release](https://github.com/harvard-lts/fits/releases). If this is your first time downloading FITS, create a directory for FITS: + +- On Windows: C:\Program Files\Fits +- On Mac OS X: /Applications/Fits +- On *nix: /home/myuser/Fits + +Extract the contents of your ZIP file to your FITS directory. You should end up with another directory under your top-level FITS directory that has a version number embedded in it, for example on Windows: C:\Program Files\Fits\fits-1.3.0 + +## 3. Running FITS + +FITS can be run on a command-line or within a program using the Java API. + +## 4. FITS from the command-line + +Run FITS on the command-line using one of the start-up scripts (fits.bat on Windows, fits.sh on Mac OS X and *nix). + +On Windows 7: + +- Open up a command line interface window: Click on Start -> Type in cmd in the lower-left box and hit enter +- Navigate to the directory where you installed FITS, for example: cd "..\..\Program Files\fits\fits-1.3.0" +- Execute FITS using the start-up script with the -h parameter to see the parameter options: fits.bat -h + +On *nix: + +- Open up a terminal window. +- Navigate to the directory where you installed FITS +- If it is not already executable, make the fits.sh file executable: chmod +x fits.sh +- Run the script named fits.sh: ./fits.sh + +Here are a couple of examples of running FITS to get you started. These are relatively simple examples assuming Windows - more complex examples can be found in the on-line user manual. 
+ +- Run FITS against its release text file printing the FITS output to the terminal: + - ```fits.bat -i version.properties``` + - On Linux: ```./fits.sh -i version.properties``` +- Run FITS against its release text file saving the FITS output to a file: + - ```fits.bat -i version.properties -o myoutput.txt``` +- Output the technical metadata only (in the TextMD format) for the file to the terminal: + - ```fits.bat -x -i version.properties``` +- Output the FITS output plus technical metadata (in the TextMD format) for the text file to the terminal: + - ```fits.bat -xc -i version.properties``` + +**NOTE**: It may be necessary to increase Java heap memory when processing large audio or video files. To do this, modify the command line startup script by adding the following right after "java": + +- ```-Xmx<size>``` (where ```<size>``` is in megabytes or gigabytes) +- Example: ```java -Xmx512m``` or ```java -Xmx5g``` + +## 5. Using FITS' Java API + +See the [Developer Manual](https://github.com/harvard-lts/fits/wiki/Developer-Manual). + +## 6. Next steps + +After you are up and running, see the [User Manual](/fits/user-manual) for more documentation. + +
\ No newline at end of file