From eb6dd87c446595b3631e03a5d7687f438b61aabb Mon Sep 17 00:00:00 2001 From: sadikovi Date: Thu, 5 Apr 2018 20:19:32 +1200 Subject: [PATCH 01/32] update public modules --- src/encodings/mod.rs | 2 +- src/lib.rs | 8 +++----- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/src/encodings/mod.rs b/src/encodings/mod.rs index 7c6586a..d0172a6 100644 --- a/src/encodings/mod.rs +++ b/src/encodings/mod.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. +mod rle; pub mod encoding; pub mod decoding; -pub mod rle; pub mod levels; diff --git a/src/lib.rs b/src/lib.rs index a7a52f9..26a2b61 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -37,7 +37,6 @@ extern crate flate2; extern crate rand; extern crate x86intrin; -// TODO: don't expose everything! #[macro_use] pub mod errors; pub mod basic; @@ -45,11 +44,10 @@ pub mod data_type; mod parquet_thrift; #[macro_use] -pub mod util; +mod util; +mod compression; +mod encodings; pub mod column; pub mod record; -pub mod compression; - pub mod schema; pub mod file; -pub mod encodings; From 331fe7d3fc32a39a8922286f12663f185d1ee40e Mon Sep 17 00:00:00 2001 From: sadikovi Date: Thu, 5 Apr 2018 21:38:51 +1200 Subject: [PATCH 02/32] add docs --- src/bin/parquet-read.rs | 54 +++++++++++++++++++++++++++++++++++++++ src/bin/parquet-schema.rs | 35 +++++++++++++++++++++++++ src/lib.rs | 45 ++++++++++++++++++++++++++++++++ 3 files changed, 134 insertions(+) diff --git a/src/bin/parquet-read.rs b/src/bin/parquet-read.rs index 70530b3..e655e05 100644 --- a/src/bin/parquet-read.rs +++ b/src/bin/parquet-read.rs @@ -1,3 +1,57 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Binary file to read data from a Parquet file. +//! +//! # Install +//! +//! `parquet-read` can be installed using `cargo`: +//! ``` +//! cargo install parquet +//! ``` +//! After this `parquet-read` should be globally available: +//! ``` +//! parquet-read XYZ.parquet +//! ``` +//! +//! The binary can also be built from the source code and run as follows: +//! ``` +//! cargo run --bin parquet-read XYZ.parquet +//! ``` +//! +//! # Usage +//! +//! ``` +//! parquet-read [num-records] +//! ``` +//! where `file-path` is the path to a Parquet file and `num-records` is the optional +//! numeric option that allows to specify number of records to read from a file. +//! When not provided, all records are read. +//! +//! Note that `parquet-read` reads full file schema, no projection or filtering is +//! applied. +//! +//! For example, +//! ``` +//! parquet-read data/alltypes_plain.snappy.parquet +//! +//! parquet-read data/alltypes_plain.snappy.parquet 4 +//! 
``` + extern crate parquet; use std::env; diff --git a/src/bin/parquet-schema.rs b/src/bin/parquet-schema.rs index 40e3aef..1eb6277 100644 --- a/src/bin/parquet-schema.rs +++ b/src/bin/parquet-schema.rs @@ -15,6 +15,41 @@ // specific language governing permissions and limitations // under the License. +//! Binary file to print the schema and metadata of a Parquet file. +//! +//! # Install +//! +//! `parquet-schema` can be installed using `cargo`: +//! ``` +//! cargo install parquet +//! ``` +//! After this `parquet-schema` should be globally available: +//! ``` +//! parquet-schema XYZ.parquet +//! ``` +//! +//! The binary can also be built from the source code and run as follows: +//! ``` +//! cargo run --bin parquet-schema XYZ.parquet +//! ``` +//! +//! # Usage +//! +//! ``` +//! parquet-schema [verbose] +//! ``` +//! where `file-path` is the path to a Parquet file and `verbose` is the optional boolean +//! flag that allows to print schema only, when set to `false` (default behaviour when +//! not provided), or print full file metadata, when set to `true`. +//! For example, +//! ``` +//! parquet-schema data/alltypes_plain.snappy.parquet +//! +//! parquet-schema data/alltypes_plain.snappy.parquet false +//! +//! parquet-schema data/alltypes_plain.snappy.parquet true +//! ``` + extern crate parquet; use std::env; diff --git a/src/lib.rs b/src/lib.rs index 26a2b61..0cb9414 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -15,6 +15,51 @@ // specific language governing permissions and limitations // under the License. +//! [Apache Parquet](http://parquet.apache.org) is a columnar storage format that +//! provides efficient data compression and encoding schemes to improve performance of +//! handling complex nested data structures. Parquet implements record-shredding and +//! assembly algorithm described in the Dremel paper. +//! +//! Crate provides API to access file schema and metadata from a Parquet file, extract +//! row groups or column chunks from a file, or read records/values. +//! +//! # Example +//! ```rust +//! use std::fs::File; +//! use std::path::Path; +//! use parquet::file::reader::{FileReader, SerializedFileReader}; +//! +//! // Opening a file +//! let path = Path::new("data/alltypes_plain.parquet"); +//! let file = File::open(&path).expect("File should exist"); +//! let reader = SerializedFileReader::new(file).expect("Reader should open the file"); +//! +//! // Accessing file metadata or row group metadata +//! let metadata = reader.metadata(); +//! let file_metadata = metadata.file_metadata(); +//! for i in 0..metadata.num_row_groups() { +//! let row_group_metadata = metadata.row_group(i); +//! } +//! +//! // Accessing row group readers in a file +//! for i in 0..reader.num_row_groups() { +//! let row_group_reader = reader.get_row_group(i).expect("Should be okay"); +//! } +//! +//! // Reading data using record API +//! let mut iter = reader.get_row_iter(None).expect("Should be okay"); +//! while let Some(record) = iter.next() { +//! // do something with the record... +//! } +//! ``` +//! +//! # Schema and metadata +//! +//! # File and row group API +//! +//! # Read API +//! 
+ #![feature(type_ascription)] #![feature(rustc_private)] #![feature(specialization)] From cc88b2f8ea6aabf8cb023004f9074cf2102cee74 Mon Sep 17 00:00:00 2001 From: Ivan Sadikov Date: Fri, 6 Apr 2018 09:21:03 +1200 Subject: [PATCH 03/32] add doc build in readme --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index d38a181..e134585 100644 --- a/README.md +++ b/README.md @@ -43,5 +43,8 @@ be printed). ## Benchmarks Run `cargo bench` for benchmarks. +## Docs +To build documentation, run `cargo doc --no-deps`. To compile and view in the browser, run `cargo doc --no-deps --open`. + ## License Licensed under the Apache License, Version 2.0: http://www.apache.org/licenses/LICENSE-2.0. From 5801b6ed4874d7c4fc221a5f326701c708254399 Mon Sep 17 00:00:00 2001 From: sadikovi Date: Fri, 6 Apr 2018 10:14:26 +1200 Subject: [PATCH 04/32] update lib doc --- src/lib.rs | 53 +++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 41 insertions(+), 12 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 0cb9414..b09962f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -32,33 +32,62 @@ //! // Opening a file //! let path = Path::new("data/alltypes_plain.parquet"); //! let file = File::open(&path).expect("File should exist"); -//! let reader = SerializedFileReader::new(file).expect("Reader should open the file"); +//! let reader = SerializedFileReader::new(file).expect("Valid Parquet file"); //! -//! // Accessing file metadata or row group metadata -//! let metadata = reader.metadata(); -//! let file_metadata = metadata.file_metadata(); -//! for i in 0..metadata.num_row_groups() { -//! let row_group_metadata = metadata.row_group(i); -//! } -//! -//! // Accessing row group readers in a file -//! for i in 0..reader.num_row_groups() { -//! let row_group_reader = reader.get_row_group(i).expect("Should be okay"); +//! // Accessing Parquet metadata +//! let parquet_metadata = reader.metadata(); +//! let file_metadata = parquet_metadata.file_metadata(); +//! for i in 0..parquet_metadata.num_row_groups() { +//! // Accessing row group metadata +//! let row_group_metadata = parquet_metadata.row_group(i); +//! // Accessing column chunk metadata +//! for j in 0..row_group_metadata.num_columns() { +//! let column_chunk_metadata = row_group_metadata.column(j); +//! } //! } //! //! // Reading data using record API //! let mut iter = reader.get_row_iter(None).expect("Should be okay"); //! while let Some(record) = iter.next() { //! // do something with the record... +//! println!("{}", record); //! } +//! +//! // Accessing row group readers in a file +//! for i in 0..reader.num_row_groups() { +//! let row_group_reader = reader.get_row_group(i).expect("Should be okay"); +//! } +//! //! ``` //! -//! # Schema and metadata +//! # Metadata +//! +//! Module [`file::metadata`] contains Parquet metadata structs, including file metadata, +//! that has information about file schema, version, and number of rows, row group +//! metadata with a set of column chunks that contain column type and encodings, number +//! of values and compressed/uncompressed size in bytes. +//! +//! # Schema and type +//! +//! Parquet schema can be extracted from [`file::metadata::FileMetaData`] and is +//! represented by Parquet type. +//! +//! Parquet type is described by [`schema::types::Type`], including top level message +//! type or schema. Refer to the [`schema`] module for the detailed information. //! //! # File and row group API //! +//! 
File reader [`file::reader::FileReader`] is a starting point for working with Parquet +//! files. Provides set of methods to get file metadata, row group readers +//! [`file::reader::RowGroupReader`] to get access to column readers and record iterator. +//! //! # Read API //! +//! Crate has several methods to read data from a Parquet file: +//! - Low level column reader API (see [`column`] module) +//! - Arrow API (_TODO_) +//! - High level record API (see [`record`] module) +//! #![feature(type_ascription)] #![feature(rustc_private)] From b3cc916617b0378db1c0df1890d5265b88dbed74 Mon Sep 17 00:00:00 2001 From: sadikovi Date: Fri, 6 Apr 2018 16:49:13 +1200 Subject: [PATCH 05/32] update docs for basic.rs --- src/basic.rs | 96 +++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 92 insertions(+), 4 deletions(-) diff --git a/src/basic.rs b/src/basic.rs index 2ab53ed..10bfa77 100644 --- a/src/basic.rs +++ b/src/basic.rs @@ -15,6 +15,9 @@ // specific language governing permissions and limitations // under the License. +//! Contains Rust mappings for Thrift definition. +//! See `parquet.thrift` file to see raw definitions of enums listed below. + use std::convert; use std::fmt; use std::result; @@ -27,7 +30,13 @@ use parquet_thrift::parquet; // ---------------------------------------------------------------------- // Types from the Thrift definition -/// Mirrors `parquet::Type` +// Mirrors `parquet::Type` + +/// Types supported by Parquet. +/// These types are intended to be used in combination with the encodings to control +/// the on disk storage format. +/// For example INT16 is not included as a type since a good encoding of INT32 +/// would handle this. #[derive(Debug, Clone, Copy, PartialEq)] pub enum Type { BOOLEAN, @@ -40,43 +49,122 @@ pub enum Type { FIXED_LEN_BYTE_ARRAY } -/// Mirrors `parquet::ConvertedType` +// Mirrors `parquet::ConvertedType` + +/// Common types (logical types) used by frameworks when using Parquet. +/// This helps map between types in those frameworks to the base types in Parquet. +/// This is only metadata and not needed to read or write the data. #[derive(Debug, Clone, Copy, PartialEq)] pub enum LogicalType { NONE, + /// A BYTE_ARRAY actually contains UTF8 encoded chars. UTF8, + + /// A map is converted as an optional field containing a repeated key/value pair. MAP, + + /// A key/value pair is converted into a group of two fields. MAP_KEY_VALUE, + + /// A list is converted into an optional field containing a repeated field for its + /// values. LIST, + + /// An enum is converted into a binary field ENUM, + + /// A decimal value. + /// This may be used to annotate binary or fixed primitive types. The + /// underlying byte array stores the unscaled value encoded as two's + /// complement using big-endian byte order (the most significant byte is the + /// zeroth element). The value of the decimal is the value /// 10^{-scale}. + /// + /// This must be accompanied by a (maximum) precision and a scale in the + /// SchemaElement. The precision specifies the number of digits in the decimal + /// and the scale stores the location of the decimal point. For example 1.23 + /// would have precision 3 (3 total digits) and scale 2 (the decimal point is + /// 2 digits over). DECIMAL, + + /// A date stored as days since Unix epoch, encoded as the INT32 physical type. DATE, + + /// The total number of milliseconds since midnight. The value is stored as an INT32 + /// physical type. TIME_MILLIS, + + /// The total number of microseconds since midnight. 
The value is stored as an INT64 + /// physical type. TIME_MICROS, + + /// Date and time recorded as milliseconds since the Unix epoch. + /// Recorded as a physical type of INT64. TIMESTAMP_MILLIS, + + /// Date and time recorded as microseconds since the Unix epoch. + /// The value is stored as an INT64 physical type. TIMESTAMP_MICROS, + + /// An unsigned 8 bit integer value stored as INT32 physical type. UINT_8, + + /// An unsigned 16 bit integer value stored as INT32 physical type. UINT_16, + + /// An unsigned 32 bit integer value stored as INT32 physical type. UINT_32, + + /// An unsigned 64 bit integer value stored as INT64 physical type. UINT_64, + + /// A signed 8 bit integer value stored as INT32 physical type. INT_8, + + /// A signed 16 bit integer value stored as INT32 physical type. INT_16, + + /// A signed 32 bit integer value stored as INT32 physical type. INT_32, + + /// A signed 64 bit integer value stored as INT64 physical type. INT_64, + + /// A JSON document embedded within a single UTF8 column. JSON, + + /// A BSON document embedded within a single BINARY column. BSON, + + /// An interval of time. + /// + /// This type annotates data stored as a FIXED_LEN_BYTE_ARRAY of length 12. + /// This data is composed of three separate little endian unsigned integers. + /// Each stores a component of a duration of time. The first integer identifies + /// the number of months associated with the duration, the second identifies + /// the number of days associated with the duration and the third identifies + /// the number of milliseconds associated with the provided duration. + /// This duration of time is independent of any particular timezone or date. INTERVAL } -/// Mirrors `parquet::FieldRepetitionType` +// Mirrors `parquet::FieldRepetitionType` + +/// Representation of field types in schema. #[derive(Debug, Clone, Copy, PartialEq)] pub enum Repetition { + /// Field is required (can not be null) and each record has exactly 1 value. REQUIRED, + /// Field is optional (can be null) and each record has 0 or 1 values. OPTIONAL, + /// Field is repeated and can contain 0 or more values. REPEATED } -/// Mirrors `parquet::Encoding` +// Mirrors `parquet::Encoding` + +/// Encodings supported by Parquet. +/// Not all encodings are valid for all types. These enums are also used to specify the +/// encoding of definition and repetition levels. #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub enum Encoding { PLAIN, From efc7c371cbf4daf86e9b601353d4cc03f48ae70d Mon Sep 17 00:00:00 2001 From: sadikovi Date: Sat, 7 Apr 2018 11:49:14 +1200 Subject: [PATCH 06/32] add license header --- build.rs | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/build.rs b/build.rs index 2d39e7e..10f92c9 100644 --- a/build.rs +++ b/build.rs @@ -1,3 +1,20 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + use std::env; use std::fs; use std::process::Command; From ecc1bc9f36d9c521ef55dd2add67f726895b482b Mon Sep 17 00:00:00 2001 From: sadikovi Date: Sat, 7 Apr 2018 12:18:46 +1200 Subject: [PATCH 07/32] add docs for basic and errors --- src/basic.rs | 51 +++++++++++++++++++++++++++++++++++++++++++++++---- src/errors.rs | 17 +++++++++++++++-- src/lib.rs | 2 +- 3 files changed, 63 insertions(+), 7 deletions(-) diff --git a/src/basic.rs b/src/basic.rs index 10bfa77..a7ede9a 100644 --- a/src/basic.rs +++ b/src/basic.rs @@ -26,10 +26,10 @@ use std::str; use errors::ParquetError; use parquet_thrift::parquet; - // ---------------------------------------------------------------------- // Types from the Thrift definition +// ---------------------------------------------------------------------- // Mirrors `parquet::Type` /// Types supported by Parquet. @@ -49,6 +49,7 @@ pub enum Type { FIXED_LEN_BYTE_ARRAY } +// ---------------------------------------------------------------------- // Mirrors `parquet::ConvertedType` /// Common types (logical types) used by frameworks when using Parquet. @@ -77,7 +78,7 @@ pub enum LogicalType { /// This may be used to annotate binary or fixed primitive types. The /// underlying byte array stores the unscaled value encoded as two's /// complement using big-endian byte order (the most significant byte is the - /// zeroth element). The value of the decimal is the value /// 10^{-scale}. + /// zeroth element). /// /// This must be accompanied by a (maximum) precision and a scale in the /// SchemaElement. The precision specifies the number of digits in the decimal @@ -147,6 +148,7 @@ pub enum LogicalType { INTERVAL } +// ---------------------------------------------------------------------- // Mirrors `parquet::FieldRepetitionType` /// Representation of field types in schema. @@ -160,6 +162,7 @@ pub enum Repetition { REPEATED } +// ---------------------------------------------------------------------- // Mirrors `parquet::Encoding` /// Encodings supported by Parquet. @@ -167,17 +170,53 @@ pub enum Repetition { /// encoding of definition and repetition levels. #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub enum Encoding { + /// Default byte encoding. + /// - BOOLEAN - 1 bit per value, 0 is false; 1 is true. + /// - INT32 - 4 bytes per value, stored as little-endian. + /// - INT64 - 8 bytes per value, stored as little-endian. + /// - FLOAT - 4 bytes per value, stored as little-endian. + /// - DOUBLE - 8 bytes per value, stored as little-endian. + /// - BYTE_ARRAY - 4 byte length stored as little endian, followed by bytes. + /// FIXED_LEN_BYTE_ARRAY - just the bytes are stored. PLAIN, + + /// **Deprecated** dictionary encoding. + /// The values in the dictionary are encoded using PLAIN encoding. + /// Since it is deprecated, RLE_DICTIONARY encoding is used for a data page, and PLAIN + /// encoding is used for dictionary page. PLAIN_DICTIONARY, + + /// Group packed run length encoding. + /// Usable for definition/repetition levels encoding and boolean values. RLE, + + /// Bit packed encoding. + /// This can only be used if the data has a known max width. + /// Usable for definition/repetition levels encoding. BIT_PACKED, + + /// Delta encoding for integers, either INT32 or INT64. + /// Works best on sorted data. DELTA_BINARY_PACKED, + + /// Encoding for byte arrays to separate the length values and the data. 
+ /// The lengths are encoded using DELTA_BINARY_PACKED encoding. DELTA_LENGTH_BYTE_ARRAY, + + /// Incremental encoding for byte arrays. + /// Prefix lengths are encoded using DELTA_BINARY_PACKED encoding. + /// Suffixes are stored using DELTA_LENGTH_BYTE_ARRAY encoding. DELTA_BYTE_ARRAY, + + /// Dictionary encoding. + /// The ids are encoded using the RLE encoding. RLE_DICTIONARY } -/// Mirrors `parquet::CompressionCodec` +// ---------------------------------------------------------------------- +// Mirrors `parquet::CompressionCodec` + +/// Supported compression algorithms. #[derive(Debug, Clone, Copy, PartialEq)] pub enum Compression { UNCOMPRESSED, @@ -189,7 +228,11 @@ pub enum Compression { ZSTD } -/// Mirrors `parquet::PageType` +// ---------------------------------------------------------------------- +// Mirrors `parquet::PageType` + +/// Available data pages for Parquet file format. +/// Note that some of the page types may not be supported. #[derive(Debug, Clone, Copy, PartialEq)] pub enum PageType { DATA_PAGE, diff --git a/src/errors.rs b/src/errors.rs index 01348bb..3833d57 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! Common Parquet errors and macros. + use std::cell; use std::convert; use std::io; @@ -24,8 +26,11 @@ use snap; use thrift; quick_error! { + /// Set of errors that can be produced during different operations in Parquet. #[derive(Debug, PartialEq)] pub enum ParquetError { + /// General Parquet error. + /// Returned when code violates normal workflow of working with Parquet files. General(message: String) { display("Parquet error: {}", message) description(message) @@ -34,10 +39,15 @@ quick_error! { from(e: thrift::Error) -> (format!("underlying Thrift error: {}", e)) from(e: cell::BorrowMutError) -> (format!("underlying borrow error: {}", e)) } + /// "Not yet implemented" Parquet error. + /// Returned when functionality is not yet available. NYI(message: String) { display("NYI: {}", message) description(message) } + /// "End of file" Parquet error. + /// Returned when IO related failures occur, e.g. when there are not enough bytes to + /// decode. EOF(message: String) { display("EOF: {}", message) description(message) @@ -45,9 +55,11 @@ quick_error! { } } +/// A specialized `Result` for Parquet errors. pub type Result = result::Result; -/// Conversion from `ParquetError` TO other types of `Error`s +// ---------------------------------------------------------------------- +// Conversion from `ParquetError` to other types of `Error`s impl convert::From for io::Error { fn from(e: ParquetError) -> Self { @@ -55,7 +67,8 @@ impl convert::From for io::Error { } } -/// Convenient macros for different errors +// ---------------------------------------------------------------------- +// Convenient macros for different errors macro_rules! general_err { ($fmt:expr) => (ParquetError::General($fmt.to_owned())); diff --git a/src/lib.rs b/src/lib.rs index b09962f..85ccb7e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -21,7 +21,7 @@ //! assembly algorithm described in the Dremel paper. //! //! Crate provides API to access file schema and metadata from a Parquet file, extract -//! row groups or column chunks from a file, or read records/values. +//! row groups or column chunks from a file, and read records/values. //! //! # Example //! 
```rust From 9d3cee088c95b79380cdeef22faaff2062cd726a Mon Sep 17 00:00:00 2001 From: sadikovi Date: Sat, 7 Apr 2018 14:37:57 +1200 Subject: [PATCH 08/32] add doc for data_type --- src/data_type.rs | 35 ++++++++++++++++++++++------------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/src/data_type.rs b/src/data_type.rs index a095ab2..492a55f 100644 --- a/src/data_type.rs +++ b/src/data_type.rs @@ -15,33 +15,37 @@ // specific language governing permissions and limitations // under the License. +//! Data types that connect Parquet physical types with their Rust-specific +//! representations. + use std::mem; use basic::Type; use rand::{Rand, Rng}; use util::memory::{ByteBuffer, ByteBufferPtr}; -// ---------------------------------------------------------------------- -// Types connect Parquet physical types with Rust-specific types - // TODO: alignment? // TODO: we could also use [u32; 3], however it seems there is no easy way // to convert [u32] to [u32; 3] in decoding. +/// Rust representation for logical type INT96, value backed by a vector of `u32`. #[derive(Clone, Debug)] pub struct Int96 { value: Option> } impl Int96 { + /// Creates new INT96 type struct with no data set. pub fn new() -> Self { Int96 { value: None } } + /// Returns underlying data as slice of [`u32`]. pub fn data(&self) -> &[u32] { assert!(self.value.is_some()); &self.value.as_ref().unwrap() } + /// Sets data for this INT96 type. pub fn set_data(&mut self, v: Vec) { assert_eq!(v.len(), 3); self.value = Some(v); @@ -79,31 +83,37 @@ impl Rand for Int96 { } } - +/// Rust representation for BYTE_ARRAY and FIXED_LEN_BYTE_ARRAY Parquet physical types. +/// It is backed by a byte buffer. #[derive(Clone, Debug)] pub struct ByteArray { data: Option } impl ByteArray { + /// Creates new byte array with no data set. pub fn new() -> Self { ByteArray { data: None } } + /// Gets length of the underlying byte buffer. pub fn len(&self) -> usize { assert!(self.data.is_some()); self.data.as_ref().unwrap().len() } + /// Returns slice of data. pub fn data(&self) -> &[u8] { assert!(self.data.is_some()); self.data.as_ref().unwrap().as_ref() } + /// Set data from another byte buffer. pub fn set_data(&mut self, data: ByteBufferPtr) { self.data = Some(data); } + /// Returns `ByteArray` instance with slice of values for a data. pub fn slice(&self, start: usize, len: usize) -> Self { assert!(self.data.is_some()); Self::from(self.data.as_ref().unwrap().range(start, len)) @@ -162,11 +172,9 @@ impl Rand for ByteArray { } } - -// ---------------------------------------------------------------------- -// AsBytes converts an instance of data type to a slice of u8 - +/// Converts an instance of data type to a slice of bytes as `u8`. pub trait AsBytes { + /// Returns slice of bytes for this data type. fn as_bytes(&self) -> &[u8]; } @@ -225,15 +233,16 @@ impl AsBytes for str { } } - -// ---------------------------------------------------------------------- -// DataType trait, which contains the Parquet physical type info as well as -// the Rust primitive type presentation. - +/// Contains the Parquet physical type information as well as the Rust primitive type +/// presentation. pub trait DataType { type T: ::std::cmp::PartialEq + ::std::fmt::Debug + ::std::default::Default + ::std::clone::Clone + Rand + AsBytes; + + /// Returns Parquet physical type. fn get_physical_type() -> Type; + + /// Returns size in bytes for Rust representation of the physical type. 
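+  ///
+  /// A minimal sketch of the mapping, assuming the `Int32Type` implementation
+  /// provided by this module:
+  /// ```ignore
+  /// use parquet::data_type::{DataType, Int32Type};
+  ///
+  /// // INT32 is backed by `i32`, so its type size is 4 bytes.
+  /// assert_eq!(Int32Type::get_type_size(), 4);
+  /// ```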
fn get_type_size() -> usize; } From 9da396e205e6a49aafe0930d777083fe5a0dfb0e Mon Sep 17 00:00:00 2001 From: sadikovi Date: Sun, 8 Apr 2018 20:30:19 +1200 Subject: [PATCH 09/32] add schema doc --- src/basic.rs | 4 ++-- src/data_type.rs | 4 ++-- src/lib.rs | 3 ++- src/schema/mod.rs | 45 +++++++++++++++++++++++++++++++++++++++++++ src/schema/parser.rs | 44 ++++++++++++++++++++++++++++++++++-------- src/schema/printer.rs | 37 ++++++++++++++++++++++++++++++++--- src/schema/types.rs | 45 ++++++++++++++++++++++++++++++++----------- 7 files changed, 155 insertions(+), 27 deletions(-) diff --git a/src/basic.rs b/src/basic.rs index a7ede9a..c416465 100644 --- a/src/basic.rs +++ b/src/basic.rs @@ -33,8 +33,8 @@ use parquet_thrift::parquet; // Mirrors `parquet::Type` /// Types supported by Parquet. -/// These types are intended to be used in combination with the encodings to control -/// the on disk storage format. +/// These physical types are intended to be used in combination with the encodings to +/// control the on disk storage format. /// For example INT16 is not included as a type since a good encoding of INT32 /// would handle this. #[derive(Debug, Clone, Copy, PartialEq)] diff --git a/src/data_type.rs b/src/data_type.rs index 492a55f..9db0a64 100644 --- a/src/data_type.rs +++ b/src/data_type.rs @@ -27,7 +27,7 @@ use util::memory::{ByteBuffer, ByteBufferPtr}; // TODO: alignment? // TODO: we could also use [u32; 3], however it seems there is no easy way // to convert [u32] to [u32; 3] in decoding. -/// Rust representation for logical type INT96, value backed by a vector of `u32`. +/// Rust representation for logical type INT96, value is backed by a vector of `u32`. #[derive(Clone, Debug)] pub struct Int96 { value: Option> @@ -84,7 +84,7 @@ impl Rand for Int96 { } /// Rust representation for BYTE_ARRAY and FIXED_LEN_BYTE_ARRAY Parquet physical types. -/// It is backed by a byte buffer. +/// Value is backed by a byte buffer. #[derive(Clone, Debug)] pub struct ByteArray { data: Option diff --git a/src/lib.rs b/src/lib.rs index 85ccb7e..4bc0a48 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -73,7 +73,8 @@ //! represented by Parquet type. //! //! Parquet type is described by [`schema::types::Type`], including top level message -//! type or schema. Refer to the [`schema`] module for the detailed information. +//! type or schema. Refer to the [`schema`] module for the detailed information on Type +//! API, printing and parsing of message types. //! //! # File and row group API //! diff --git a/src/schema/mod.rs b/src/schema/mod.rs index 747d0da..c64c045 100644 --- a/src/schema/mod.rs +++ b/src/schema/mod.rs @@ -15,6 +15,51 @@ // specific language governing permissions and limitations // under the License. +//! Contains definitions of Parquet schema and methods to print and parse schema. +//! +//! # Example +//! +//! ```rust +//! use std::rc::Rc; +//! use parquet::schema::types::Type; +//! use parquet::basic::{Type as PhysicalType, LogicalType, Repetition}; +//! use parquet::schema::printer; +//! use parquet::schema::parser; +//! +//! // Create the following schema: +//! // message schema { +//! // OPTIONAL BYTE_ARRAY a (UTF8); +//! // REQUIRED INT32 b; +//! // } +//! +//! let field_a = Type::primitive_type_builder("a", PhysicalType::BYTE_ARRAY) +//! .with_logical_type(LogicalType::UTF8) +//! .with_repetition(Repetition::OPTIONAL) +//! .build() +//! .unwrap(); +//! +//! let field_b = Type::primitive_type_builder("b", PhysicalType::INT32) +//! .with_repetition(Repetition::REQUIRED) +//! .build() +//! 
.unwrap(); +//! +//! let schema = Type::group_type_builder("schema") +//! .with_fields(&mut vec![Rc::new(field_a), Rc::new(field_b)]) +//! .build() +//! .unwrap(); +//! +//! let mut buf = Vec::new(); +//! +//! // Print schema into buffer +//! printer::print_schema(&mut buf, &schema); +//! +//! // Parse schema from the string. +//! let string_schema = String::from_utf8(buf).unwrap(); +//! let parsed_schema = parser::parse_message_type(&string_schema).unwrap(); +//! +//! assert_eq!(schema, parsed_schema); +//! ``` + pub mod types; pub mod printer; pub mod parser; diff --git a/src/schema/parser.rs b/src/schema/parser.rs index 8cc671b..7f704bc 100644 --- a/src/schema/parser.rs +++ b/src/schema/parser.rs @@ -15,13 +15,41 @@ // specific language governing permissions and limitations // under the License. +//! Parquet schema parser. +//! Provides methods to parse and validate string message type into Parquet [`Type`]. +//! +//! # Example +//! +//! ```rust +//! use parquet::schema::parser::parse_message_type; +//! +//! let message_type = " +//! message spark_schema { +//! OPTIONAL BYTE_ARRAY a (UTF8); +//! REQUIRED INT32 b; +//! REQUIRED DOUBLE c; +//! REQUIRED BOOLEAN d; +//! OPTIONAL group e (LIST) { +//! REPEATED group list { +//! REQUIRED INT32 element; +//! } +//! } +//! } +//! "; +//! +//! let schema = parse_message_type(message_type).expect("Expected valid schema"); +//! println!("{:?}", schema); +//! ``` + use std::rc::Rc; use basic::{LogicalType, Repetition, Type as PhysicalType}; use errors::{ParquetError, Result}; use schema::types::{Type, TypePtr}; -/// Parses message type into `Type` class that can be used to extract individual columns +/// Parses message type as string into a Parquet `Type` which, for example, could be +/// used to extract individual columns. Returns Parquet general error when parsing or +/// validation fails. pub fn parse_message_type<'a>(message_type: &'a str) -> Result { let mut parser = Parser { tokenizer: &mut Tokenizer::from_str(message_type) }; parser.parse_message_type() @@ -92,14 +120,14 @@ impl<'a> Iterator for Tokenizer<'a> { } } -/// Internal Schema parser +/// Internal Schema parser. /// Traverses message type using tokenizer and parses each group/primitive type /// recursively. struct Parser<'a> { tokenizer: &'a mut Tokenizer<'a> } -// Utility function to assert token on validity +// Utility function to assert token on validity. fn assert_token(token: Option<&str>, expected: &str) -> Result<()> { match token { Some(value) if value == expected => Ok(()), @@ -108,7 +136,7 @@ fn assert_token(token: Option<&str>, expected: &str) -> Result<()> { } } -// Utility function to parse i32 or return general error +// Utility function to parse i32 or return general error. fn parse_i32( value: Option<&str>, not_found_msg: &str, @@ -121,9 +149,9 @@ fn parse_i32( } impl<'a> Parser<'a> { - // Entry function to parse message type, uses internal tokenizer + // Entry function to parse message type, uses internal tokenizer. fn parse_message_type(&mut self) -> Result { - // Check that message type starts with "message" + // Check that message type starts with "message". match self.tokenizer.next() { Some("message") => { let name = self.tokenizer.next() @@ -139,8 +167,8 @@ impl<'a> Parser<'a> { } } - // Parse child types for a current group type. - // This is only invoked on root and group types + // Parses child types for a current group type. + // This is only invoked on root and group types. 
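+  // Expects the child fields to be enclosed in curly braces, i.e. a block of the
+  // form `{ <fields> }`, which is asserted via the tokenizer below.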
fn parse_child_types(&mut self) -> Result> { assert_token(self.tokenizer.next(), "{")?; let mut vec = Vec::new(); diff --git a/src/schema/printer.rs b/src/schema/printer.rs index 7ceb49f..f17b302 100644 --- a/src/schema/printer.rs +++ b/src/schema/printer.rs @@ -15,6 +15,35 @@ // specific language governing permissions and limitations // under the License. +//! Parquet schema printer. +//! Provides methods to print Parquet file schema and list file metadata. +//! +//! # Example +//! +//! ```rust +//! use std::fs::File; +//! use std::path::Path; +//! use parquet::file::reader::{FileReader, SerializedFileReader}; +//! use parquet::schema::printer::{ +//! print_parquet_metadata, +//! print_file_metadata, +//! print_schema +//! }; +//! +//! // Open a file +//! let path = Path::new("data/alltypes_plain.parquet"); +//! let file = File::open(&path).expect("File should exist"); +//! let reader = SerializedFileReader::new(file).expect("Valid Parquet file"); +//! +//! let parquet_metadata = reader.metadata(); +//! +//! print_parquet_metadata(&mut std::io::stdout(), &parquet_metadata); +//! +//! print_file_metadata(&mut std::io::stdout(), &parquet_metadata.file_metadata()); +//! +//! print_schema(&mut std::io::stdout(), &parquet_metadata.file_metadata().schema()); +//! ``` + use std::fmt; use std::io; @@ -27,7 +56,7 @@ use file::metadata::{ }; use schema::types::Type; -/// Prints Parquet metadata +/// Prints Parquet metadata [`ParquetMetaData`] information. #[allow(unused_must_use)] pub fn print_parquet_metadata(out: &mut io::Write, metadata: &ParquetMetaData) { print_file_metadata(out, &metadata.file_metadata()); @@ -43,7 +72,7 @@ pub fn print_parquet_metadata(out: &mut io::Write, metadata: &ParquetMetaData) { } } -/// Prints metadata information from `file_metadata`. +/// Prints file metadata [`FileMetaData`] information. #[allow(unused_must_use)] pub fn print_file_metadata(out: &mut io::Write, file_metadata: &FileMetaData) { writeln!(out, "version: {}", file_metadata.version()); @@ -55,6 +84,7 @@ pub fn print_file_metadata(out: &mut io::Write, file_metadata: &FileMetaData) { print_schema(out, schema); } +/// Prints Parquet [`Type`] information. #[allow(unused_must_use)] pub fn print_schema(out: &mut io::Write, tp: &Type) { // TODO: better if we can pass fmt::Write to Printer. @@ -123,7 +153,8 @@ fn print_dashes(out: &mut io::Write, num: i32) { const INDENT_WIDTH: i32 = 2; -pub struct Printer<'a> { +/// Struct for printing Parquet message type. +struct Printer<'a> { output: &'a mut fmt::Write, indent: i32 } diff --git a/src/schema/types.rs b/src/schema/types.rs index 37fab9d..cb5d0f7 100644 --- a/src/schema/types.rs +++ b/src/schema/types.rs @@ -50,6 +50,7 @@ pub enum Type { } impl Type { + /// Creates primitive type builder with provided field name and physical type. pub fn primitive_type_builder( name: &str, physical_type: PhysicalType @@ -57,10 +58,12 @@ impl Type { PrimitiveTypeBuilder::new(name, physical_type) } + /// Creates group type builder with provided column name. pub fn group_type_builder(name: &str) -> GroupTypeBuilder { GroupTypeBuilder::new(name) } + /// Returns [`BasicTypeInfo`] information about the type. pub fn get_basic_info(&self) -> &BasicTypeInfo { match *self { Type::PrimitiveType { ref basic_info, .. } => &basic_info, @@ -68,21 +71,23 @@ impl Type { } } + /// Returns this type's field name. pub fn name(&self) -> &str { self.get_basic_info().name() } /// Gets the fields from this group type. - /// NOTE: this will panic if called on a non-group type. 
+ /// Note that this will panic if called on a non-group type. // TODO: should we return `&[&Type]` here? pub fn get_fields(&self) -> &[TypePtr] { match *self { - Type::GroupType{ ref fields, .. } => &fields[..], + Type::GroupType { ref fields, .. } => &fields[..], _ => panic!("Cannot call get_fields() on a non-group type") } } - /// Get physical type or panic if current type is not primitive + /// Gets physical type of this primitive type. + /// Note that this will panic if called on a non-primitive type. pub fn get_physical_type(&self) -> PhysicalType { match *self { Type::PrimitiveType { basic_info: _, physical_type, .. } => physical_type, @@ -90,7 +95,8 @@ impl Type { } } - /// Check if `sub_type` schema is part of current schema, e.g. projected columns + /// Checks if `sub_type` schema is part of current schema. + /// This method can be used to check if projected columns are part of the root schema. pub fn check_contains(&self, sub_type: &Type) -> bool { // Names match, and repetitions match or not set for both let basic_match = self.get_basic_info().name() == sub_type.get_basic_info().name() && @@ -122,7 +128,7 @@ impl Type { } } - /// Whether this is a primitive type. + /// Returns `true` if this type is a primitive type, `false` otherwise. pub fn is_primitive(&self) -> bool { match *self { Type::PrimitiveType { .. } => true, @@ -130,7 +136,7 @@ impl Type { } } - /// Whether this is a group type. + /// Returns `true` if this type is a group type, `false` otherwise. pub fn is_group(&self) -> bool { match *self { Type::GroupType { .. } => true, @@ -138,7 +144,7 @@ impl Type { } } - /// Whether this is the top-level schema type (message type). + /// Returns `true` if this type is the top-level schema type (message type). pub fn is_schema(&self) -> bool { match *self { Type::GroupType { ref basic_info, .. } => !basic_info.has_repetition(), @@ -162,7 +168,8 @@ pub struct PrimitiveTypeBuilder<'a> { } impl<'a> PrimitiveTypeBuilder<'a> { - fn new(name: &'a str, physical_type: PhysicalType) -> Self { + /// Creates new primitive type builder with provided field name and physical type. + pub fn new(name: &'a str, physical_type: PhysicalType) -> Self { Self { name: name, repetition: Repetition::OPTIONAL, @@ -175,38 +182,49 @@ impl<'a> PrimitiveTypeBuilder<'a> { } } + /// Sets [`Repetition`] for this field and returns itself. pub fn with_repetition(mut self, repetition: Repetition) -> Self { self.repetition = repetition; self } + /// Sets [`LogicalType`] for this field and returns itself. pub fn with_logical_type(mut self, logical_type: LogicalType) -> Self { self.logical_type = logical_type; self } + /// Sets type length. + /// This is only applied to FIXED_LEN_BYTE_ARRAY and INT96 (INTERVAL) types, because + /// they maintain fixed size underlying byte array. + /// By default, value is `0`. pub fn with_length(mut self, length: i32) -> Self { self.length = length; self } + /// Sets precision for Parquet DECIMAL physical type. + /// By default, it equals to `0` and used only for decimal context. pub fn with_precision(mut self, precision: i32) -> Self { self.precision = precision; self } + /// Sets scale for Parquet DECIMAL physical type. + /// By default, it equals to `0` and used only for decimal context. pub fn with_scale(mut self, scale: i32) -> Self { self.scale = scale; self } + /// Sets optional field id. pub fn with_id(mut self, id: i32) -> Self { self.id = Some(id); self } - // Creates a new `PrimitiveType` instance from the gathered attributes. 
- // This also checks various illegal conditions and returns `Err` if that that happen. + /// Creates a new `PrimitiveType` instance from the gathered attributes. + /// Also checks various illegal conditions and returns `Err` if that that happen. pub fn build(self) -> Result { let basic_info = BasicTypeInfo { name: String::from(self.name), @@ -304,6 +322,7 @@ pub struct GroupTypeBuilder<'a> { } impl<'a> GroupTypeBuilder<'a> { + /// Creates new group type builder with provided field name. pub fn new(name: &'a str) -> Self { Self { name: name, @@ -314,27 +333,31 @@ impl<'a> GroupTypeBuilder<'a> { } } + /// Sets [`Repetition`] for this field and returns itself. pub fn with_repetition(mut self, repetition: Repetition) -> Self { self.repetition = Some(repetition); self } + /// Sets [`LogicalType`] for this field and returns itself. pub fn with_logical_type(mut self, logical_type: LogicalType) -> Self { self.logical_type = logical_type; self } + /// Sets a list of fields that should be child nodes of this field. pub fn with_fields(mut self, fields: &mut Vec) -> Self { self.fields.append(fields); self } + /// Sets optional field id. pub fn with_id(mut self, id: i32) -> Self { self.id = Some(id); self } - // Create a new `GroupType` instance from the gathered attributes. + /// Creates a new `GroupType` instance from the gathered attributes. pub fn build(self) -> Result { let basic_info = BasicTypeInfo { name: String::from(self.name), From 0092fcab872cb62f1ab2e9f028d63ae3113f1e4b Mon Sep 17 00:00:00 2001 From: sadikovi Date: Sun, 8 Apr 2018 22:07:42 +1200 Subject: [PATCH 10/32] update schema docs --- src/schema/mod.rs | 8 ++--- src/schema/types.rs | 82 +++++++++++++++++++++++++++++++++++---------- 2 files changed, 68 insertions(+), 22 deletions(-) diff --git a/src/schema/mod.rs b/src/schema/mod.rs index c64c045..5580ec1 100644 --- a/src/schema/mod.rs +++ b/src/schema/mod.rs @@ -21,12 +21,12 @@ //! //! ```rust //! use std::rc::Rc; -//! use parquet::schema::types::Type; //! use parquet::basic::{Type as PhysicalType, LogicalType, Repetition}; -//! use parquet::schema::printer; -//! use parquet::schema::parser; +//! use parquet::schema::{parser, printer}; +//! use parquet::schema::types::Type; //! //! // Create the following schema: +//! // //! // message schema { //! // OPTIONAL BYTE_ARRAY a (UTF8); //! // REQUIRED INT32 b; @@ -53,7 +53,7 @@ //! // Print schema into buffer //! printer::print_schema(&mut buf, &schema); //! -//! // Parse schema from the string. +//! // Parse schema from the string //! let string_schema = String::from_utf8(buf).unwrap(); //! let parsed_schema = parser::parse_message_type(&string_schema).unwrap(); //! diff --git a/src/schema/types.rs b/src/schema/types.rs index cb5d0f7..1277590 100644 --- a/src/schema/types.rs +++ b/src/schema/types.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! Contains structs and methods to build Parquet schema and schema descriptors. + use std::collections::HashMap; use std::convert::From; use std::fmt; @@ -27,11 +29,15 @@ use parquet_thrift::parquet::SchemaElement; // ---------------------------------------------------------------------- // Parquet Type definitions +/// Type alias for `Rc`. pub type TypePtr = Rc; +/// Type alias for `Rc`. pub type SchemaDescPtr = Rc; +/// Type alias for `Rc`. pub type ColumnDescPtr = Rc; /// Representation of a Parquet type. +/// Used to describe primitive leaf fields and structs, including top-level schema. 
/// Note that the top-level schema type is represented using `GroupType` whose /// repetition is `None`. #[derive(Debug, PartialEq)] @@ -194,7 +200,7 @@ impl<'a> PrimitiveTypeBuilder<'a> { self } - /// Sets type length. + /// Sets type length and returns itself. /// This is only applied to FIXED_LEN_BYTE_ARRAY and INT96 (INTERVAL) types, because /// they maintain fixed size underlying byte array. /// By default, value is `0`. @@ -203,21 +209,21 @@ impl<'a> PrimitiveTypeBuilder<'a> { self } - /// Sets precision for Parquet DECIMAL physical type. + /// Sets precision for Parquet DECIMAL physical type and returns itself. /// By default, it equals to `0` and used only for decimal context. pub fn with_precision(mut self, precision: i32) -> Self { self.precision = precision; self } - /// Sets scale for Parquet DECIMAL physical type. + /// Sets scale for Parquet DECIMAL physical type and returns itself. /// By default, it equals to `0` and used only for decimal context. pub fn with_scale(mut self, scale: i32) -> Self { self.scale = scale; self } - /// Sets optional field id. + /// Sets optional field id and returns itself. pub fn with_id(mut self, id: i32) -> Self { self.id = Some(id); self @@ -346,12 +352,13 @@ impl<'a> GroupTypeBuilder<'a> { } /// Sets a list of fields that should be child nodes of this field. + /// Returns updated self. pub fn with_fields(mut self, fields: &mut Vec) -> Self { self.fields.append(fields); self } - /// Sets optional field id. + /// Sets optional field id and returns itself. pub fn with_id(mut self, id: i32) -> Self { self.id = Some(id); self @@ -383,27 +390,35 @@ pub struct BasicTypeInfo { } impl BasicTypeInfo { + /// Returns field name. pub fn name(&self) -> &str { &self.name } + /// Returns `true` if type has repetition field set, `false` otherwise. + /// This is mostly applied to group type, because primitive type always has + /// repetition set. pub fn has_repetition(&self) -> bool { self.repetition.is_some() } + /// Returns [`Repetition`] value for the type. pub fn repetition(&self) -> Repetition { assert!(self.repetition.is_some()); self.repetition.unwrap() } + /// Returns [`LogicalType`] value for the type. pub fn logical_type(&self) -> LogicalType { self.logical_type } + /// Returns `true` if id is set, `false` otherwise. pub fn has_id(&self) -> bool { self.id.is_some() } + /// Returns id value for the type. pub fn id(&self) -> i32 { assert!(self.id.is_some()); self.id.unwrap() @@ -421,10 +436,22 @@ pub struct ColumnPath { } impl ColumnPath { + /// Creates new column path from vector of field names. pub fn new(parts: Vec) -> Self { ColumnPath { parts: parts } } + /// Returns string representation of this column path. + /// ```rust + /// use parquet::schema::types::ColumnPath; + /// + /// let path = ColumnPath::new(vec![ + /// "a".to_string(), + /// "b".to_string(), + /// "c".to_string() + /// ]); + /// assert_eq!(&path.string(), "a.b.c"); + /// ``` pub fn string(&self) -> String { self.parts.join(".") } @@ -457,7 +484,6 @@ impl From for ColumnPath { } } - /// A descriptor for leaf-level primitive columns. /// This encapsulates information such as definition and repetition levels and is used to /// re-assemble nested data. @@ -483,6 +509,7 @@ pub struct ColumnDescriptor { } impl ColumnDescriptor { + /// Creates new descriptor for leaf-level column. pub fn new( primitive_type: TypePtr, root_type: Option, @@ -499,63 +526,76 @@ impl ColumnDescriptor { } } + /// Returns maximum definition level for this column. 
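+  /// This equals the number of optional and repeated fields in the path from the
+  /// root of the schema to this leaf column.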
pub fn max_def_level(&self) -> i16 { self.max_def_level } + /// Returns maximum repetition level for this column. pub fn max_rep_level(&self) -> i16 { self.max_rep_level } + /// Returns [`ColumnPath`] for this column. pub fn path(&self) -> &ColumnPath { &self.path } + /// Returns root [`Type`] (most top-level parent field) for this leaf column. pub fn root_type(&self) -> &Type { assert!(self.root_type.is_some()); self.root_type.as_ref().unwrap() } + /// Returns column name. pub fn name(&self) -> &str { self.primitive_type.name() } + /// Returns [`LogicalType`] for this column. pub fn logical_type(&self) -> LogicalType { self.primitive_type.get_basic_info().logical_type() } + /// Returns physical type for this column. + /// Note that it will panic if called on a non-primitive type. pub fn physical_type(&self) -> PhysicalType { match self.primitive_type.as_ref() { - &Type::PrimitiveType{ physical_type, .. } => physical_type, + &Type::PrimitiveType { physical_type, .. } => physical_type, _ => panic!("Expected primitive type!") } } + /// Returns type length for this column. + /// Note that it will panic if called on a non-primitive type. pub fn type_length(&self) -> i32 { match self.primitive_type.as_ref() { - &Type::PrimitiveType{ type_length, .. } => type_length, + &Type::PrimitiveType { type_length, .. } => type_length, _ => panic!("Expected primitive type!") } } + /// Returns type precision for this column. + /// Note that it will panic if called on a non-primitive type. pub fn type_precision(&self) -> i32 { match self.primitive_type.as_ref() { - &Type::PrimitiveType{ precision, .. } => precision, + &Type::PrimitiveType { precision, .. } => precision, _ => panic!("Expected primitive type!") } } + /// Returns type scale for this column. + /// Note that it will panic if called on a non-primitive type. pub fn type_scale(&self) -> i32 { match self.primitive_type.as_ref() { - &Type::PrimitiveType{ scale, .. } => scale, + &Type::PrimitiveType { scale, .. } => scale, _ => panic!("Expected primitive type!") } } - } -/// A schema descriptor. This encapsulates the top-level schemas for all the columns, as -/// well as all descriptors for all the primitive columns. +/// A schema descriptor. This encapsulates the top-level schemas for all the columns, +/// as well as all descriptors for all the primitive columns. pub struct SchemaDescriptor { // The top-level schema (the "message" type). // This must be a `GroupType` where each field is a root column type in the schema. @@ -567,7 +607,7 @@ pub struct SchemaDescriptor { // Mapping from a leaf column's index to the root column type that it // comes from. For instance: the leaf `a.b.c.d` would have a link back to `a`: - // -- a <------ + // -- a <-----+ // -- -- b | // -- -- -- c | // -- -- -- -- d @@ -575,6 +615,7 @@ pub struct SchemaDescriptor { } impl SchemaDescriptor { + /// Creates new schema descriptor from Parquet schema. pub fn new(tp: TypePtr) -> Self { assert!(tp.is_group(), "SchemaDescriptor should take a GroupType"); let mut leaves = vec![]; @@ -596,11 +637,11 @@ impl SchemaDescriptor { Self { schema: tp, leaves: leaves, - leaf_to_base: - leaf_to_base + leaf_to_base: leaf_to_base } } + /// Returns [`ColumnDescriptor`] for a field position. pub fn column(&self, i: usize) -> ColumnDescPtr { assert!( i < self.leaves.len(), @@ -611,14 +652,17 @@ impl SchemaDescriptor { self.leaves[i].clone() } + /// Returns slice of [`ColumnDescriptor`]. pub fn columns(&self) -> &[ColumnDescPtr] { &self.leaves } + /// Returns number of leaf-level columns. 
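+  /// Equivalent to the length of the slice returned by `columns()`.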
pub fn num_columns(&self) -> usize { self.leaves.len() } + /// Returns column root [`Type`] for a field position. pub fn get_column_root(&self, i: usize) -> &Type { assert!( i < self.leaves.len(), @@ -631,10 +675,12 @@ impl SchemaDescriptor { result.unwrap().as_ref() } + /// Returns schema as [`Type`]. pub fn root_schema(&self) -> &Type { self.schema.as_ref() } + /// Returns schema name. pub fn name(&self) -> &str { self.schema.name() } @@ -692,8 +738,8 @@ fn build_tree( } } -/// Conversion from Thrift equivalents - +/// Conversion from Thrift equivalents. +/// Should be considered private. pub fn from_thrift(elements: &mut [SchemaElement]) -> Result { let mut index = 0; let mut schema_nodes = Vec::new(); From 1c2420e2a8db6d70cf71fd68731e8ba25486e4a3 Mon Sep 17 00:00:00 2001 From: sadikovi Date: Mon, 9 Apr 2018 21:38:29 +1200 Subject: [PATCH 11/32] add record docs --- src/record/api.rs | 21 ++++++++++++--------- src/record/mod.rs | 2 ++ src/record/reader.rs | 13 ++++++++----- 3 files changed, 22 insertions(+), 14 deletions(-) diff --git a/src/record/api.rs b/src/record/api.rs index ba43578..1431d3b 100644 --- a/src/record/api.rs +++ b/src/record/api.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! Contains Row enum that is used to represent record in Rust. + use std::fmt; use basic::{LogicalType, Type as PhysicalType}; @@ -32,8 +34,7 @@ macro_rules! nyi { }); } -/// Row API to represent nested Parquet record. -/// Use implementation methods to update how Parquet types are mapped to Row. +/// Row API to represent a nested Parquet record. #[derive(Clone, Debug, PartialEq)] pub enum Row { // Primitive types @@ -55,7 +56,7 @@ pub enum Row { } impl Row { - /// Converts BOOLEAN into boolean value. + /// Converts Parquet BOOLEAN type with logical type into `bool` value. pub fn convert_bool( _physical_type: PhysicalType, _logical_type: LogicalType, @@ -64,7 +65,7 @@ impl Row { Row::Bool(value) } - // Converts INT32 into integer value. + /// Converts Parquet INT32 type with logical type into `i32` value. pub fn convert_int32( physical_type: PhysicalType, logical_type: LogicalType, @@ -78,7 +79,7 @@ impl Row { } } - /// Converts INT64 into long value. + /// Converts Parquet INT64 type with logical type into `i64` value. pub fn convert_int64( physical_type: PhysicalType, logical_type: LogicalType, @@ -90,7 +91,8 @@ impl Row { } } - /// Converts nanosecond timestamps stored as INT96 into milliseconds + /// Converts Parquet INT96 (nanosecond timestamps) type and logical type into + /// `Timestamp` value. pub fn convert_int96( _physical_type: PhysicalType, _logical_type: LogicalType, @@ -108,7 +110,7 @@ impl Row { Row::Timestamp(millis) } - /// Converts FLOAT into float value. + /// Converts Parquet FLOAT type with logical type into `f32` value. pub fn convert_float( _physical_type: PhysicalType, _logical_type: LogicalType, @@ -117,7 +119,7 @@ impl Row { Row::Float(value) } - /// Converts DOUBLE into double value. + /// Converts Parquet DOUBLE type with logical type into `f64` value. pub fn convert_double( _physical_type: PhysicalType, _logical_type: LogicalType, @@ -126,7 +128,8 @@ impl Row { Row::Double(value) } - /// Converts BYTE_ARRAY into either UTF8 string or array of bytes. + /// Converts Parquet BYTE_ARRAY type with logical type into either UTF8 string or + /// array of bytes. 
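+  /// For example, a BYTE_ARRAY with UTF8 logical type is converted into a string
+  /// value, while a BYTE_ARRAY with no logical type is returned as raw bytes.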
pub fn convert_byte_array( physical_type: PhysicalType, logical_type: LogicalType, diff --git a/src/record/mod.rs b/src/record/mod.rs index c59c51f..1760242 100644 --- a/src/record/mod.rs +++ b/src/record/mod.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! Contains record-based API for reading Parquet files. + pub mod api; pub mod reader; mod triplet; diff --git a/src/record/reader.rs b/src/record/reader.rs index 1c79495..c8b4eaf 100644 --- a/src/record/reader.rs +++ b/src/record/reader.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! Contains implementation of record assembly and converting Parquet types into `Row`s. + use std::collections::HashMap; use std::rc::Rc; @@ -29,7 +31,8 @@ use record::triplet::TripletIter; const DEFAULT_BATCH_SIZE: usize = 256; /// Tree builder for `Reader` enum. -/// Serves as a container of options for building a reader tree and a builder. +/// Serves as a container of options for building a reader tree and a builder, and +/// accessing a records iterator [`RowIter`]. pub struct TreeBuilder { // Batch size (>= 1) for triplet iterators batch_size: usize @@ -476,7 +479,7 @@ impl Reader { // ---------------------------------------------------------------------- // Row iterators -/// Iterator of `Row`s. +/// Iterator of [`Row`]s. /// It is used either for a single row group to iterate over data in that row group, or /// an entire file with auto buffering of all row groups. pub struct RowIter<'a> { @@ -489,7 +492,7 @@ pub struct RowIter<'a> { } impl<'a> RowIter<'a> { - /// Creates iterator of `Row`s for all row groups in a file. + /// Creates iterator of [`Row`]s for all row groups in a file. pub fn from_file(proj: Option, reader: &'a FileReader) -> Result { let descr = Self::get_proj_descr(proj, reader.metadata().file_metadata().schema_descr_ptr())?; @@ -505,7 +508,7 @@ impl<'a> RowIter<'a> { }) } - /// Creates iterator of `Row`s for a specific row group. + /// Creates iterator of [`Row`]s for a specific row group. pub fn from_row_group(proj: Option, reader: &'a RowGroupReader) -> Result { let descr = Self::get_proj_descr(proj, reader.metadata().schema_descr_ptr())?; let tree_builder = Self::tree_builder(); @@ -579,7 +582,7 @@ impl<'a> Iterator for RowIter<'a> { } } -/// Internal iterator of `Row`s for a reader. +/// Internal iterator of [`Row`]s for a reader. pub struct ReaderIter { root_reader: Reader, records_left: usize From f37efd8c002bacbf7917c6356cd0f2f198540e81 Mon Sep 17 00:00:00 2001 From: sadikovi Date: Mon, 9 Apr 2018 22:40:11 +1200 Subject: [PATCH 12/32] add column docs --- src/column/mod.rs | 78 ++++++++++++++++++++++++++++++++++++++++++++ src/column/page.rs | 37 ++++++++++++--------- src/column/reader.rs | 9 +++-- src/lib.rs | 2 +- 4 files changed, 108 insertions(+), 18 deletions(-) diff --git a/src/column/mod.rs b/src/column/mod.rs index e6ab26d..2683fc2 100644 --- a/src/column/mod.rs +++ b/src/column/mod.rs @@ -15,5 +15,83 @@ // specific language governing permissions and limitations // under the License. +//! Low level column reader API. +//! +//! This API is designed for the direct mapping with subsequent manual handling of +//! definition and repetition values and spacing. This allows to create column vectors +//! and batches and map them directly to Parquet data. +//! +//! See below an example of using the API. +//! +//! # Example +//! ```rust +//! use std::fs::File; +//! use std::path::Path; +//! +//! 
use parquet::basic::Type; +//! use parquet::data_type::Int32Type; +//! use parquet::column::reader::get_typed_column_reader; +//! use parquet::file::reader::{FileReader, SerializedFileReader}; +//! +//! // Open Parquet file and initialize reader +//! let path = Path::new("data/alltypes_plain.parquet"); +//! let file = File::open(&path).unwrap(); +//! let parquet_reader = SerializedFileReader::new(file).unwrap(); +//! let metadata = parquet_reader.metadata(); +//! +//! for i in 0..metadata.num_row_groups() { +//! let row_group_reader = parquet_reader.get_row_group(i).unwrap(); +//! let row_group_metadata = metadata.row_group(i); +//! +//! for j in 0..row_group_metadata.num_columns() { +//! let column = row_group_metadata.column(j); +//! +//! // Extract column reader and map to typed column reader for required columns. +//! let column_reader = row_group_reader +//! .get_column_reader(j) +//! .expect("Valid column reader"); +//! +//! // Extract typed column reader for any INT32 column in the file. +//! // It is also possible to extract certain columns based on column descriptors +//! // from metadata. +//! +//! match column.column_type() { +//! Type::INT32 => { +//! let mut typed_column_reader = +//! get_typed_column_reader::(column_reader); +//! +//! // See `read_batch` method for comments on different parameters. +//! let mut values = vec![0; 16]; +//! let mut def_levels = vec![0; 16]; +//! let mut rep_levels = vec![0; 16]; +//! +//! let num_values = typed_column_reader.read_batch( +//! 8, // batch size +//! Some(&mut def_levels), // definition levels +//! Some(&mut rep_levels), // repetition levels +//! &mut values // read values +//! ); +//! +//! println!( +//! "Read {:?} values, values: {:?}, def_levels: {:?}, rep_levels: {:?}", +//! num_values, +//! values, +//! def_levels, +//! rep_levels, +//! ); +//! }, +//! _ => { +//! // Skip any other columns for now, but there could be similar processing. +//! println!( +//! "Skipped column {} of type {}", +//! column.column_path().string(), +//! column.column_type() +//! ); +//! } +//! } +//! } +//! } +//! ``` + pub mod page; pub mod reader; diff --git a/src/column/page.rs b/src/column/page.rs index ac121dd..cc24b39 100644 --- a/src/column/page.rs +++ b/src/column/page.rs @@ -15,12 +15,15 @@ // specific language governing permissions and limitations // under the License. +//! Contains Parquet Page definitions and page reader interface. + use basic::{PageType, Encoding}; use errors::Result; use util::memory::ByteBufferPtr; /// Parquet Page definition. /// +/// List of supported pages. /// These are 1-to-1 mapped from the equivalent Thrift definitions, except `buf` which /// used to store uncompressed bytes of the page. pub enum Page { @@ -50,44 +53,48 @@ pub enum Page { } impl Page { + /// Returns [`PageType`] for this page. pub fn page_type(&self) -> PageType { match self { - &Page::DataPage{ .. } => PageType::DATA_PAGE, - &Page::DataPageV2{ .. } => PageType::DATA_PAGE_V2, - &Page::DictionaryPage{ .. } => PageType::DICTIONARY_PAGE + &Page::DataPage { .. } => PageType::DATA_PAGE, + &Page::DataPageV2 { .. } => PageType::DATA_PAGE_V2, + &Page::DictionaryPage { .. } => PageType::DICTIONARY_PAGE } } + /// Returns internal byte buffer reference for this page. pub fn buffer(&self) -> &ByteBufferPtr { match self { - &Page::DataPage{ ref buf, .. } => &buf, - &Page::DataPageV2{ ref buf, .. } => &buf, - &Page::DictionaryPage{ ref buf, .. } => &buf + &Page::DataPage { ref buf, .. } => &buf, + &Page::DataPageV2 { ref buf, .. 
} => &buf,
+      &Page::DictionaryPage { ref buf, .. } => &buf
     }
   }
 
+  /// Returns number of values in this page.
   pub fn num_values(&self) -> u32 {
     match self {
-      &Page::DataPage{ num_values, .. } => num_values,
-      &Page::DataPageV2{ num_values, .. } => num_values,
-      &Page::DictionaryPage{ num_values, .. } => num_values
+      &Page::DataPage { num_values, .. } => num_values,
+      &Page::DataPageV2 { num_values, .. } => num_values,
+      &Page::DictionaryPage { num_values, .. } => num_values
     }
   }
 
+  /// Returns this page [`Encoding`].
   pub fn encoding(&self) -> Encoding {
     match self {
-      &Page::DataPage{ encoding, .. } => encoding,
-      &Page::DataPageV2{ encoding, .. } => encoding,
-      &Page::DictionaryPage{ encoding, .. } => encoding
+      &Page::DataPage { encoding, .. } => encoding,
+      &Page::DataPageV2 { encoding, .. } => encoding,
+      &Page::DictionaryPage { encoding, .. } => encoding
     }
   }
 }
 
-/// API for reading pages from a column chunk. This offers a iterator like API to get the
-/// next page.
+/// API for reading pages from a column chunk.
+/// This offers an iterator-like API to get the next page.
 pub trait PageReader {
   /// Gets the next page in the column chunk associated with this reader.
-  /// Returns `None` if there's no page left.
+  /// Returns `None` if there are no pages left.
   fn get_next_page(&mut self) -> Result<Option<Page>>;
 }
diff --git a/src/column/reader.rs b/src/column/reader.rs
index d3b5e0d..0961681 100644
--- a/src/column/reader.rs
+++ b/src/column/reader.rs
@@ -15,6 +15,8 @@
 // specific language governing permissions and limitations
 // under the License.
 
+//! Contains column reader API.
+
 use std::cmp::{max, min};
 use std::collections::HashMap;
 use std::mem;
@@ -28,6 +30,7 @@ use errors::{Result, ParquetError};
 use schema::types::ColumnDescPtr;
 use util::memory::ByteBufferPtr;
 
+/// Column reader for a Parquet type.
 pub enum ColumnReader {
   BoolColumnReader(ColumnReaderImpl<BoolType>),
   Int32ColumnReader(ColumnReaderImpl<Int32Type>),
@@ -67,6 +70,7 @@ pub fn get_column_reader(
 
 /// Gets a typed column reader for the specific type `T`, by "up-casting" `col_reader` of
 /// non-generic type to a generic column reader type `ColumnReaderImpl<T>`.
+///
 /// NOTE: the caller MUST guarantee that the actual enum value for `col_reader` matches
 /// the type `T`. Otherwise, disastrous consequence could happen.
 pub fn get_typed_column_reader<T: DataType>(
@@ -84,7 +88,7 @@ pub fn get_typed_column_reader<T: DataType>(
   }
 }
 
-/// A value reader for a particular primitive column.
+/// Typed value reader for a particular primitive column.
 pub struct ColumnReaderImpl<T: DataType> {
   descr: ColumnDescPtr,
   def_level_decoder: Option<LevelDecoder>,
@@ -104,6 +108,7 @@ pub struct ColumnReaderImpl<T: DataType> {
 }
 
 impl<T: DataType> ColumnReaderImpl<T> where T: 'static {
+  /// Creates new column reader based on column descriptor and page reader.
   pub fn new(descr: ColumnDescPtr, page_reader: Box<PageReader>) -> Self {
     Self {
       descr: descr,
@@ -335,7 +340,7 @@ impl<T: DataType> ColumnReaderImpl<T> where T: 'static {
     Ok(true)
   }
 
-  // Resolves and updates encoding and set decoder for the current page
+  /// Resolves and updates encoding and sets the decoder for the current page.
   fn set_current_page_encoding(
     &mut self,
     mut encoding: Encoding,
diff --git a/src/lib.rs b/src/lib.rs
index 4bc0a48..b971fb4 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -84,7 +84,7 @@
 //!
 //! # Read API
 //!
-//! Crate has several methods to read data from a Parquet file:
+//! Crate offers several methods to read data from a Parquet file:
 //! - Low level column reader API (see [`column`] module)
 //! - Arrow API (_TODO_)
 //!
- High level record API (see [`record`] module) From a7f3bb7e6455fb11f88cb54a538973aabcfc6dcb Mon Sep 17 00:00:00 2001 From: sadikovi Date: Tue, 10 Apr 2018 10:59:51 +1200 Subject: [PATCH 13/32] add file doc and some minor updates --- src/column/mod.rs | 3 +- src/file/metadata.rs | 79 ++++++++++++++++++++++++++++++++++---------- src/file/mod.rs | 21 ++++++++++++ src/file/reader.rs | 55 ++++++++++++++++++------------ src/lib.rs | 25 ++++++++++++-- 5 files changed, 140 insertions(+), 43 deletions(-) diff --git a/src/column/mod.rs b/src/column/mod.rs index 2683fc2..77cceba 100644 --- a/src/column/mod.rs +++ b/src/column/mod.rs @@ -18,12 +18,13 @@ //! Low level column reader API. //! //! This API is designed for the direct mapping with subsequent manual handling of -//! definition and repetition values and spacing. This allows to create column vectors +//! definition and repetition levels and spacing. This allows to create column vectors //! and batches and map them directly to Parquet data. //! //! See below an example of using the API. //! //! # Example +//! //! ```rust //! use std::fs::File; //! use std::path::Path; diff --git a/src/file/metadata.rs b/src/file/metadata.rs index d1fac90..8c36bd3 100644 --- a/src/file/metadata.rs +++ b/src/file/metadata.rs @@ -15,6 +15,18 @@ // specific language governing permissions and limitations // under the License. +//! Contains information about available Parquet metadata. +//! The hierarchy of metadata is as follows: +//! +//! [`ParquetMetaData`] contains [`FileMetaData`] and zero or more [`RowGroupMetaData`] +//! for each row group. +//! +//! Each [`RowGroupMetaData`] contains one or more [`ColumnChunkMetaData`] for each +//! column chunk. +//! +//! Each [`ColumnChunkMetaData`] contains information about the leaf (primitive) column, +//! including descriptor, page metadata, byte sizes, compression, encodings, etc. + use std::rc::Rc; use basic::{Compression, Encoding, Type}; @@ -23,14 +35,17 @@ use schema::types::{ColumnDescriptor, ColumnDescPtr, ColumnPath}; use schema::types::{SchemaDescriptor, SchemaDescPtr, Type as SchemaType, TypePtr}; use parquet_thrift::parquet::{ColumnChunk, ColumnMetaData, RowGroup}; +/// Reference counted pointer for [`ParquetMetaData`]. pub type ParquetMetaDataPtr = Rc; +/// Global Parquet metadata. pub struct ParquetMetaData { file_metadata: FileMetaDataPtr, row_groups: Vec } impl ParquetMetaData { + /// Creates new Parquet metadata from file metadata and list of row group metadata. pub fn new(file_metadata: FileMetaData, row_groups: Vec) -> Self { ParquetMetaData { file_metadata: Rc::new(file_metadata), @@ -38,26 +53,32 @@ impl ParquetMetaData { } } + /// Returns [`FileMetaData`] reference. pub fn file_metadata(&self) -> FileMetaDataPtr { self.file_metadata.clone() } + /// Returns number of row groups in this file. pub fn num_row_groups(&self) -> usize { self.row_groups.len() } + /// Returns row group for a particular position. + /// Position should be less than number of row groups `num_row_groups`. pub fn row_group(&self, i: usize) -> RowGroupMetaDataPtr { self.row_groups[i].clone() } + /// Returns slice of row group reference counted pointers in this file. pub fn row_groups(&self) -> &[RowGroupMetaDataPtr] { &self.row_groups.as_slice() } } +/// Reference counted pointer for [`FileMetaData`]. pub type FileMetaDataPtr = Rc; -/// Metadata for a Parquet file +/// Metadata for a Parquet file. 
pub struct FileMetaData { version: i32, num_rows: i64, @@ -67,6 +88,7 @@ pub struct FileMetaData { } impl FileMetaData { + /// Creates new file metadata. pub fn new( version: i32, num_rows: i64, @@ -83,34 +105,47 @@ impl FileMetaData { } } + /// Returns version of this file. pub fn version(&self) -> i32 { self.version } + /// Returns number of rows in the file. pub fn num_rows(&self) -> i64 { self.num_rows } + /// String message for application that wrote this file. This will have format + /// ` version (build )`. + /// For example, + /// ```shell + /// parquet-mr version 1.8.1 (build 4aba4dae7bb0d4edbcf7923ae1339f28fd3f7fcf) + /// impala version 1.3.0-INTERNAL (build 8a48ddb1eff84592b3fc06bc6f51ec120e1fffc9) + /// ``` pub fn created_by(&self) -> &Option { &self.created_by } + /// Returns reference to the Parquet schema. pub fn schema(&self) -> &SchemaType { self.schema.as_ref() } + /// Returns reference to schema descriptor for the file. pub fn schema_descr(&self) -> &SchemaDescriptor { &self.schema_descr } + /// Returns [`Rc`] for schema descriptor. pub fn schema_descr_ptr(&self) -> SchemaDescPtr { self.schema_descr.clone() } } +/// Reference counted pointer for [`RowGroupMetaData`]. pub type RowGroupMetaDataPtr = Rc; -/// Metadata for a row group +/// Metadata for a row group. pub struct RowGroupMetaData { columns: Vec, num_rows: i64, @@ -119,34 +154,42 @@ pub struct RowGroupMetaData { } impl RowGroupMetaData { + /// Number of columns in this row group. pub fn num_columns(&self) -> usize { self.columns.len() } + /// Returns reference to a [`ColumnChunkMetaData`] for a column index. pub fn column(&self, i: usize) -> &ColumnChunkMetaData { &self.columns[i] } + /// Returns slice of column chunk metadata [`Rc`] pointers. pub fn columns(&self) -> &[ColumnChunkMetaDataPtr] { &self.columns } + /// Returns number of rows in this row group. pub fn num_rows(&self) -> i64 { self.num_rows } + /// Returns total byte size of all the uncompressed column data in this row group. pub fn total_byte_size(&self) -> i64 { self.total_byte_size } + /// Returns reference to a schema descriptor. pub fn schema_descr(&self) -> &SchemaDescriptor { self.schema_descr.as_ref() } + /// Returns reference counted clone of schema descriptor. pub fn schema_descr_ptr(&self) -> SchemaDescPtr { self.schema_descr.clone() } + /// Internal conversion from Thrift method. pub fn from_thrift( schema_descr: SchemaDescPtr, mut rg: RowGroup @@ -168,9 +211,10 @@ impl RowGroupMetaData { } } +/// Reference counted pointer for [`ColumnChunkMetaData`]. pub type ColumnChunkMetaDataPtr = Rc; -/// Metadata for a column chunk +/// Metadata for a column chunk. pub struct ColumnChunkMetaData { column_type: Type, column_path: ColumnPath, @@ -187,7 +231,7 @@ pub struct ColumnChunkMetaData { dictionary_page_offset: Option } -/// Represents common operations for a column chunk +/// Represents common operations for a column chunk. impl ColumnChunkMetaData { /// File where the column chunk is stored. If not set, assumed to /// be at the same file as the metadata. @@ -206,73 +250,74 @@ impl ColumnChunkMetaData { self.column_type } - /// Path (or identifier) of this column + /// Path (or identifier) of this column. pub fn column_path(&self) -> &ColumnPath { &self.column_path } - /// Descriptor for this column + /// Descriptor for this column. pub fn column_descr(&self) -> &ColumnDescriptor { self.column_descr.as_ref() } - /// Reference counted clone of descriptor for this column + /// Reference counted clone of descriptor for this column. 
  pub fn column_descr_ptr(&self) -> ColumnDescPtr {
     self.column_descr.clone()
   }
 
-  /// All encodings used for this column
+  /// All encodings used for this column.
   pub fn encodings(&self) -> &Vec<Encoding> {
     &self.encodings
   }
 
-  /// Total number of values in this column chunk
+  /// Total number of values in this column chunk.
   pub fn num_values(&self) -> i64 {
     self.num_values
   }
 
+  /// Compression for this column.
   pub fn compression(&self) -> Compression {
     self.compression
   }
 
-  /// Get the total compressed data size of this column chunk
+  /// Returns the total compressed data size of this column chunk.
   pub fn compressed_size(&self) -> i64 {
     self.total_compressed_size
   }
 
-  /// Get the total uncompressed data size of this column chunk
+  /// Returns the total uncompressed data size of this column chunk.
   pub fn uncompressed_size(&self) -> i64 {
     self.total_uncompressed_size
   }
 
-  /// Get the offset for the column data
+  /// Returns the offset for the column data.
   pub fn data_page_offset(&self) -> i64 {
     self.data_page_offset
   }
 
-  /// Whether this column chunk contains a index page
+  /// Returns `true` if this column chunk contains an index page, `false` otherwise.
   pub fn has_index_page(&self) -> bool {
     self.index_page_offset.is_some()
   }
 
-  /// Get the offset for the index page
+  /// Returns the offset for the index page.
   pub fn index_page_offset(&self) -> Option<i64> {
     self.index_page_offset
   }
 
-  /// Whether this column chunk contains a dictionary page
+  /// Returns `true` if this column chunk contains a dictionary page, `false` otherwise.
   pub fn has_dictionary_page(&self) -> bool {
     self.dictionary_page_offset.is_some()
   }
 
   /// TODO: add statistics
 
-  /// Get the offset for the dictionary page, if any
+  /// Returns the offset for the dictionary page, if any.
   pub fn dictionary_page_offset(&self) -> Option<i64> {
     self.dictionary_page_offset
   }
 
-  /// Conversion from Thrift
+  /// Internal conversion from Thrift method.
   pub fn from_thrift(column_descr: ColumnDescPtr, cc: ColumnChunk) -> Result<Self> {
     if cc.meta_data.is_none() {
       return Err(general_err!("Expected to have column metadata"));
diff --git a/src/file/mod.rs b/src/file/mod.rs
index 0b63bd8..fcc4784 100644
--- a/src/file/mod.rs
+++ b/src/file/mod.rs
@@ -15,5 +15,26 @@
 // specific language governing permissions and limitations
 // under the License.
 
+//! Main entrypoint of working with Parquet files.
+//! Contains various file metadata, file and row group readers.
+//!
+//! # Example
+//!
+//! ```rust
+//! use std::fs::File;
+//! use std::path::Path;
+//! use parquet::file::reader::{FileReader, SerializedFileReader};
+//!
+//! // Opening a file
+//! let path = Path::new("data/alltypes_plain.parquet");
+//! let file = File::open(&path).expect("File should exist");
+//!
+//! let reader = SerializedFileReader::new(file).expect("Valid Parquet file");
+//!
+//! let parquet_metadata = reader.metadata();
+//! let row_group = reader.get_row_group(0);
+//! ```
+//!
+
 pub mod metadata;
 pub mod reader;
diff --git a/src/file/reader.rs b/src/file/reader.rs
index 33ce0dc..a5ce9c8 100644
--- a/src/file/reader.rs
+++ b/src/file/reader.rs
@@ -15,6 +15,9 @@
 // specific language governing permissions and limitations
 // under the License.
 
+//! Contains file reader API, and provides methods to access file metadata, row group
+//! readers to read individual column chunks, and the record iterator.
+
 use std::fs::File;
 use std::io::{self, BufReader, Read, Seek, SeekFrom};
 use std::rc::Rc;
@@ -38,22 +41,21 @@
 use util::memory::ByteBufferPtr;
 
 // APIs for file & row group readers
 
 /// Parquet file reader API.
With this, user can get metadata information about the -/// Parquet file, and can get reader for each row group. +/// Parquet file, can get reader for each row group, and access record iterator. pub trait FileReader { - /// Get metadata information about this file + /// Get metadata information about this file. fn metadata(&self) -> ParquetMetaDataPtr; - /// Get the total number of row groups for this file + /// Get the total number of row groups for this file. fn num_row_groups(&self) -> usize; /// Get the `i`th row group reader. Note this doesn't do bound check. - /// N.B.: Box<..> has 'static lifetime in default, but here we need the lifetime to be - /// the same as this. Otherwise, the row group metadata stored in the row group reader - /// may outlive the file reader. fn get_row_group(&self, i: usize) -> Result>; /// Get full iterator of `Row` from a file (over all row groups). + /// /// Iterator will automatically load the next row group to advance. + /// /// Projected schema can be a subset of or equal to the file schema, when it is None, /// full file schema is assumed. fn get_row_iter(&self, projection: Option) -> Result; @@ -65,13 +67,13 @@ pub trait RowGroupReader { /// Get metadata information about this row group. fn metadata(&self) -> RowGroupMetaDataPtr; - /// Get the total number of column chunks in this row group + /// Get the total number of column chunks in this row group. fn num_columns(&self) -> usize; - /// Get page reader for the `i`th column chunk + /// Get page reader for the `i`th column chunk. fn get_column_page_reader(&self, i: usize) -> Result>; - /// Get value reader for the `i`th column chunk + /// Get value reader for the `i`th column chunk. fn get_column_reader(&self, i: usize) -> Result; /// Get iterator of `Row` from this row group. @@ -80,14 +82,15 @@ pub trait RowGroupReader { fn get_row_iter(&self, projection: Option) -> Result; } - /// A thin wrapper on `T: Read` to be used by Thrift transport. Write is not supported. -pub struct TMemoryBuffer<'a, T> where T: 'a + Read { +struct TMemoryBuffer<'a, T> where T: 'a + Read { data: &'a mut T } impl<'a, T: 'a + Read> TMemoryBuffer<'a, T> { - pub fn new(data: &'a mut T) -> Self { Self { data: data } } + fn new(data: &'a mut T) -> Self { + Self { data: data } + } } impl<'a, T: 'a + Read> Read for TMemoryBuffer<'a, T> { @@ -103,12 +106,16 @@ impl<'a, T: 'a + Read> Read for TMemoryBuffer<'a, T> { const FOOTER_SIZE: usize = 8; const PARQUET_MAGIC: [u8; 4] = [b'P', b'A', b'R', b'1']; +/// A serialized implementation for a file reader. +/// Main entrypoint for using Parquet API. pub struct SerializedFileReader { buf: BufReader, metadata: ParquetMetaDataPtr } impl SerializedFileReader { + /// Creates file reader from a Parquet file. + /// Returns error if Parquet file does not exist or is corrupt. pub fn new(file: File) -> Result { let mut buf = BufReader::new(file); let metadata = Self::parse_metadata(&mut buf)?; @@ -125,25 +132,25 @@ impl SerializedFileReader { let file_metadata = buf.get_ref().metadata()?; let file_size = file_metadata.len(); if file_size < (FOOTER_SIZE as u64) { - return Err(general_err!("Invalid parquet file. Size is smaller than footer")); + return Err(general_err!("Invalid Parquet file. Size is smaller than footer")); } let mut footer_buffer: [u8; FOOTER_SIZE] = [0; FOOTER_SIZE]; buf.seek(SeekFrom::End(-(FOOTER_SIZE as i64)))?; buf.read_exact(&mut footer_buffer)?; if footer_buffer[4..] != PARQUET_MAGIC { - return Err(general_err!("Invalid parquet file. 
Corrupt footer")); + return Err(general_err!("Invalid Parquet file. Corrupt footer")); } let metadata_len = LittleEndian::read_i32(&footer_buffer[0..4]) as i64; if metadata_len < 0 { return Err(general_err!( - "Invalid parquet file. Metadata length is less than zero ({})", + "Invalid Parquet file. Metadata length is less than zero ({})", metadata_len )); } let metadata_start: i64 = file_size as i64 - FOOTER_SIZE as i64 - metadata_len; if metadata_start < 0 { return Err(general_err!( - "Invalid parquet file. Metadata start is less than zero ({})", + "Invalid Parquet file. Metadata start is less than zero ({})", metadata_start )); } @@ -185,6 +192,7 @@ impl FileReader for SerializedFileReader { fn get_row_group(&self, i: usize) -> Result> { let row_group_metadata = self.metadata.row_group(i); + // Row groups should be processed sequentially. let f = self.buf.get_ref().try_clone()?; Ok(Box::new(SerializedRowGroupReader::new(f, row_group_metadata))) } @@ -194,13 +202,14 @@ impl FileReader for SerializedFileReader { } } -/// A serialized impl for row group reader +/// A serialized implementation for row group reader. pub struct SerializedRowGroupReader { buf: BufReader, metadata: RowGroupMetaDataPtr } impl SerializedRowGroupReader { + /// Creates new serialized row group reader. pub fn new(file: File, metadata: RowGroupMetaDataPtr) -> Self { let buf = BufReader::new(file); Self { buf, metadata } @@ -262,7 +271,7 @@ impl RowGroupReader for SerializedRowGroupReader { } -/// A serialized impl for Parquet page reader +/// A serialized implementation for Parquet page reader. pub struct SerializedPageReader { // The file chunk buffer which references exactly the bytes for the column trunk to be // read by this page reader @@ -279,6 +288,7 @@ pub struct SerializedPageReader { } impl SerializedPageReader { + /// Creates new serialized page reader from file chunk. pub fn new( buf: FileChunk, total_num_values: i64, @@ -294,6 +304,7 @@ impl SerializedPageReader { Ok(result) } + /// Reads Page header from Thrift. fn read_page_header(&mut self) -> Result { let transport = TMemoryBuffer::new(&mut self.buf); let mut prot = TCompactInputProtocol::new(transport); @@ -421,7 +432,7 @@ mod tests { assert!(reader_result.is_err()); assert_eq!( reader_result.err().unwrap(), - general_err!("Invalid parquet file. Size is smaller than footer") + general_err!("Invalid Parquet file. Size is smaller than footer") ); } @@ -432,7 +443,7 @@ mod tests { assert!(reader_result.is_err()); assert_eq!( reader_result.err().unwrap(), - general_err!("Invalid parquet file. Corrupt footer") + general_err!("Invalid Parquet file. Corrupt footer") ); } @@ -444,7 +455,7 @@ mod tests { assert!(reader_result.is_err()); assert_eq!( reader_result.err().unwrap(), - general_err!("Invalid parquet file. Metadata length is less than zero (-16777216)") + general_err!("Invalid Parquet file. Metadata length is less than zero (-16777216)") ); } @@ -456,7 +467,7 @@ mod tests { assert!(reader_result.is_err()); assert_eq!( reader_result.err().unwrap(), - general_err!("Invalid parquet file. Metadata start is less than zero (-255)") + general_err!("Invalid Parquet file. Metadata start is less than zero (-255)") ); } diff --git a/src/lib.rs b/src/lib.rs index b971fb4..7dfa94e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -23,18 +23,38 @@ //! Crate provides API to access file schema and metadata from a Parquet file, extract //! row groups or column chunks from a file, and read records/values. //! +//! # Usage +//! +//! 
This crate is [on crates.io](https://crates.io/crates/parquet) and can be used by
+//! adding `parquet` to the list of dependencies in `Cargo.toml`.
+//!
+//! ```toml
+//! [dependencies]
+//! parquet = "0.1"
+//! ```
+//!
+//! and this to the project's crate root:
+//!
+//! ```rust
+//! extern crate parquet;
+//! ```
+//!
 //! # Example
+//!
+//! Below is an example of reading a Parquet file, listing Parquet metadata including
+//! column chunk metadata, using the record API and accessing row group readers.
+//!
 //! ```rust
 //! use std::fs::File;
 //! use std::path::Path;
 //! use parquet::file::reader::{FileReader, SerializedFileReader};
 //!
-//! // Opening a file
+//! // Creating a file reader
 //! let path = Path::new("data/alltypes_plain.parquet");
 //! let file = File::open(&path).expect("File should exist");
 //! let reader = SerializedFileReader::new(file).expect("Valid Parquet file");
 //!
-//! // Accessing Parquet metadata
+//! // Listing Parquet metadata
 //! let parquet_metadata = reader.metadata();
 //! let file_metadata = parquet_metadata.file_metadata();
 //! for i in 0..parquet_metadata.num_row_groups() {
@@ -57,7 +77,6 @@
 //! for i in 0..reader.num_row_groups() {
 //!   let row_group_reader = reader.get_row_group(i).expect("Should be okay");
 //! }
-//!
 //! ```
 //!
 //! # Metadata
From c2e1bdd5c04f4501d5e5b8a81538613fa807ee42 Mon Sep 17 00:00:00 2001
From: sadikovi
Date: Tue, 10 Apr 2018 11:49:20 +1200
Subject: [PATCH 14/32] update readme

---
 README.md | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/README.md b/README.md
index e134585..bfe27a3 100644
--- a/README.md
+++ b/README.md
@@ -7,6 +7,33 @@
 An [Apache Parquet](https://parquet.apache.org/) implementation in Rust (work in progress)
 
+## Usage
+Add this to your Cargo.toml:
+```toml
+[dependencies]
+parquet = "0.1"
+```
+
+and this to your crate root:
+```rust
+extern crate parquet;
+```
+
+Example usage:
+```rust
+use std::fs::File;
+use std::path::Path;
+use parquet::file::reader::{FileReader, SerializedFileReader};
+
+let file = File::open(&Path::new("/path/to/file")).expect("File should exist");
+let reader = SerializedFileReader::new(file).expect("Valid Parquet file");
+let mut iter = reader.get_row_iter(None).unwrap();
+while let Some(record) = iter.next() {
+  println!("{}", record);
+}
+```
+See the crate documentation for the available API.
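+
+If only some of the columns are needed, a projected schema can be passed to
+`get_row_iter`. Below is a minimal sketch of this (the message type string and the
+`id` column are hypothetical placeholders - match them to the actual file schema):
+```rust
+use std::fs::File;
+use std::path::Path;
+use parquet::file::reader::{FileReader, SerializedFileReader};
+use parquet::schema::parser::parse_message_type;
+
+let file = File::open(&Path::new("/path/to/file")).expect("File should exist");
+let reader = SerializedFileReader::new(file).expect("Valid Parquet file");
+
+// Project only the `id` column instead of reading the full file schema.
+let schema = parse_message_type("
+  message schema {
+    REQUIRED INT32 id;
+  }
+").expect("Valid message type");
+
+let mut iter = reader.get_row_iter(Some(schema)).expect("Valid projection");
+while let Some(record) = iter.next() {
+  println!("{}", record);
+}
+```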
+ ## Requirements - Rust nightly - Thrift 0.11.0 or higher From 99a135390b19c394c87234c0f25eb68c39c9b930 Mon Sep 17 00:00:00 2001 From: sadikovi Date: Tue, 10 Apr 2018 11:56:45 +1200 Subject: [PATCH 15/32] add docs.rs link --- README.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index bfe27a3..a6c68ac 100644 --- a/README.md +++ b/README.md @@ -4,6 +4,7 @@ [![Coverage Status](https://coveralls.io/repos/github/sunchao/parquet-rs/badge.svg?branch=master)](https://coveralls.io/github/sunchao/parquet-rs?branch=master) [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) [![](http://meritbadge.herokuapp.com/parquet)](https://crates.io/crates/parquet) +[![Documentation](https://docs.rs/parquet/badge.svg)](https://docs.rs/parquet) An [Apache Parquet](https://parquet.apache.org/) implementation in Rust (work in progress) @@ -25,8 +26,8 @@ use std::fs::File; use std::path::Path; use parquet::file::reader::{FileReader, SerializedFileReader}; -let file = File::open(&Path::new("/path/to/file")).expect("File should exist"); -let reader = SerializedFileReader::new(file).expect("Valid Parquet file"); +let file = File::open(&Path::new("/path/to/file")).unwrap(); +let reader = SerializedFileReader::new(file).unwrap(); let mut iter = reader.get_row_iter(None).unwrap(); while let Some(record) = iter.next() { println!("{}", record); From 5d34f72aeb568f18a7ec11cad1b57d91d4bce769 Mon Sep 17 00:00:00 2001 From: Ivan Sadikov Date: Tue, 10 Apr 2018 20:13:23 +1200 Subject: [PATCH 16/32] update docs --- src/file/metadata.rs | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/file/metadata.rs b/src/file/metadata.rs index 8c36bd3..9c764eb 100644 --- a/src/file/metadata.rs +++ b/src/file/metadata.rs @@ -16,6 +16,7 @@ // under the License. //! Contains information about available Parquet metadata. +//! //! The hierarchy of metadata is as follows: //! //! [`ParquetMetaData`] contains [`FileMetaData`] and zero or more [`RowGroupMetaData`] @@ -63,7 +64,7 @@ impl ParquetMetaData { self.row_groups.len() } - /// Returns row group for a particular position. + /// Returns row group metadata at `i`th position. /// Position should be less than number of row groups `num_row_groups`. pub fn row_group(&self, i: usize) -> RowGroupMetaDataPtr { self.row_groups[i].clone() @@ -136,7 +137,7 @@ impl FileMetaData { &self.schema_descr } - /// Returns [`Rc`] for schema descriptor. + /// Returns reference counted clone for schema descriptor. pub fn schema_descr_ptr(&self) -> SchemaDescPtr { self.schema_descr.clone() } @@ -159,7 +160,7 @@ impl RowGroupMetaData { self.columns.len() } - /// Returns reference to a [`ColumnChunkMetaData`] for a column index. + /// Returns reference to a [`ColumnChunkMetaData`] at `i`th index. pub fn column(&self, i: usize) -> &ColumnChunkMetaData { &self.columns[i] } @@ -174,7 +175,7 @@ impl RowGroupMetaData { self.num_rows } - /// Returns total byte size of all the uncompressed column data in this row group. + /// Returns total byte size of all uncompressed column data in this row group. pub fn total_byte_size(&self) -> i64 { self.total_byte_size } @@ -189,7 +190,7 @@ impl RowGroupMetaData { self.schema_descr.clone() } - /// Internal conversion from Thrift method. + /// Internal conversion from Thrift. 
pub fn from_thrift( schema_descr: SchemaDescPtr, mut rg: RowGroup @@ -317,7 +318,7 @@ impl ColumnChunkMetaData { self.dictionary_page_offset } - /// Internal conversion from Thrift method. + /// Internal conversion from Thrift. pub fn from_thrift(column_descr: ColumnDescPtr, cc: ColumnChunk) -> Result { if cc.meta_data.is_none() { return Err(general_err!("Expected to have column metadata")); From db3ae7a91d42ae2184f55b91636a25f07024ad8d Mon Sep 17 00:00:00 2001 From: Ivan Sadikov Date: Tue, 10 Apr 2018 20:23:25 +1200 Subject: [PATCH 17/32] update file docs --- src/file/metadata.rs | 47 ++++++++++++++++++++++++-------------------- src/file/mod.rs | 15 +++++++------- src/file/reader.rs | 29 +++++++++++++-------------- 3 files changed, 47 insertions(+), 44 deletions(-) diff --git a/src/file/metadata.rs b/src/file/metadata.rs index 9c764eb..245daf3 100644 --- a/src/file/metadata.rs +++ b/src/file/metadata.rs @@ -22,11 +22,13 @@ //! [`ParquetMetaData`] contains [`FileMetaData`] and zero or more [`RowGroupMetaData`] //! for each row group. //! -//! Each [`RowGroupMetaData`] contains one or more [`ColumnChunkMetaData`] for each -//! column chunk. +//! [`FileMetaData`] includes file version, application specific metadata. //! -//! Each [`ColumnChunkMetaData`] contains information about the leaf (primitive) column, -//! including descriptor, page metadata, byte sizes, compression, encodings, etc. +//! Each [`RowGroupMetaData`] contains information about row group and one or more +//! [`ColumnChunkMetaData`] for each column chunk. +//! +//! [`ColumnChunkMetaData`] has information about column chunk (primitive leaf column), +//! including encoding/compression, number of values, etc. use std::rc::Rc; @@ -46,7 +48,8 @@ pub struct ParquetMetaData { } impl ParquetMetaData { - /// Creates new Parquet metadata from file metadata and list of row group metadata. + /// Creates Parquet metadata from file metadata and a list of row group metadata for + /// each available row group. pub fn new(file_metadata: FileMetaData, row_groups: Vec) -> Self { ParquetMetaData { file_metadata: Rc::new(file_metadata), @@ -54,7 +57,7 @@ impl ParquetMetaData { } } - /// Returns [`FileMetaData`] reference. + /// Returns file metadata as reference counted clone. pub fn file_metadata(&self) -> FileMetaDataPtr { self.file_metadata.clone() } @@ -64,7 +67,7 @@ impl ParquetMetaData { self.row_groups.len() } - /// Returns row group metadata at `i`th position. + /// Returns row group metadata for `i`th position. /// Position should be less than number of row groups `num_row_groups`. pub fn row_group(&self, i: usize) -> RowGroupMetaDataPtr { self.row_groups[i].clone() @@ -116,23 +119,24 @@ impl FileMetaData { self.num_rows } - /// String message for application that wrote this file. This will have format - /// ` version (build )`. - /// For example, + /// String message for application that wrote this file. + /// + /// This should have the following format: + /// ` version (build )`. + /// /// ```shell - /// parquet-mr version 1.8.1 (build 4aba4dae7bb0d4edbcf7923ae1339f28fd3f7fcf) - /// impala version 1.3.0-INTERNAL (build 8a48ddb1eff84592b3fc06bc6f51ec120e1fffc9) + /// parquet-mr version 1.8.0 (build 0fda28af84b9746396014ad6a415b90592a98b3b) /// ``` pub fn created_by(&self) -> &Option { &self.created_by } - /// Returns reference to the Parquet schema. + /// Returns Parquet ['Type`] that describes schema in this file. pub fn schema(&self) -> &SchemaType { self.schema.as_ref() } - /// Returns reference to schema descriptor for the file. 
+ /// Returns a reference to schema descriptor. pub fn schema_descr(&self) -> &SchemaDescriptor { &self.schema_descr } @@ -160,7 +164,7 @@ impl RowGroupMetaData { self.columns.len() } - /// Returns reference to a [`ColumnChunkMetaData`] at `i`th index. + /// Returns column chunk metadata for `i`th column. pub fn column(&self, i: usize) -> &ColumnChunkMetaData { &self.columns[i] } @@ -170,12 +174,12 @@ impl RowGroupMetaData { &self.columns } - /// Returns number of rows in this row group. + /// Number of rows in this row group. pub fn num_rows(&self) -> i64 { self.num_rows } - /// Returns total byte size of all uncompressed column data in this row group. + /// Total byte size of all uncompressed column data in this row group. pub fn total_byte_size(&self) -> i64 { self.total_byte_size } @@ -190,7 +194,7 @@ impl RowGroupMetaData { self.schema_descr.clone() } - /// Internal conversion from Thrift. + /// Internal method to convert from Thrift. pub fn from_thrift( schema_descr: SchemaDescPtr, mut rg: RowGroup @@ -234,8 +238,9 @@ pub struct ColumnChunkMetaData { /// Represents common operations for a column chunk. impl ColumnChunkMetaData { - /// File where the column chunk is stored. If not set, assumed to - /// be at the same file as the metadata. + /// File where the column chunk is stored. + /// + /// If not set, assumed to belong to the same file as the metadata. /// This path is relative to the current file. pub fn file_path(&self) -> Option<&String> { self.file_path.as_ref() @@ -318,7 +323,7 @@ impl ColumnChunkMetaData { self.dictionary_page_offset } - /// Internal conversion from Thrift. + /// Internal method to convert from Thrift. pub fn from_thrift(column_descr: ColumnDescPtr, cc: ColumnChunk) -> Result { if cc.meta_data.is_none() { return Err(general_err!("Expected to have column metadata")); diff --git a/src/file/mod.rs b/src/file/mod.rs index fcc4784..8e66e9c 100644 --- a/src/file/mod.rs +++ b/src/file/mod.rs @@ -15,8 +15,11 @@ // specific language governing permissions and limitations // under the License. -//! Main entrypoint of working with Parquet files. -//! Contains various file metadata, file and row group readers. +//! Main entrypoint for working with Parquet API. +//! Provides access to file and row group readers, record API, etc. +//! +//! See [`reader::SerializedFileReader`] for a starting reference and +//! [`metadata::ParquetMetaData`] for file metadata. //! //! # Example //! @@ -25,16 +28,12 @@ //! use std::path::Path; //! use parquet::file::reader::{FileReader, SerializedFileReader}; //! -//! // Opening a file -//! let path = Path::new("data/alltypes_plain.parquet"); -//! let file = File::open(&path).expect("File should exist"); -//! -//! let reader = SerializedFileReader::new(file).expect("Valid Parquet file"); +//! let file = File::open(&Path::new("data/alltypes_plain.parquet")).unwrap(); +//! let reader = SerializedFileReader::new(file).unwrap(); //! //! let parquet_metadata = reader.metadata(); //! let row_group = reader.get_row_group(0); //! ``` -//! pub mod metadata; pub mod reader; diff --git a/src/file/reader.rs b/src/file/reader.rs index a5ce9c8..7f3b1d0 100644 --- a/src/file/reader.rs +++ b/src/file/reader.rs @@ -52,7 +52,7 @@ pub trait FileReader { /// Get the `i`th row group reader. Note this doesn't do bound check. fn get_row_group(&self, i: usize) -> Result>; - /// Get full iterator of `Row` from a file (over all row groups). + /// Get full iterator of `Row`s from a file (over all row groups). 
/// /// Iterator will automatically load the next row group to advance. /// @@ -76,7 +76,8 @@ pub trait RowGroupReader { /// Get value reader for the `i`th column chunk. fn get_column_reader(&self, i: usize) -> Result; - /// Get iterator of `Row` from this row group. + /// Get iterator of `Row`s from this row group. + /// /// Projected schema can be a subset of or equal to the file schema, when it is None, /// full file schema is assumed. fn get_row_iter(&self, projection: Option) -> Result; @@ -106,8 +107,7 @@ impl<'a, T: 'a + Read> Read for TMemoryBuffer<'a, T> { const FOOTER_SIZE: usize = 8; const PARQUET_MAGIC: [u8; 4] = [b'P', b'A', b'R', b'1']; -/// A serialized implementation for a file reader. -/// Main entrypoint for using Parquet API. +/// A serialized implementation for Parquet [`FileReader`]. pub struct SerializedFileReader { buf: BufReader, metadata: ParquetMetaDataPtr @@ -202,15 +202,15 @@ impl FileReader for SerializedFileReader { } } -/// A serialized implementation for row group reader. +/// A serialized implementation for Parquet [`RowGroupReader`]. pub struct SerializedRowGroupReader { buf: BufReader, metadata: RowGroupMetaDataPtr } impl SerializedRowGroupReader { - /// Creates new serialized row group reader. - pub fn new(file: File, metadata: RowGroupMetaDataPtr) -> Self { + /// Creates new row group reader from a file and row group metadata. + fn new(file: File, metadata: RowGroupMetaDataPtr) -> Self { let buf = BufReader::new(file); Self { buf, metadata } } @@ -270,26 +270,25 @@ impl RowGroupReader for SerializedRowGroupReader { } } - -/// A serialized implementation for Parquet page reader. +/// A serialized implementation for Parquet [`PageReader`]. pub struct SerializedPageReader { - // The file chunk buffer which references exactly the bytes for the column trunk to be - // read by this page reader + // The file chunk buffer which references exactly the bytes for the column trunk + // to be read by this page reader. buf: FileChunk, // The compression codec for this column chunk. Only set for non-PLAIN codec. decompressor: Option>, - // The number of values we have seen so far + // The number of values we have seen so far. seen_num_values: i64, - // The number of total values in this column chunk + // The number of total values in this column chunk. total_num_values: i64 } impl SerializedPageReader { - /// Creates new serialized page reader from file chunk. - pub fn new( + /// Creates a new serialized page reader from file chunk. 
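+  /// Returns an error if a decompression codec cannot be created for `compression`.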
+ fn new( buf: FileChunk, total_num_values: i64, compression: Compression From 08fd9e803de7fe97d1fb26027cbfe17b91db7575 Mon Sep 17 00:00:00 2001 From: Ivan Sadikov Date: Tue, 10 Apr 2018 21:13:11 +1200 Subject: [PATCH 18/32] add comment for column/reader.rs tests --- src/column/reader.rs | 65 +++++++++++++++++++++++++++++++++++++++----- 1 file changed, 58 insertions(+), 7 deletions(-) diff --git a/src/column/reader.rs b/src/column/reader.rs index 0961681..27ecfa8 100644 --- a/src/column/reader.rs +++ b/src/column/reader.rs @@ -624,8 +624,51 @@ mod tests { ); } - // == helper methods to make pages and test == - + // ---------------------------------------------------------------------- + // Helper methods to make pages and test + // + // # Overview + // + // Most of the test functionality is implemented in `ColumnReaderTester`, which + // provides some general data page test methods: + // - `test_read_batch_general` + // - `test_read_batch` + // + // There are also some high level wrappers that are part of `ColumnReaderTester`: + // - `plain_v1` -> call `test_read_batch_general` with data page v1 and plain encoding + // - `plain_v2` -> call `test_read_batch_general` with data page v2 and plain encoding + // - `dict_v1` -> call `test_read_batch_general` with data page v1 + dictionary page + // - `dict_v2` -> call `test_read_batch_general` with data page v2 + dictionary page + // + // And even higher level wrappers that simplify testing of almost the same test cases: + // - `get_test_int32_type`, provides dummy schema type + // - `get_test_int64_type`, provides dummy schema type + // - `test_read_batch_int32`, wrapper for `read_batch` tests, since they are basically + // the same, just different def/rep levels and batch size. + // + // # Page assembly + // + // Page construction and generation of values, definition and repetition levels happens + // in `make_pages` function. + // All values are randomly generated based on provided min/max, levels are calculated + // based on provided max level for column descriptor (which is basically either int32 + // or int64 type in tests) and `levels_per_page` variable. + // + // We use `DataPageBuilder` and its implementation `DataPageBuilderImpl` to actually + // turn values, definition and repetition levels into data pages (either v1 or v2). + // + // Those data pages are then stored as part of `TestPageReader` (we just pass vector + // of generated pages directly), which implements `PageReader` interface. + // + // # Comparison + // + // This allows us to pass test page reader into column reader, so we can test + // functionality of column reader - see `test_read_batch`, where we create column + // reader -> typed column reader, buffer values in `read_batch` method and compare + // output with generated data. + + // Returns dummy Parquet `Type` for primitive field, because most of our tests use + // INT32 physical type. fn get_test_int32_type() -> SchemaType { SchemaType::primitive_type_builder("a", PhysicalType::INT32) .with_repetition(Repetition::REQUIRED) @@ -635,6 +678,7 @@ mod tests { .expect("build() should be OK") } + // Returns dummy Parquet `Type` for INT64 physical type. fn get_test_int64_type() -> SchemaType { SchemaType::primitive_type_builder("a", PhysicalType::INT64) .with_repetition(Repetition::REQUIRED) @@ -644,7 +688,10 @@ mod tests { .expect("build() should be OK") } - // Tests `read_batch()` functionality for Int32Type. + // Tests `read_batch()` functionality for INT32. 
+ // + // This is a high level wrapper on `ColumnReaderTester` that allows us to specify some + // boilerplate code for setting up definition/repetition levels and column descriptor. fn test_read_batch_int32( batch_size: usize, values: &mut[i32], @@ -688,7 +735,7 @@ mod tests { Self { rep_levels: Vec::new(), def_levels: Vec::new(), values: Vec::new() } } - // method to generate and test data pages v1 + // Method to generate and test data pages v1 fn plain_v1( &mut self, desc: ColumnDescPtr, @@ -710,7 +757,7 @@ mod tests { ); } - // method to generate and test data pages v2 + // Method to generate and test data pages v2 fn plain_v2( &mut self, desc: ColumnDescPtr, @@ -732,6 +779,7 @@ mod tests { ); } + // Method to generate and test dictionary page + data pages v1 fn dict_v1( &mut self, desc: ColumnDescPtr, @@ -753,6 +801,7 @@ mod tests { ); } + // Method to generate and test dictionary page + data pages v2 fn dict_v2( &mut self, desc: ColumnDescPtr, @@ -1000,7 +1049,10 @@ mod tests { ) where T: 'static { assert!( self.num_values >= values.len() as u32, - "num_values: {}, values.len(): {}", self.num_values, values.len()); + "num_values: {}, values.len(): {}", + self.num_values, + values.len() + ); self.encoding = Some(encoding); let mut encoder: Box> = get_encoder::( self.desc.clone(), encoding, self.mem_tracker.clone() @@ -1037,7 +1089,6 @@ mod tests { } } } - } fn make_pages( From 655d5b6d1b579772bce1c4cbbd087427becbb32b Mon Sep 17 00:00:00 2001 From: Ivan Sadikov Date: Wed, 11 Apr 2018 20:34:47 +1200 Subject: [PATCH 19/32] add parquet version, minor updates --- README.md | 3 +++ src/basic.rs | 9 ++++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index a6c68ac..aedc5c7 100644 --- a/README.md +++ b/README.md @@ -35,6 +35,9 @@ while let Some(record) = iter.next() { ``` See crate documentation on available API. +## Versions +- [Parquet-format](https://github.com/apache/parquet-format) 2.3.2 + ## Requirements - Rust nightly - Thrift 0.11.0 or higher diff --git a/src/basic.rs b/src/basic.rs index c416465..8fbe39d 100644 --- a/src/basic.rs +++ b/src/basic.rs @@ -177,38 +177,45 @@ pub enum Encoding { /// - FLOAT - 4 bytes per value, stored as little-endian. /// - DOUBLE - 8 bytes per value, stored as little-endian. /// - BYTE_ARRAY - 4 byte length stored as little endian, followed by bytes. - /// FIXED_LEN_BYTE_ARRAY - just the bytes are stored. + /// - FIXED_LEN_BYTE_ARRAY - just the bytes are stored. PLAIN, /// **Deprecated** dictionary encoding. + /// /// The values in the dictionary are encoded using PLAIN encoding. /// Since it is deprecated, RLE_DICTIONARY encoding is used for a data page, and PLAIN /// encoding is used for dictionary page. PLAIN_DICTIONARY, /// Group packed run length encoding. + /// /// Usable for definition/repetition levels encoding and boolean values. RLE, /// Bit packed encoding. + /// /// This can only be used if the data has a known max width. /// Usable for definition/repetition levels encoding. BIT_PACKED, /// Delta encoding for integers, either INT32 or INT64. + /// /// Works best on sorted data. DELTA_BINARY_PACKED, /// Encoding for byte arrays to separate the length values and the data. + /// /// The lengths are encoded using DELTA_BINARY_PACKED encoding. DELTA_LENGTH_BYTE_ARRAY, /// Incremental encoding for byte arrays. + /// /// Prefix lengths are encoded using DELTA_BINARY_PACKED encoding. /// Suffixes are stored using DELTA_LENGTH_BYTE_ARRAY encoding. DELTA_BYTE_ARRAY, /// Dictionary encoding. 
+ /// /// The ids are encoded using the RLE encoding. RLE_DICTIONARY } From c6f350ca194916f43a76cd5c98e97909ba0c32db Mon Sep 17 00:00:00 2001 From: Ivan Sadikov Date: Wed, 11 Apr 2018 20:47:36 +1200 Subject: [PATCH 20/32] update comment for types.rs --- src/schema/types.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/schema/types.rs b/src/schema/types.rs index 1277590..25e2056 100644 --- a/src/schema/types.rs +++ b/src/schema/types.rs @@ -738,8 +738,7 @@ fn build_tree( } } -/// Conversion from Thrift equivalents. -/// Should be considered private. +/// Internal method to convert from Thrift. pub fn from_thrift(elements: &mut [SchemaElement]) -> Result { let mut index = 0; let mut schema_nodes = Vec::new(); From 54a1c0dfdf29027cd0da8029f48918420f4e7e65 Mon Sep 17 00:00:00 2001 From: Ivan Sadikov Date: Wed, 11 Apr 2018 21:01:40 +1200 Subject: [PATCH 21/32] reexport modules --- benches/decoding.rs | 6 +++--- benches/encoding.rs | 4 ++-- src/lib.rs | 7 ++++++- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/benches/decoding.rs b/benches/decoding.rs index 10be4a3..ec1f097 100644 --- a/benches/decoding.rs +++ b/benches/decoding.rs @@ -30,9 +30,9 @@ use std::rc::Rc; use parquet::basic::*; use parquet::data_type::*; -use parquet::encodings::decoding::*; -use parquet::encodings::encoding::*; -use parquet::util::memory::{ByteBufferPtr, MemTracker}; +use parquet::decoding::*; +use parquet::encoding::*; +use parquet::memory::{ByteBufferPtr, MemTracker}; macro_rules! plain { ($fname:ident, $num_values:expr, $batch_size:expr, $ty:ident, $pty:expr, diff --git a/benches/encoding.rs b/benches/encoding.rs index b23ed02..e7ab04c 100644 --- a/benches/encoding.rs +++ b/benches/encoding.rs @@ -30,8 +30,8 @@ use std::rc::Rc; use parquet::basic::*; use parquet::data_type::*; -use parquet::encodings::encoding::*; -use parquet::util::memory::MemTracker; +use parquet::encoding::*; +use parquet::memory::MemTracker; macro_rules! plain { ($fname:ident, $batch_size:expr, $ty:ident, $pty:expr, $gen_data_fn:expr) => { diff --git a/src/lib.rs b/src/lib.rs index 7dfa94e..ff17447 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -137,10 +137,15 @@ pub mod basic; pub mod data_type; mod parquet_thrift; +// Exported for external use, such as benchmarks +pub use util::memory; +pub use encodings::encoding; +pub use encodings::decoding; + #[macro_use] mod util; -mod compression; mod encodings; +pub mod compression; pub mod column; pub mod record; pub mod schema; From c1308345c53f0fa2af0d5e81e64873a5b47da057 Mon Sep 17 00:00:00 2001 From: Ivan Sadikov Date: Wed, 11 Apr 2018 21:29:26 +1200 Subject: [PATCH 22/32] add compression docs --- src/compression.rs | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/src/compression.rs b/src/compression.rs index 377653f..e54c0ec 100644 --- a/src/compression.rs +++ b/src/compression.rs @@ -15,6 +15,20 @@ // specific language governing permissions and limitations // under the License. +//! Contains codec interface and supported codec implementations. +//! +//! See [`Compression`](`CodecType`) for all available compression algorithms. +//! +//! # Example +//! +//! ```rust +//! use parquet::basic::Compression; +//! use parquet::compression::create_codec; +//! +//! let codec = create_codec(Compression::SNAPPY); +//! assert!(codec.is_ok()); +//! 
``` + use std::io::{Read, Write}; use basic::Compression as CodecType; @@ -25,6 +39,7 @@ use flate2::read::GzDecoder; use flate2::write::GzEncoder; use snap::{decompress_len, Decoder, Encoder}; +/// Parquet compression codec interface. pub trait Codec { /// Compresses data stored in slice `input_buf` and returns a new vector with the /// compressed data. @@ -37,8 +52,7 @@ pub trait Codec { fn decompress(&mut self, input_buf: &[u8], output_buf: &mut Vec) -> Result; } - -/// Given the compression type `codec`, returns a codec used to compress & decompress +/// Given the compression type `codec`, returns a codec used to compress and decompress /// bytes for the compression type. /// This returns `None` if the codec type is `UNCOMPRESSED`. pub fn create_codec(codec: CodecType) -> Result>> { @@ -51,12 +65,14 @@ pub fn create_codec(codec: CodecType) -> Result>> { } } +/// Codec for Snappy compression format. pub struct SnappyCodec { decoder: Decoder, encoder: Encoder } impl SnappyCodec { + /// Creates new Snappy compression codec. fn new() -> Self { Self { decoder: Decoder::new(), @@ -79,9 +95,11 @@ impl Codec for SnappyCodec { } } +/// Codec for GZIP compression algorithm. pub struct GZipCodec {} impl GZipCodec { + /// Creates new GZIP compression codec. fn new() -> Self { Self {} } @@ -108,9 +126,11 @@ const BROTLI_DEFAULT_BUFFER_SIZE: usize = 4096; const BROTLI_DEFAULT_COMPRESSION_QUALITY: u32 = 9; // supported levels 0-9 const BROTLI_DEFAULT_LG_WINDOW_SIZE: u32 = 22; // recommended between 20-22 +/// Codec for Brotli compression algorithm. pub struct BrotliCodec {} impl BrotliCodec { + /// Creates new Brotli compression codec. fn new() -> Self { Self {} } From e6c9803d4006b01e2140cf8a356c8b75b0c8f567 Mon Sep 17 00:00:00 2001 From: Ivan Sadikov Date: Wed, 11 Apr 2018 21:42:11 +1200 Subject: [PATCH 23/32] update docs --- src/compression.rs | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/src/compression.rs b/src/compression.rs index e54c0ec..a74c169 100644 --- a/src/compression.rs +++ b/src/compression.rs @@ -25,8 +25,18 @@ //! use parquet::basic::Compression; //! use parquet::compression::create_codec; //! -//! let codec = create_codec(Compression::SNAPPY); -//! assert!(codec.is_ok()); +//! let mut codec = match create_codec(Compression::SNAPPY) { +//! Ok(Some(codec)) => codec, +//! _ => panic!() +//! }; +//! +//! let data = vec![b'p', b'a', b'r', b'q', b'u', b'e', b't']; +//! let compressed = codec.compress(&data[..]).unwrap(); +//! +//! let mut output = vec![]; +//! codec.decompress(&compressed[..], &mut output).unwrap(); +//! +//! assert_eq!(output, data); //! ``` use std::io::{Read, Write}; From 2e5924598f581d960939e62e3f9af351a2d57f09 Mon Sep 17 00:00:00 2001 From: Ivan Sadikov Date: Wed, 11 Apr 2018 22:05:47 +1200 Subject: [PATCH 24/32] add memory docs --- src/util/memory.rs | 47 +++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 44 insertions(+), 3 deletions(-) diff --git a/src/util/memory.rs b/src/util/memory.rs index 0fe5106..72f0915 100644 --- a/src/util/memory.rs +++ b/src/util/memory.rs @@ -25,18 +25,21 @@ use std::rc::{Rc, Weak}; // ---------------------------------------------------------------------- // Memory Tracker classes +/// Reference counted pointer for [`MemTracker`]. pub type MemTrackerPtr = Rc; +/// Non-owning reference for [`MemTracker`]. pub type WeakMemTrackerPtr = Weak; +/// Struct to track memory usage information. #[derive(Debug)] pub struct MemTracker { - // Memory usage information tracked by this. 
In the tuple, the first element is the - // current memory allocated (in bytes), and the second element is the maximum memory - // allocated so far (in bytes). + // In the tuple, the first element is the current memory allocated (in bytes), + // and the second element is the maximum memory allocated so far (in bytes). memory_usage: Cell<(i64, i64)> } impl MemTracker { + /// Creates new memory tracker. #[inline] pub fn new() -> MemTracker { MemTracker { @@ -70,7 +73,9 @@ impl MemTracker { // ---------------------------------------------------------------------- // Buffer classes +/// Type alias for [`Buffer`]. pub type ByteBuffer = Buffer; +/// Type alias for [`BufferPtr`]. pub type ByteBufferPtr = BufferPtr; /// A resize-able buffer class with generic member, with optional memory tracker. @@ -88,6 +93,7 @@ pub struct Buffer { } impl Buffer { + /// Creates new empty buffer. pub fn new() -> Self { Buffer { data: vec![], @@ -96,6 +102,7 @@ impl Buffer { } } + /// Adds [`MemTracker`] for this buffer. #[inline] pub fn with_mem_tracker(mut self, mc: MemTrackerPtr) -> Self { mc.alloc((self.data.capacity() * self.type_length) as i64); @@ -103,11 +110,13 @@ impl Buffer { self } + /// Returns slice of data in this buffer. #[inline] pub fn data(&self) -> &[T] { self.data.as_slice() } + /// Sets data for this buffer. #[inline] pub fn set_data(&mut self, new_data: Vec) { if let Some(ref mc) = self.mem_tracker { @@ -117,6 +126,12 @@ impl Buffer { self.data = new_data; } + /// Resizes underlying data in place to a new length `new_size`. + /// + /// If `new_size` is less than current length, data is truncated, otherwise, it is + /// extended to `new_size` with provided default value `init_value`. + /// + /// Memory tracker is also updated, if available. #[inline] pub fn resize(&mut self, new_size: usize, init_value: T) { let old_capacity = self.data.capacity(); @@ -127,11 +142,15 @@ impl Buffer { } } + /// Clears underlying data. #[inline] pub fn clear(&mut self) { self.data.clear() } + /// Reserves capacity `additional_capacity` for underlying data vector. + /// + /// Memory tracker is also updated, if available. #[inline] pub fn reserve(&mut self, additional_capacity: usize) { let old_capacity = self.data.capacity(); @@ -144,6 +163,8 @@ impl Buffer { } } + /// Returns [`BufferPtr`] with buffer data. + /// Buffer data is reset. #[inline] pub fn consume(&mut self) -> BufferPtr { let old_data = mem::replace(&mut self.data, vec![]); @@ -154,26 +175,33 @@ impl Buffer { result } + /// Adds `value` to the buffer. #[inline] pub fn push(&mut self, value: T) { self.data.push(value) } + /// Returns current capacity for the buffer. #[inline] pub fn capacity(&self) -> usize { self.data.capacity() } + /// Returns current size for the buffer. #[inline] pub fn size(&self) -> usize { self.data.len() } + /// Returns `true` if memory tracker is added to buffer, `false` otherwise. #[inline] pub fn is_mem_tracked(&self) -> bool { self.mem_tracker.is_some() } + /// Returns memory tracker associated with this buffer. + /// This may panic, if memory tracker is not set, use method above to check if + /// memory tracker is available. #[inline] pub fn mem_tracker(&self) -> &MemTrackerPtr { self.mem_tracker.as_ref().unwrap() @@ -245,6 +273,7 @@ pub struct BufferPtr { } impl BufferPtr { + /// Creates new buffer from a vector. pub fn new(v: Vec) -> Self { let len = v.len(); Self { @@ -255,10 +284,14 @@ impl BufferPtr { } } + /// Returns slice of data in this buffer. 
pub fn data(&self) -> &[T] { &self.data[self.start..self.start + self.len] } + /// Updates this buffer with new `start` position and length `len`. + /// + /// Range should be within current start position and length. pub fn with_range(mut self, start: usize, len: usize) -> Self { assert!(start <= self.len); assert!(start + len <= self.len); @@ -267,23 +300,29 @@ impl BufferPtr { self } + /// Adds memory tracker to this buffer. pub fn with_mem_tracker(mut self, mc: MemTrackerPtr) -> Self { self.mem_tracker = Some(mc); self } + /// Returns start position of this buffer. pub fn start(&self) -> usize { self.start } + /// Returns length of this buffer pub fn len(&self) -> usize { self.len } + /// Returns `true` if this buffer has memory tracker, `false` otherwise. pub fn is_mem_tracked(&self) -> bool { self.mem_tracker.is_some() } + /// Returns a shallow copy of the buffer. + /// Reference counted pointer to the data is copied. pub fn all(&self) -> BufferPtr { BufferPtr { data: self.data.clone(), @@ -293,6 +332,7 @@ impl BufferPtr { } } + /// Returns a shallow copy of the buffer that starts with `start` position. pub fn start_from(&self, start: usize) -> BufferPtr { assert!(start <= self.len); BufferPtr { @@ -303,6 +343,7 @@ impl BufferPtr { } } + /// Returns a shallow copy that is a range slice within this buffer. pub fn range(&self, start: usize, len: usize) -> BufferPtr { assert!(start + len <= self.len); BufferPtr { From 5bc4cd60185f55c1b88c37f907eb08bb38c9c4e3 Mon Sep 17 00:00:00 2001 From: Ivan Sadikov Date: Thu, 12 Apr 2018 10:19:31 +1200 Subject: [PATCH 25/32] update global doc --- src/basic.rs | 2 +- src/encodings/decoding.rs | 8 ++++---- src/lib.rs | 26 ++++++++++++++------------ 3 files changed, 19 insertions(+), 17 deletions(-) diff --git a/src/basic.rs b/src/basic.rs index 8fbe39d..b7fc80e 100644 --- a/src/basic.rs +++ b/src/basic.rs @@ -16,7 +16,7 @@ // under the License. //! Contains Rust mappings for Thrift definition. -//! See `parquet.thrift` file to see raw definitions of enums listed below. +//! Refer to `parquet.thrift` file to see raw definitions. use std::convert; use std::fmt; diff --git a/src/encodings/decoding.rs b/src/encodings/decoding.rs index 4013db0..29ff56f 100644 --- a/src/encodings/decoding.rs +++ b/src/encodings/decoding.rs @@ -31,6 +31,7 @@ use util::memory::{ByteBuffer, ByteBufferPtr}; // ---------------------------------------------------------------------- // Decoders +/// An Parquet decoder for the data type `T`. pub trait Decoder { /// Sets the data to decode to be `data`, which should contain `num_values` of values /// to decode. @@ -43,15 +44,15 @@ pub trait Decoder { /// unless the remaining number of values is less than `buffer.len()`. fn get(&mut self, buffer: &mut [T::T]) -> Result; - /// Returns the number of values left in this decoder stream + /// Returns the number of values left in this decoder stream. fn values_left(&self) -> usize; - /// Returns the encoding for this decoder + /// Returns the encoding for this decoder. fn encoding(&self) -> Encoding; } - /// Gets a decoder for the column descriptor `descr` and encoding type `encoding`. +/// /// NOTE: the primitive type in `descr` MUST match the data type `T`, otherwise /// disastrous consequence could occur. 
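+///
+/// A sketch of the intended call (`descr` is a hypothetical `ColumnDescPtr` obtained
+/// from the file schema, and the column is assumed to store INT32 values):
+/// ```ignore
+/// let mut decoder = get_decoder::<Int32Type>(descr, Encoding::PLAIN)?;
+/// decoder.set_data(page_data, num_values)?;
+/// ```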
pub fn get_decoder(
@@ -82,7 +83,6 @@ pub fn get_decoder(
   Ok(decoder)
 }
 
-
 // ----------------------------------------------------------------------
 // PLAIN Decoding
 
diff --git a/src/lib.rs b/src/lib.rs
index ff17447..6a8c47a 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -81,25 +81,27 @@
 //!
 //! # Metadata
 //!
-//! Module [`file::metadata`] contains Parquet metadata structs, including file metadata,
-//! that has information about file schema, version, and number of rows, row group
-//! metadata with a set of column chunks that contain column type and encodings, number
-//! of values and compressed/uncompressed size in bytes.
+//! Module [`metadata`](`file::metadata`) contains Parquet metadata structs, including
+//! file metadata, which has information about file schema, version, and number of rows,
+//! and row group metadata with a set of column chunks that contain column type,
+//! encodings, number of values and compressed/uncompressed size in bytes.
 //!
 //! # Schema and type
 //!
-//! Parquet schema can be extracted from [`file::metadata::FileMetaData`] and is
-//! represented by Parquet type.
+//! Parquet schema can be extracted from [`FileMetaData`](`file::metadata::FileMetaData`)
+//! and is represented by Parquet type.
 //!
-//! Parquet type is described by [`schema::types::Type`], including top level message
-//! type or schema. Refer to the [`schema`] module for the detailed information on Type
-//! API, printing and parsing of message types.
+//! Parquet type is described by [`Type`](`schema::types::Type`), including top level
+//! message type or schema. Refer to the [`schema`] module for detailed information on
+//! the Type API, and on printing and parsing of message types.
 //!
 //! # File and row group API
 //!
-//! File reader [`file::reader::FileReader`] is a starting point for working with Parquet
-//! files. Provides set of methods to get file metadata, row group readers
-//! [`file::reader::RowGroupReader`] to get access to column readers and record iterator.
+//! Module [`file`] contains all definitions for exploring Parquet file metadata and data.
+//! File reader [`FileReader`](`file::reader::FileReader`) is a starting point for
+//! working with Parquet files - it provides a set of methods to get file metadata, and
+//! row group readers [`RowGroupReader`](`file::reader::RowGroupReader`) to get access
+//! to column readers and a record iterator.
 //!
 //! # Read API
 //!
From d566524c68e0f337594746147c842d15e98ca68d Mon Sep 17 00:00:00 2001
From: Ivan Sadikov 
Date: Thu, 12 Apr 2018 20:53:53 +1200
Subject: [PATCH 26/32] add decoding docs

---
 src/encodings/decoding.rs | 37 ++++++++++++++++++++++++++++++++++---
 1 file changed, 34 insertions(+), 3 deletions(-)

diff --git a/src/encodings/decoding.rs b/src/encodings/decoding.rs
index 29ff56f..be05e75 100644
--- a/src/encodings/decoding.rs
+++ b/src/encodings/decoding.rs
@@ -31,7 +31,7 @@ use util::memory::{ByteBuffer, ByteBufferPtr};
 // ----------------------------------------------------------------------
 // Decoders
 
-/// An Parquet decoder for the data type `T`.
+/// A Parquet decoder for the data type `T`.
 pub trait Decoder<T: DataType> {
   /// Sets the data to decode to be `data`, which should contain `num_values` of values
   /// to decode.
@@ -86,6 +86,10 @@ pub fn get_decoder(
 // ----------------------------------------------------------------------
 // PLAIN Decoding
 
+/// Plain decoding that supports all types.
+/// Values are encoded back to back. For native types, data is encoded as little endian.
+/// Floating point types are encoded in IEEE.
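+///
+/// For example, the INT32 values `1, 2` are stored back to back as the eight bytes
+/// `01 00 00 00 02 00 00 00`, which the decoder reads again as little endian.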
+/// See [`PlainEncoder`](`::encoding::PlainEncoder`) for more information.
 pub struct PlainDecoder<T: DataType> {
   // The remaining number of values in the byte array
   num_values: usize,
@@ -107,6 +111,7 @@ pub struct PlainDecoder<T: DataType> {
 }
 
 impl<T: DataType> PlainDecoder<T> {
+  /// Creates new plain decoder.
   pub fn new(type_length: i32) -> Self {
     PlainDecoder {
       data: None,
@@ -253,10 +258,13 @@ impl<T: DataType> Decoder<T> for PlainDecoder<T> {
   }
 }
 
-
 // ----------------------------------------------------------------------
 // RLE_DICTIONARY/PLAIN_DICTIONARY Decoding
 
+/// Dictionary decoder.
+/// The dictionary encoding builds a dictionary of values encountered in a given column.
+/// The dictionary is stored in a dictionary page per column chunk.
+/// See [`DictEncoder`](`::encoding::DictEncoder`) for more information.
 pub struct DictDecoder<T: DataType> {
   // The dictionary, which maps ids to the values
   dictionary: Vec<T::T>,
@@ -272,6 +280,7 @@ pub struct DictDecoder<T: DataType> {
 }
 
 impl<T: DataType> DictDecoder<T> {
+  /// Creates new dictionary decoder.
   pub fn new() -> Self {
     Self {
       dictionary: vec![],
@@ -281,6 +290,7 @@ impl<T: DataType> DictDecoder<T> {
     }
   }
 
+  /// Decodes and sets values for dictionary using `decoder`.
   pub fn set_dict(&mut self, mut decoder: Box<Decoder<T>>) -> Result<()> {
     let num_values = decoder.values_left();
     self.dictionary.resize(num_values, T::T::default());
@@ -325,6 +335,7 @@ impl<T: DataType> Decoder<T> for DictDecoder<T> {
 
 /// RLE/Bit-Packing hybrid decoding for values.
 /// Currently is used only for data pages v2 and supports boolean types.
+/// See [`RleValueEncoder`](`::encoding::RleValueEncoder`) for more information.
 pub struct RleValueDecoder<T: DataType> {
   values_left: usize,
   decoder: Option<RleDecoder>,
@@ -396,6 +407,9 @@ impl<T: DataType> Decoder<T> for RleValueDecoder<T> {
 // ----------------------------------------------------------------------
 // DELTA_BINARY_PACKED Decoding
 
+/// Delta binary packed decoder.
+/// Supports INT32 and INT64 types.
+/// See [`DeltaBitPackEncoder`](`::encoding::DeltaBitPackEncoder`) for more information.
 pub struct DeltaBitPackDecoder<T: DataType> {
   bit_reader: BitReader,
   initialized: bool,
@@ -421,6 +435,7 @@ pub struct DeltaBitPackDecoder<T: DataType> {
 }
 
 impl<T: DataType> DeltaBitPackDecoder<T> {
+  /// Creates new delta bit packed decoder.
   pub fn new() -> Self {
     Self {
       bit_reader: BitReader::from(vec![]),
@@ -441,11 +456,13 @@ impl<T: DataType> DeltaBitPackDecoder<T> {
     }
   }
 
+  /// Returns underlying bit reader offset.
   pub fn get_offset(&self) -> usize {
     assert!(self.initialized, "Bit reader is not initialized");
     self.bit_reader.get_byte_offset()
   }
 
+  /// Initializes a new block.
   #[inline]
   fn init_block(&mut self) -> Result<()> {
     self.min_delta = self.bit_reader
@@ -467,6 +484,7 @@ impl<T: DataType> DeltaBitPackDecoder<T> {
     Ok(())
   }
 
+  /// Loads deltas for the current mini block.
   #[inline]
   fn load_deltas_in_mini_block(&mut self) -> Result<()> {
     self.deltas_in_mini_block.clear();
@@ -564,8 +582,9 @@ impl<T: DataType> Decoder<T> for DeltaBitPackDecoder<T> {
   }
 }
 
-// Helper trait to define specific conversions when decoding values
+/// Helper trait to define specific conversions when decoding values.
 trait DeltaBitPackDecoderConversion<T: DataType> {
+  /// Sets decoded value based on type `T`.
   #[inline]
   fn set_decoded_value(
     &self, buffer:
@@ -616,6 +635,11 @@ impl<T: DataType> DeltaBitPackDecoderConversion<T> for DeltaBitPackDecoder<T>
 // ----------------------------------------------------------------------
 // DELTA_LENGTH_BYTE_ARRAY Decoding
 
+/// Delta length byte array decoder.
+/// Only applied to byte arrays to separate the length values and the data. The lengths
+/// are encoded using DELTA_BINARY_PACKED encoding.
+/// See [`DeltaLengthByteArrayEncoder`](`::encoding::DeltaLengthByteArrayEncoder`)
+/// for more information.
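+///
+/// For example, the byte arrays `"one"` and `"two"` are represented as the
+/// DELTA_BINARY_PACKED lengths `3, 3` followed by the concatenated bytes `onetwo`
+/// (a simplified sketch - the exact layout of the lengths is defined by the
+/// DELTA_BINARY_PACKED format).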
pub struct DeltaLengthByteArrayDecoder<T: DataType> {
   // Lengths for each byte array in `data`
   // TODO: add memory tracker to this
   lengths: Vec<i32>,
@@ -638,6 +662,7 @@ pub struct DeltaLengthByteArrayDecoder<T: DataType> {
 }
 
 impl<T: DataType> DeltaLengthByteArrayDecoder<T> {
+  /// Creates new delta length byte array decoder.
   pub fn new() -> Self {
     Self {
       lengths: vec![],
@@ -703,6 +728,11 @@ impl<T: DataType> Decoder<T> for DeltaLengthByteArrayDecoder<T> {
 // ----------------------------------------------------------------------
 // DELTA_BYTE_ARRAY Decoding
 
+/// Delta byte array decoder.
+/// Prefix lengths are encoded using `DELTA_BINARY_PACKED` encoding; suffixes are stored
+/// using `DELTA_LENGTH_BYTE_ARRAY` encoding.
+/// See [`DeltaByteArrayEncoder`](`::encoding::DeltaByteArrayEncoder`) for more
+/// information.
 pub struct DeltaByteArrayDecoder<T: DataType> {
   // Prefix lengths for each byte array
   // TODO: add memory tracker to this
   prefix_lengths: Vec<i32>,
@@ -725,6 +755,7 @@ pub struct DeltaByteArrayDecoder<T: DataType> {
 }
 
 impl<T: DataType> DeltaByteArrayDecoder<T> {
+  /// Creates new delta byte array decoder.
   pub fn new() -> Self {
     Self {
       prefix_lengths: vec![],
From b96050be4ce6501a8df65417c36203768138d409 Mon Sep 17 00:00:00 2001
From: Ivan Sadikov 
Date: Thu, 12 Apr 2018 21:30:48 +1200
Subject: [PATCH 27/32] update docs

---
 src/encodings/decoding.rs |  2 ++
 src/encodings/encoding.rs | 59 +++++++++++++++++++++++++++++++++------
 src/schema/mod.rs         |  2 +-
 src/util/memory.rs        |  4 +--
 4 files changed, 56 insertions(+), 11 deletions(-)

diff --git a/src/encodings/decoding.rs b/src/encodings/decoding.rs
index be05e75..2c77310 100644
--- a/src/encodings/decoding.rs
+++ b/src/encodings/decoding.rs
@@ -15,6 +15,8 @@
 // specific language governing permissions and limitations
 // under the License.
 
+//! Contains all supported decoders for Parquet.
+
 use std::cmp;
 use std::marker::PhantomData;
 use std::mem;
diff --git a/src/encodings/encoding.rs b/src/encodings/encoding.rs
index f0ebb17..9de56b2 100644
--- a/src/encodings/encoding.rs
+++ b/src/encodings/encoding.rs
@@ -15,6 +15,8 @@
 // specific language governing permissions and limitations
 // under the License.
 
+//! Contains all supported encoders for Parquet.
+
 use std::cmp;
 use std::io::Write;
 use std::marker::PhantomData;
@@ -49,7 +51,6 @@ pub trait Encoder<T: DataType> {
   fn flush_buffer(&mut self) -> Result<ByteBufferPtr>;
 }
 
-
 /// Gets an encoder for the particular data type `T` and encoding `encoding`. Memory usage
 /// for the encoder instance is tracked by `mem_tracker`.
 pub fn get_encoder(
@@ -81,10 +82,20 @@ pub fn get_encoder(
   Ok(encoder)
 }
 
-
 // ----------------------------------------------------------------------
 // Plain encoding
 
+/// Plain encoding that supports all types.
+/// Values are encoded back to back.
+/// The plain encoding is used whenever a more efficient encoding cannot be used.
+/// It stores the data in the following format:
+/// - BOOLEAN - 1 bit per value, 0 is false; 1 is true.
+/// - INT32 - 4 bytes per value, stored as little-endian.
+/// - INT64 - 8 bytes per value, stored as little-endian.
+/// - FLOAT - 4 bytes per value, stored as IEEE little-endian.
+/// - DOUBLE - 8 bytes per value, stored as IEEE little-endian.
+/// - BYTE_ARRAY - 4 byte length stored as little endian, followed by bytes.
+/// - FIXED_LEN_BYTE_ARRAY - just the bytes are stored.
 pub struct PlainEncoder<T: DataType> {
   buffer: ByteBuffer,
   bit_writer: BitWriter,
@@ -93,6 +104,7 @@ pub struct PlainEncoder<T: DataType> {
 }
 
 impl<T: DataType> PlainEncoder<T> {
+  /// Creates new plain encoder.
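+  /// `desc` is the descriptor of the column being encoded, `mem_tracker` accounts for
+  /// the memory used by the underlying byte buffer, and `vec` provides the initial
+  /// storage for the encoded bytes.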
pub fn new(desc: ColumnDescPtr, mem_tracker: MemTrackerPtr, vec: Vec) -> Self { let mut byte_buffer = ByteBuffer::new().with_mem_tracker(mem_tracker); byte_buffer.set_data(vec); @@ -171,7 +183,6 @@ impl Encoder for PlainEncoder { } } - // ---------------------------------------------------------------------- // Dictionary encoding @@ -179,6 +190,16 @@ const INITIAL_HASH_TABLE_SIZE: usize = 1024; const MAX_HASH_LOAD: f32 = 0.7; const HASH_SLOT_EMPTY: i32 = -1; +/// Dictionary encoder. +/// The dictionary encoding builds a dictionary of values encountered in a given column. +/// The dictionary page is written first, before the data pages of the column chunk. +/// +/// Dictionary page format: the entries in the dictionary - in dictionary order - +/// using the plain encoding. +/// +/// Data page format: the bit width used to encode the entry ids stored as 1 byte +/// (max bit width = 32), followed by the values encoded using RLE/Bit packed described +/// above (with the given bit width). pub struct DictEncoder { // Descriptor for the column to be encoded. desc: ColumnDescPtr, @@ -209,6 +230,7 @@ pub struct DictEncoder { } impl DictEncoder { + /// Creates new dictionary encoder. pub fn new(desc: ColumnDescPtr, mem_tracker: MemTrackerPtr) -> Self { let mut slots = Buffer::new().with_mem_tracker(mem_tracker.clone()); slots.resize(INITIAL_HASH_TABLE_SIZE, -1); @@ -224,6 +246,7 @@ impl DictEncoder { } } + /// Returns number of unique entries in the dictionary. pub fn num_entries(&self) -> usize { self.uniques.size() } @@ -368,6 +391,7 @@ pub struct RleValueEncoder { } impl RleValueEncoder { + /// Creates new rle value encoder. pub fn new() -> Self { Self { encoder: None, @@ -439,19 +463,29 @@ const MAX_BIT_WRITER_SIZE: usize = 10 * 1024 * 1024; const DEFAULT_BLOCK_SIZE: usize = 128; const DEFAULT_NUM_MINI_BLOCKS: usize = 4; +/// Delta bit packed encoder. +/// Consists of a header followed by blocks of delta encoded values binary packed. +/// /// Delta-binary-packing: +/// ``` /// [page-header] [block 1], [block 2], ... [block N] +/// ``` +/// /// Each page header consists of: +/// ``` /// [block size] [number of miniblocks in a block] [total value count] [first value] +/// ``` +/// /// Each block consists of: +/// ``` /// [min delta] [list of bitwidths of miniblocks] [miniblocks] +/// ``` /// /// Current implementation writes values in `put` method, multiple calls to `put` to /// existing block or start new block if block size is exceeded. Calling `flush_buffer` /// writes out all data and resets internal state, including page header. /// -/// Supports only Int32Type and Int64Type. -/// +/// Supports only INT32 and INT64. pub struct DeltaBitPackEncoder { page_header_writer: BitWriter, bit_writer: BitWriter, @@ -467,6 +501,7 @@ pub struct DeltaBitPackEncoder { } impl DeltaBitPackEncoder { + /// Creates new delta bit packed encoder. pub fn new() -> Self { let block_size = DEFAULT_BLOCK_SIZE; let num_mini_blocks = DEFAULT_NUM_MINI_BLOCKS; @@ -489,8 +524,8 @@ impl DeltaBitPackEncoder { } } - // Writes page header for blocks, this method is invoked when we are done encoding - // values. It is also okay to encode when no values have been provided + /// Writes page header for blocks, this method is invoked when we are done encoding + /// values. It is also okay to encode when no values have been provided fn write_page_header(&mut self) { // We ignore the result of each 'put' operation, because MAX_PAGE_HEADER_WRITER_SIZE // is chosen to fit all header values and guarantees that writes will not fail. 
@@ -629,7 +664,7 @@ impl Encoder for DeltaBitPackEncoder { } } -// Helper trait to define specific conversions and subtractions when computing deltas +/// Helper trait to define specific conversions and subtractions when computing deltas trait DeltaBitPackEncoderConversion { // Method should panic if type is not supported, otherwise no-op #[inline] @@ -709,6 +744,9 @@ impl DeltaBitPackEncoderConversion for DeltaBitPackEncoder } } +// ---------------------------------------------------------------------- +// DELTA_LENGTH_BYTE_ARRAY encoding + /// Encoding for byte arrays to separate the length values and the data. /// The lengths are encoded using DELTA_BINARY_PACKED encoding, data is /// stored as raw bytes. @@ -721,6 +759,7 @@ pub struct DeltaLengthByteArrayEncoder { } impl DeltaLengthByteArrayEncoder { + /// Creates new delta length byte array encoder. pub fn new() -> Self { Self { len_encoder: DeltaBitPackEncoder::new(), @@ -767,6 +806,9 @@ impl Encoder for DeltaLengthByteArrayEncoder { } } +// ---------------------------------------------------------------------- +// DELTA_BYTE_ARRAY encoding + /// Encoding for byte arrays, prefix lengths are encoded using DELTA_BINARY_PACKED /// encoding, followed by suffixes with DELTA_LENGTH_BYTE_ARRAY encoding. pub struct DeltaByteArrayEncoder { @@ -777,6 +819,7 @@ pub struct DeltaByteArrayEncoder { } impl DeltaByteArrayEncoder { + /// Creates new delta byte array encoder. pub fn new() -> Self { Self { prefix_len_encoder: DeltaBitPackEncoder::::new(), diff --git a/src/schema/mod.rs b/src/schema/mod.rs index 5580ec1..ee086e0 100644 --- a/src/schema/mod.rs +++ b/src/schema/mod.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -//! Contains definitions of Parquet schema and methods to print and parse schema. +//! Parquet schema definitions and methods to print and parse schema. //! //! # Example //! diff --git a/src/util/memory.rs b/src/util/memory.rs index 72f0915..4032eca 100644 --- a/src/util/memory.rs +++ b/src/util/memory.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! Utility methods and structs for working with memory. + use std::cell::Cell; use std::fmt::{Debug, Display, Formatter, Result as FmtResult}; use std::io::{Result as IoResult, Write}; @@ -69,7 +71,6 @@ impl MemTracker { } } - // ---------------------------------------------------------------------- // Buffer classes @@ -256,7 +257,6 @@ impl Drop for Buffer { } } - // ---------------------------------------------------------------------- // Immutable Buffer (BufferPtr) classes From 605a1fa32fb70733cc2634197628197f9a64cee4 Mon Sep 17 00:00:00 2001 From: Ivan Sadikov Date: Thu, 12 Apr 2018 21:49:58 +1200 Subject: [PATCH 28/32] update docs --- src/column/page.rs | 4 ++-- src/compression.rs | 3 ++- src/record/reader.rs | 11 ++++++----- src/schema/parser.rs | 9 +++++---- src/schema/printer.rs | 7 ++++--- src/schema/types.rs | 22 +++++++++++----------- 6 files changed, 30 insertions(+), 26 deletions(-) diff --git a/src/column/page.rs b/src/column/page.rs index cc24b39..870532f 100644 --- a/src/column/page.rs +++ b/src/column/page.rs @@ -53,7 +53,7 @@ pub enum Page { } impl Page { - /// Returns [`PageType`] for this page. + /// Returns [`PageType`](`::basic::PageType`) for this page. pub fn page_type(&self) -> PageType { match self { &Page::DataPage { .. } => PageType::DATA_PAGE, @@ -80,7 +80,7 @@ impl Page { } } - /// Returns this page [`Encoding`]. 
+ /// Returns this page [`Encoding`](`::basic::Encoding`). pub fn encoding(&self) -> Encoding { match self { &Page::DataPage { encoding, .. } => encoding, diff --git a/src/compression.rs b/src/compression.rs index a74c169..e5c3326 100644 --- a/src/compression.rs +++ b/src/compression.rs @@ -17,7 +17,8 @@ //! Contains codec interface and supported codec implementations. //! -//! See [`Compression`](`CodecType`) for all available compression algorithms. +//! See [`Compression`](`::basic::Compression`) enum for all available compression +//! algorithms. //! //! # Example //! diff --git a/src/record/reader.rs b/src/record/reader.rs index c8b4eaf..24bd3b5 100644 --- a/src/record/reader.rs +++ b/src/record/reader.rs @@ -15,7 +15,8 @@ // specific language governing permissions and limitations // under the License. -//! Contains implementation of record assembly and converting Parquet types into `Row`s. +//! Contains implementation of record assembly and converting Parquet types into +//! [`Row`](`::record::api::Row`)s. use std::collections::HashMap; use std::rc::Rc; @@ -479,7 +480,7 @@ impl Reader { // ---------------------------------------------------------------------- // Row iterators -/// Iterator of [`Row`]s. +/// Iterator of [`Row`](`::record::api::Row`)s. /// It is used either for a single row group to iterate over data in that row group, or /// an entire file with auto buffering of all row groups. pub struct RowIter<'a> { @@ -492,7 +493,7 @@ pub struct RowIter<'a> { } impl<'a> RowIter<'a> { - /// Creates iterator of [`Row`]s for all row groups in a file. + /// Creates iterator of [`Row`](`::record::api::Row`)s for all row groups in a file. pub fn from_file(proj: Option, reader: &'a FileReader) -> Result { let descr = Self::get_proj_descr(proj, reader.metadata().file_metadata().schema_descr_ptr())?; @@ -508,7 +509,7 @@ impl<'a> RowIter<'a> { }) } - /// Creates iterator of [`Row`]s for a specific row group. + /// Creates iterator of [`Row`](`::record::api::Row`)s for a specific row group. pub fn from_row_group(proj: Option, reader: &'a RowGroupReader) -> Result { let descr = Self::get_proj_descr(proj, reader.metadata().schema_descr_ptr())?; let tree_builder = Self::tree_builder(); @@ -582,7 +583,7 @@ impl<'a> Iterator for RowIter<'a> { } } -/// Internal iterator of [`Row`]s for a reader. +/// Internal iterator of [`Row`](`::record::api::Row`)s for a reader. pub struct ReaderIter { root_reader: Reader, records_left: usize diff --git a/src/schema/parser.rs b/src/schema/parser.rs index 7f704bc..15c0c45 100644 --- a/src/schema/parser.rs +++ b/src/schema/parser.rs @@ -16,7 +16,8 @@ // under the License. //! Parquet schema parser. -//! Provides methods to parse and validate string message type into Parquet [`Type`]. +//! Provides methods to parse and validate string message type into Parquet +//! [`Type`](`::schema::types::Type`). //! //! # Example //! @@ -47,9 +48,9 @@ use basic::{LogicalType, Repetition, Type as PhysicalType}; use errors::{ParquetError, Result}; use schema::types::{Type, TypePtr}; -/// Parses message type as string into a Parquet `Type` which, for example, could be -/// used to extract individual columns. Returns Parquet general error when parsing or -/// validation fails. +/// Parses message type as string into a Parquet [`Type`](`::schema::types::Type`) which, +/// for example, could be used to extract individual columns. Returns Parquet general +/// error when parsing or validation fails. 
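+///
+/// A minimal sketch (the message type string is a hypothetical example):
+/// ```
+/// use parquet::schema::parser::parse_message_type;
+///
+/// let schema = parse_message_type("message root { required int32 id; }")
+///   .expect("Message type should be valid");
+/// ```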
pub fn parse_message_type<'a>(message_type: &'a str) -> Result { let mut parser = Parser { tokenizer: &mut Tokenizer::from_str(message_type) }; parser.parse_message_type() diff --git a/src/schema/printer.rs b/src/schema/printer.rs index f17b302..f34a6e8 100644 --- a/src/schema/printer.rs +++ b/src/schema/printer.rs @@ -56,7 +56,8 @@ use file::metadata::{ }; use schema::types::Type; -/// Prints Parquet metadata [`ParquetMetaData`] information. +/// Prints Parquet metadata [`ParquetMetaData`](`::file::metadata::ParquetMetaData`) +/// information. #[allow(unused_must_use)] pub fn print_parquet_metadata(out: &mut io::Write, metadata: &ParquetMetaData) { print_file_metadata(out, &metadata.file_metadata()); @@ -72,7 +73,7 @@ pub fn print_parquet_metadata(out: &mut io::Write, metadata: &ParquetMetaData) { } } -/// Prints file metadata [`FileMetaData`] information. +/// Prints file metadata [`FileMetaData`](`::file::metadata::FileMetaData`) information. #[allow(unused_must_use)] pub fn print_file_metadata(out: &mut io::Write, file_metadata: &FileMetaData) { writeln!(out, "version: {}", file_metadata.version()); @@ -84,7 +85,7 @@ pub fn print_file_metadata(out: &mut io::Write, file_metadata: &FileMetaData) { print_schema(out, schema); } -/// Prints Parquet [`Type`] information. +/// Prints Parquet [`Type`](`::schema::types::Type`) information. #[allow(unused_must_use)] pub fn print_schema(out: &mut io::Write, tp: &Type) { // TODO: better if we can pass fmt::Write to Printer. diff --git a/src/schema/types.rs b/src/schema/types.rs index 25e2056..5fb4717 100644 --- a/src/schema/types.rs +++ b/src/schema/types.rs @@ -188,13 +188,13 @@ impl<'a> PrimitiveTypeBuilder<'a> { } } - /// Sets [`Repetition`] for this field and returns itself. + /// Sets [`Repetition`](`::basic::Repetition`) for this field and returns itself. pub fn with_repetition(mut self, repetition: Repetition) -> Self { self.repetition = repetition; self } - /// Sets [`LogicalType`] for this field and returns itself. + /// Sets [`LogicalType`](`::basic::LogicalType`) for this field and returns itself. pub fn with_logical_type(mut self, logical_type: LogicalType) -> Self { self.logical_type = logical_type; self @@ -339,13 +339,13 @@ impl<'a> GroupTypeBuilder<'a> { } } - /// Sets [`Repetition`] for this field and returns itself. + /// Sets [`Repetition`](`::basic::Repetition`) for this field and returns itself. pub fn with_repetition(mut self, repetition: Repetition) -> Self { self.repetition = Some(repetition); self } - /// Sets [`LogicalType`] for this field and returns itself. + /// Sets [`LogicalType`](`::basic::LogicalType`) for this field and returns itself. pub fn with_logical_type(mut self, logical_type: LogicalType) -> Self { self.logical_type = logical_type; self @@ -402,13 +402,13 @@ impl BasicTypeInfo { self.repetition.is_some() } - /// Returns [`Repetition`] value for the type. + /// Returns [`Repetition`](`::basic::Repetition`) value for the type. pub fn repetition(&self) -> Repetition { assert!(self.repetition.is_some()); self.repetition.unwrap() } - /// Returns [`LogicalType`] value for the type. + /// Returns [`LogicalType`](`::basic::LogicalType`) value for the type. pub fn logical_type(&self) -> LogicalType { self.logical_type } @@ -425,7 +425,6 @@ impl BasicTypeInfo { } } - // ---------------------------------------------------------------------- // Parquet descriptor definitions @@ -541,7 +540,8 @@ impl ColumnDescriptor { &self.path } - /// Returns root [`Type`] (most top-level parent field) for this leaf column. 
+ /// Returns root [`Type`](`::schema::types::Type`) (most top-level parent field) for + /// this leaf column. pub fn root_type(&self) -> &Type { assert!(self.root_type.is_some()); self.root_type.as_ref().unwrap() @@ -552,7 +552,7 @@ impl ColumnDescriptor { self.primitive_type.name() } - /// Returns [`LogicalType`] for this column. + /// Returns [`LogicalType`](`::basic::LogicalType`) for this column. pub fn logical_type(&self) -> LogicalType { self.primitive_type.get_basic_info().logical_type() } @@ -662,7 +662,7 @@ impl SchemaDescriptor { self.leaves.len() } - /// Returns column root [`Type`] for a field position. + /// Returns column root [`Type`](`::schema::types::Type`) for a field position. pub fn get_column_root(&self, i: usize) -> &Type { assert!( i < self.leaves.len(), @@ -675,7 +675,7 @@ impl SchemaDescriptor { result.unwrap().as_ref() } - /// Returns schema as [`Type`]. + /// Returns schema as [`Type`](`::schema::types::Type`). pub fn root_schema(&self) -> &Type { self.schema.as_ref() } From c78f550150dba5f1793e002c31cd25e2766930e4 Mon Sep 17 00:00:00 2001 From: Ivan Sadikov Date: Thu, 12 Apr 2018 21:51:46 +1200 Subject: [PATCH 29/32] fix doc tests --- src/encodings/encoding.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/encodings/encoding.rs b/src/encodings/encoding.rs index 9de56b2..068b159 100644 --- a/src/encodings/encoding.rs +++ b/src/encodings/encoding.rs @@ -467,17 +467,17 @@ const DEFAULT_NUM_MINI_BLOCKS: usize = 4; /// Consists of a header followed by blocks of delta encoded values binary packed. /// /// Delta-binary-packing: -/// ``` +/// ```shell /// [page-header] [block 1], [block 2], ... [block N] /// ``` /// /// Each page header consists of: -/// ``` +/// ```shell /// [block size] [number of miniblocks in a block] [total value count] [first value] /// ``` /// /// Each block consists of: -/// ``` +/// ```shell /// [min delta] [list of bitwidths of miniblocks] [miniblocks] /// ``` /// From af1342170a6b9bc44e4bf81434f533ab7a154c41 Mon Sep 17 00:00:00 2001 From: Ivan Sadikov Date: Thu, 12 Apr 2018 22:06:30 +1200 Subject: [PATCH 30/32] update from_thrift comments --- src/file/metadata.rs | 6 +++--- src/schema/types.rs | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/file/metadata.rs b/src/file/metadata.rs index 245daf3..c0b070b 100644 --- a/src/file/metadata.rs +++ b/src/file/metadata.rs @@ -194,7 +194,7 @@ impl RowGroupMetaData { self.schema_descr.clone() } - /// Internal method to convert from Thrift. + /// Method to convert from Thrift. pub fn from_thrift( schema_descr: SchemaDescPtr, mut rg: RowGroup @@ -323,8 +323,8 @@ impl ColumnChunkMetaData { self.dictionary_page_offset } - /// Internal method to convert from Thrift. - pub fn from_thrift(column_descr: ColumnDescPtr, cc: ColumnChunk) -> Result { + /// Method to convert from Thrift. + fn from_thrift(column_descr: ColumnDescPtr, cc: ColumnChunk) -> Result { if cc.meta_data.is_none() { return Err(general_err!("Expected to have column metadata")); } diff --git a/src/schema/types.rs b/src/schema/types.rs index 5fb4717..0e176e7 100644 --- a/src/schema/types.rs +++ b/src/schema/types.rs @@ -738,7 +738,7 @@ fn build_tree( } } -/// Internal method to convert from Thrift. +/// Method to convert from Thrift. 
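+/// Reconstructs the schema tree from the flat list of Thrift `SchemaElement`s that is
+/// stored in the file footer and returns the root schema type.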
pub fn from_thrift(elements: &mut [SchemaElement]) -> Result<TypePtr> {
   let mut index = 0;
   let mut schema_nodes = Vec::new();
From 51a4965b8cb431d6b25499e08fb32fc114152530 Mon Sep 17 00:00:00 2001
From: Ivan Sadikov 
Date: Fri, 13 Apr 2018 20:28:13 +1200
Subject: [PATCH 31/32] update parquet version in readme

---
 README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index aedc5c7..48b83cd 100644
--- a/README.md
+++ b/README.md
@@ -35,8 +35,8 @@ while let Some(record) = iter.next() {
 ```
 See crate documentation on available API.
 
-## Versions
-- [Parquet-format](https://github.com/apache/parquet-format) 2.3.2
+## Supported Parquet Version
+- Parquet-format 2.4.0
 
 ## Requirements
 - Rust nightly
From 1a11586528897e8da652faa656699df00075f7a3 Mon Sep 17 00:00:00 2001
From: Ivan Sadikov 
Date: Fri, 13 Apr 2018 20:52:48 +1200
Subject: [PATCH 32/32] update comments

---
 src/schema/types.rs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/schema/types.rs b/src/schema/types.rs
index 0e176e7..2651752 100644
--- a/src/schema/types.rs
+++ b/src/schema/types.rs
@@ -229,8 +229,8 @@ impl<'a> PrimitiveTypeBuilder<'a> {
     self
   }
 
-  /// Creates a new `PrimitiveType` instance from the gathered attributes.
-  /// Also checks various illegal conditions and returns `Err` if that that happen.
+  /// Creates a new `PrimitiveType` instance from the collected attributes.
+  /// Returns `Err` if any of the building conditions are not met.
   pub fn build(self) -> Result<Type> {
     let basic_info = BasicTypeInfo {
       name: String::from(self.name),