diff --git a/automl/google/cloud/automl_v1beta1/gapic/enums.py b/automl/google/cloud/automl_v1beta1/gapic/enums.py
index a34ed876b1ca..9d817f8ec807 100644
--- a/automl/google/cloud/automl_v1beta1/gapic/enums.py
+++ b/automl/google/cloud/automl_v1beta1/gapic/enums.py
@@ -83,6 +83,65 @@ class TypeCode(enum.IntEnum):
     CATEGORY = 10
 
 
+class Document(object):
+    class Layout(object):
+        class TextSegmentType(enum.IntEnum):
+            """
+            The type of TextSegment in the context of the original document.
+
+            Attributes:
+              TEXT_SEGMENT_TYPE_UNSPECIFIED (int): Should not be used.
+              TOKEN (int): The text segment is a token. e.g. word.
+              PARAGRAPH (int): The text segment is a paragraph.
+              FORM_FIELD (int): The text segment is a form field.
+              FORM_FIELD_NAME (int): The text segment is the name part of a form field. It will be treated as
+              child of another FORM\_FIELD TextSegment if its span is subspan of
+              another TextSegment with type FORM\_FIELD.
+              FORM_FIELD_CONTENTS (int): The text segment is the text content part of a form field. It will be
+              treated as child of another FORM\_FIELD TextSegment if its span is
+              subspan of another TextSegment with type FORM\_FIELD.
+              TABLE (int): The text segment is a whole table, including headers, and all rows.
+              TABLE_HEADER (int): The text segment is a table's headers. It will be treated as child of
+              another TABLE TextSegment if its span is subspan of another TextSegment
+              with type TABLE.
+              TABLE_ROW (int): The text segment is a row in table. It will be treated as child of
+              another TABLE TextSegment if its span is subspan of another TextSegment
+              with type TABLE.
+              TABLE_CELL (int): The text segment is a cell in table. It will be treated as child of
+              another TABLE\_ROW TextSegment if its span is subspan of another
+              TextSegment with type TABLE\_ROW.
+            """
+
+            TEXT_SEGMENT_TYPE_UNSPECIFIED = 0
+            TOKEN = 1
+            PARAGRAPH = 2
+            FORM_FIELD = 3
+            FORM_FIELD_NAME = 4
+            FORM_FIELD_CONTENTS = 5
+            TABLE = 6
+            TABLE_HEADER = 7
+            TABLE_ROW = 8
+            TABLE_CELL = 9
+
+
+class DocumentDimensions(object):
+    class DocumentDimensionUnit(enum.IntEnum):
+        """
+        Unit of the document dimension.
+
+        Attributes:
+          DOCUMENT_DIMENSION_UNIT_UNSPECIFIED (int): Should not be used.
+          INCH (int): Document dimension is measured in inches.
+          CENTIMETER (int): Document dimension is measured in centimeters.
+          POINT (int): Document dimension is measured in points. 72 points = 1 inch.
+        """
+
+        DOCUMENT_DIMENSION_UNIT_UNSPECIFIED = 0
+        INCH = 1
+        CENTIMETER = 2
+        POINT = 3
+
+
 class Model(object):
     class DeploymentState(enum.IntEnum):
         """
diff --git a/automl/google/cloud/automl_v1beta1/proto/data_items.proto b/automl/google/cloud/automl_v1beta1/proto/data_items.proto
index fec75d3bbd73..fdbea7042d84 100644
--- a/automl/google/cloud/automl_v1beta1/proto/data_items.proto
+++ b/automl/google/cloud/automl_v1beta1/proto/data_items.proto
@@ -17,11 +17,13 @@ syntax = "proto3";
 
 package google.cloud.automl.v1beta1;
 
+import "google/api/annotations.proto";
+import "google/cloud/automl/v1beta1/geometry.proto";
 import "google/cloud/automl/v1beta1/io.proto";
+import "google/cloud/automl/v1beta1/text_segment.proto";
 import "google/protobuf/any.proto";
 import "google/protobuf/duration.proto";
 import "google/protobuf/struct.proto";
-import "google/api/annotations.proto";
 
 option go_package = "google.golang.org/genproto/googleapis/cloud/automl/v1beta1;automl";
 option java_multiple_files = true;
@@ -56,19 +58,135 @@ message TextSnippet {
   // characters long.
   string content = 1;
 
-  // Optional. The format of [content][google.cloud.automl.v1beta1.TextSnippet.content]. Currently the only two allowed
-  // values are "text/html" and "text/plain". If left blank, the format is
-  // automatically determined from the type of the uploaded [content][google.cloud.automl.v1beta1.TextSnippet.content].
+  // Optional. The format of
+  // [content][google.cloud.automl.v1beta1.TextSnippet.content]. Currently the
+  // only two allowed values are "text/html" and "text/plain". If left blank,
+  // the format is automatically determined from the type of the uploaded
+  // [content][google.cloud.automl.v1beta1.TextSnippet.content].
   string mime_type = 2;
 
   // Output only. HTTP URI where you can download the content.
   string content_uri = 4;
 }
 
+// Message that describes the dimensions of a document.
+message DocumentDimensions {
+  // Unit of the document dimension.
+  enum DocumentDimensionUnit {
+    // Should not be used.
+    DOCUMENT_DIMENSION_UNIT_UNSPECIFIED = 0;
+
+    // Document dimension is measured in inches.
+    INCH = 1;
+
+    // Document dimension is measured in centimeters.
+    CENTIMETER = 2;
+
+    // Document dimension is measured in points. 72 points = 1 inch.
+    POINT = 3;
+  }
+
+  // Unit of the dimension.
+  DocumentDimensionUnit unit = 1;
+
+  // Width value of the document, works together with the unit.
+  float width = 2;
+
+  // Height value of the document, works together with the unit.
+  float height = 3;
+}
+
 // A structured text document e.g. a PDF.
 message Document {
+  // Describes the layout information of a
+  // [text_segment][google.cloud.automl.v1beta1.Document.Layout.text_segment] in
+  // the document.
+  message Layout {
+    // The type of TextSegment in the context of the original document.
+    enum TextSegmentType {
+      // Should not be used.
+      TEXT_SEGMENT_TYPE_UNSPECIFIED = 0;
+
+      // The text segment is a token, e.g. a word.
+      TOKEN = 1;
+
+      // The text segment is a paragraph.
+      PARAGRAPH = 2;
+
+      // The text segment is a form field.
+      FORM_FIELD = 3;
+
+      // The text segment is the name part of a form field. It will be treated
+      // as child of another FORM_FIELD TextSegment if its span is subspan of
+      // another TextSegment with type FORM_FIELD.
+      FORM_FIELD_NAME = 4;
+
+      // The text segment is the text content part of a form field. It will be
+      // treated as child of another FORM_FIELD TextSegment if its span is
+      // subspan of another TextSegment with type FORM_FIELD.
+      FORM_FIELD_CONTENTS = 5;
+
+      // The text segment is a whole table, including headers, and all rows.
+      TABLE = 6;
+
+      // The text segment is a table's headers. It will be treated as child of
+      // another TABLE TextSegment if its span is subspan of another TextSegment
+      // with type TABLE.
+      TABLE_HEADER = 7;
+
+      // The text segment is a row in table. It will be treated as child of
+      // another TABLE TextSegment if its span is subspan of another TextSegment
+      // with type TABLE.
+      TABLE_ROW = 8;
+
+      // The text segment is a cell in table. It will be treated as child of
+      // another TABLE_ROW TextSegment if its span is subspan of another
+      // TextSegment with type TABLE_ROW.
+      TABLE_CELL = 9;
+    }
+
+    // Text Segment that represents a segment in
+    // [document_text][google.cloud.automl.v1beta1.Document.document_text].
+    TextSegment text_segment = 1;
+
+    // Page number of the
+    // [text_segment][google.cloud.automl.v1beta1.Document.Layout.text_segment]
+    // in the original document, starts from 1.
+    int32 page_number = 2;
+
+    // The position of the
+    // [text_segment][google.cloud.automl.v1beta1.Document.Layout.text_segment]
+    // in the page.
+    // Contains exactly 4
+    // [normalized_vertices][google.cloud.automl.v1beta1.BoundingPoly.normalized_vertices]
+    // and they are connected by edges in the order provided, which will
+    // represent a rectangle parallel to the frame. The
+    // [NormalizedVertex-s][google.cloud.automl.v1beta1.NormalizedVertex] are
+    // relative to the page.
+    // Coordinates are based on top-left as point (0,0).
+    BoundingPoly bounding_poly = 3;
+
+    // The type of the
+    // [text_segment][google.cloud.automl.v1beta1.Document.Layout.text_segment]
+    // in document.
+    TextSegmentType text_segment_type = 4;
+  }
+
   // An input config specifying the content of the document.
   DocumentInputConfig input_config = 1;
+
+  // The plain text version of this document.
+  TextSnippet document_text = 2;
+
+  // Describes the layout of the document. Sorted by
+  // [page_number][google.cloud.automl.v1beta1.Document.Layout.page_number].
+  repeated Layout layout = 3;
+
+  // The dimensions of the page in the document.
+  DocumentDimensions document_dimensions = 4;
+
+  // Number of pages in the document.
+  int32 page_count = 5;
 }
 
 // A representation of a row in a relational table.
diff --git a/automl/google/cloud/automl_v1beta1/proto/data_items_pb2.py b/automl/google/cloud/automl_v1beta1/proto/data_items_pb2.py
index 75d030ce87e6..ee388d632364 100644
--- a/automl/google/cloud/automl_v1beta1/proto/data_items_pb2.py
+++ b/automl/google/cloud/automl_v1beta1/proto/data_items_pb2.py
@@ -15,13 +15,19 @@
 _sym_db = _symbol_database.Default()
 
 
+from google.api import annotations_pb2 as google_dot_api_dot_annotations__pb2
+from google.cloud.automl_v1beta1.proto import (
+    geometry_pb2 as google_dot_cloud_dot_automl__v1beta1_dot_proto_dot_geometry__pb2,
+)
 from google.cloud.automl_v1beta1.proto import (
     io_pb2 as google_dot_cloud_dot_automl__v1beta1_dot_proto_dot_io__pb2,
 )
+from google.cloud.automl_v1beta1.proto import (
+    text_segment_pb2 as google_dot_cloud_dot_automl__v1beta1_dot_proto_dot_text__segment__pb2,
+)
 from google.protobuf import any_pb2 as google_dot_protobuf_dot_any__pb2
 from google.protobuf import duration_pb2 as google_dot_protobuf_dot_duration__pb2
 from google.protobuf import struct_pb2 as google_dot_protobuf_dot_struct__pb2
-from google.api import annotations_pb2 as google_dot_api_dot_annotations__pb2
 
 
 DESCRIPTOR = _descriptor.FileDescriptor(
@@ -32,18 +38,107 @@
         "\n\037com.google.cloud.automl.v1beta1P\001ZAgoogle.golang.org/genproto/googleapis/cloud/automl/v1beta1;automl\312\002\033Google\\Cloud\\AutoMl\\V1beta1\352\002\036Google::Cloud::AutoML::V1beta1"
     ),
     serialized_pb=_b(
-        '\n2google/cloud/automl_v1beta1/proto/data_items.proto\x12\x1bgoogle.cloud.automl.v1beta1\x1a*google/cloud/automl_v1beta1/proto/io.proto\x1a\x19google/protobuf/any.proto\x1a\x1egoogle/protobuf/duration.proto\x1a\x1cgoogle/protobuf/struct.proto\x1a\x1cgoogle/api/annotations.proto"\x7f\n\x05Image\x12\x15\n\x0bimage_bytes\x18\x01 \x01(\x0cH\x00\x12@\n\x0cinput_config\x18\x06 \x01(\x0b\x32(.google.cloud.automl.v1beta1.InputConfigH\x00\x12\x15\n\rthumbnail_uri\x18\x04 \x01(\tB\x06\n\x04\x64\x61ta"F\n\x0bTextSnippet\x12\x0f\n\x07\x63ontent\x18\x01 \x01(\t\x12\x11\n\tmime_type\x18\x02 \x01(\t\x12\x13\n\x0b\x63ontent_uri\x18\x04 \x01(\t"R\n\x08\x44ocument\x12\x46\n\x0cinput_config\x18\x01 \x01(\x0b\x32\x30.google.cloud.automl.v1beta1.DocumentInputConfig"F\n\x03Row\x12\x17\n\x0f\x63olumn_spec_ids\x18\x02 \x03(\t\x12&\n\x06values\x18\x03 \x03(\x0b\x32\x16.google.protobuf.Value"\xfe\x01\n\x0e\x45xamplePayload\x12\x33\n\x05image\x18\x01 \x01(\x0b\x32".google.cloud.automl.v1beta1.ImageH\x00\x12@\n\x0ctext_snippet\x18\x02 \x01(\x0b\x32(.google.cloud.automl.v1beta1.TextSnippetH\x00\x12\x39\n\x08\x64ocument\x18\x04 \x01(\x0b\x32%.google.cloud.automl.v1beta1.DocumentH\x00\x12/\n\x03row\x18\x03 \x01(\x0b\x32 .google.cloud.automl.v1beta1.RowH\x00\x42\t\n\x07payloadB\xa5\x01\n\x1f\x63om.google.cloud.automl.v1beta1P\x01ZAgoogle.golang.org/genproto/googleapis/cloud/automl/v1beta1;automl\xca\x02\x1bGoogle\\Cloud\\AutoMl\\V1beta1\xea\x02\x1eGoogle::Cloud::AutoML::V1beta1b\x06proto3'
+        '\n2google/cloud/automl_v1beta1/proto/data_items.proto\x12\x1bgoogle.cloud.automl.v1beta1\x1a\x1cgoogle/api/annotations.proto\x1a\x30google/cloud/automl_v1beta1/proto/geometry.proto\x1a*google/cloud/automl_v1beta1/proto/io.proto\x1a\x34google/cloud/automl_v1beta1/proto/text_segment.proto\x1a\x19google/protobuf/any.proto\x1a\x1egoogle/protobuf/duration.proto\x1a\x1cgoogle/protobuf/struct.proto"\x7f\n\x05Image\x12\x15\n\x0bimage_bytes\x18\x01 \x01(\x0cH\x00\x12@\n\x0cinput_config\x18\x06 \x01(\x0b\x32(.google.cloud.automl.v1beta1.InputConfigH\x00\x12\x15\n\rthumbnail_uri\x18\x04 \x01(\tB\x06\n\x04\x64\x61ta"F\n\x0bTextSnippet\x12\x0f\n\x07\x63ontent\x18\x01 \x01(\t\x12\x11\n\tmime_type\x18\x02 \x01(\t\x12\x13\n\x0b\x63ontent_uri\x18\x04 \x01(\t"\xef\x01\n\x12\x44ocumentDimensions\x12S\n\x04unit\x18\x01 \x01(\x0e\x32\x45.google.cloud.automl.v1beta1.DocumentDimensions.DocumentDimensionUnit\x12\r\n\x05width\x18\x02 \x01(\x02\x12\x0e\n\x06height\x18\x03 \x01(\x02"e\n\x15\x44ocumentDimensionUnit\x12\'\n#DOCUMENT_DIMENSION_UNIT_UNSPECIFIED\x10\x00\x12\x08\n\x04INCH\x10\x01\x12\x0e\n\nCENTIMETER\x10\x02\x12\t\n\x05POINT\x10\x03"\xf9\x05\n\x08\x44ocument\x12\x46\n\x0cinput_config\x18\x01 \x01(\x0b\x32\x30.google.cloud.automl.v1beta1.DocumentInputConfig\x12?\n\rdocument_text\x18\x02 \x01(\x0b\x32(.google.cloud.automl.v1beta1.TextSnippet\x12<\n\x06layout\x18\x03 \x03(\x0b\x32,.google.cloud.automl.v1beta1.Document.Layout\x12L\n\x13\x64ocument_dimensions\x18\x04 \x01(\x0b\x32/.google.cloud.automl.v1beta1.DocumentDimensions\x12\x12\n\npage_count\x18\x05 \x01(\x05\x1a\xc3\x03\n\x06Layout\x12>\n\x0ctext_segment\x18\x01 \x01(\x0b\x32(.google.cloud.automl.v1beta1.TextSegment\x12\x13\n\x0bpage_number\x18\x02 \x01(\x05\x12@\n\rbounding_poly\x18\x03 \x01(\x0b\x32).google.cloud.automl.v1beta1.BoundingPoly\x12W\n\x11text_segment_type\x18\x04 
\x01(\x0e\x32<.google.cloud.automl.v1beta1.Document.Layout.TextSegmentType"\xc8\x01\n\x0fTextSegmentType\x12!\n\x1dTEXT_SEGMENT_TYPE_UNSPECIFIED\x10\x00\x12\t\n\x05TOKEN\x10\x01\x12\r\n\tPARAGRAPH\x10\x02\x12\x0e\n\nFORM_FIELD\x10\x03\x12\x13\n\x0f\x46ORM_FIELD_NAME\x10\x04\x12\x17\n\x13\x46ORM_FIELD_CONTENTS\x10\x05\x12\t\n\x05TABLE\x10\x06\x12\x10\n\x0cTABLE_HEADER\x10\x07\x12\r\n\tTABLE_ROW\x10\x08\x12\x0e\n\nTABLE_CELL\x10\t"F\n\x03Row\x12\x17\n\x0f\x63olumn_spec_ids\x18\x02 \x03(\t\x12&\n\x06values\x18\x03 \x03(\x0b\x32\x16.google.protobuf.Value"\xfe\x01\n\x0e\x45xamplePayload\x12\x33\n\x05image\x18\x01 \x01(\x0b\x32".google.cloud.automl.v1beta1.ImageH\x00\x12@\n\x0ctext_snippet\x18\x02 \x01(\x0b\x32(.google.cloud.automl.v1beta1.TextSnippetH\x00\x12\x39\n\x08\x64ocument\x18\x04 \x01(\x0b\x32%.google.cloud.automl.v1beta1.DocumentH\x00\x12/\n\x03row\x18\x03 \x01(\x0b\x32 .google.cloud.automl.v1beta1.RowH\x00\x42\t\n\x07payloadB\xa5\x01\n\x1f\x63om.google.cloud.automl.v1beta1P\x01ZAgoogle.golang.org/genproto/googleapis/cloud/automl/v1beta1;automl\xca\x02\x1bGoogle\\Cloud\\AutoMl\\V1beta1\xea\x02\x1eGoogle::Cloud::AutoML::V1beta1b\x06proto3'
     ),
     dependencies=[
+        google_dot_api_dot_annotations__pb2.DESCRIPTOR,
+        google_dot_cloud_dot_automl__v1beta1_dot_proto_dot_geometry__pb2.DESCRIPTOR,
         google_dot_cloud_dot_automl__v1beta1_dot_proto_dot_io__pb2.DESCRIPTOR,
+        google_dot_cloud_dot_automl__v1beta1_dot_proto_dot_text__segment__pb2.DESCRIPTOR,
         google_dot_protobuf_dot_any__pb2.DESCRIPTOR,
         google_dot_protobuf_dot_duration__pb2.DESCRIPTOR,
         google_dot_protobuf_dot_struct__pb2.DESCRIPTOR,
-        google_dot_api_dot_annotations__pb2.DESCRIPTOR,
     ],
 )
 
 
+_DOCUMENTDIMENSIONS_DOCUMENTDIMENSIONUNIT = _descriptor.EnumDescriptor(
+    name="DocumentDimensionUnit",
+    full_name="google.cloud.automl.v1beta1.DocumentDimensions.DocumentDimensionUnit",
+    filename=None,
+    file=DESCRIPTOR,
+    values=[
+        _descriptor.EnumValueDescriptor(
+            name="DOCUMENT_DIMENSION_UNIT_UNSPECIFIED",
+            index=0,
+            number=0,
+            serialized_options=None,
+            type=None,
+        ),
+        _descriptor.EnumValueDescriptor(
+            name="INCH", index=1, number=1, serialized_options=None, type=None
+        ),
+        _descriptor.EnumValueDescriptor(
+            name="CENTIMETER", index=2, number=2, serialized_options=None, type=None
+        ),
+        _descriptor.EnumValueDescriptor(
+            name="POINT", index=3, number=3, serialized_options=None, type=None
+        ),
+    ],
+    containing_type=None,
+    serialized_options=None,
+    serialized_start=690,
+    serialized_end=791,
+)
+_sym_db.RegisterEnumDescriptor(_DOCUMENTDIMENSIONS_DOCUMENTDIMENSIONUNIT)
+
+_DOCUMENT_LAYOUT_TEXTSEGMENTTYPE = _descriptor.EnumDescriptor(
+    name="TextSegmentType",
+    full_name="google.cloud.automl.v1beta1.Document.Layout.TextSegmentType",
+    filename=None,
+    file=DESCRIPTOR,
+    values=[
+        _descriptor.EnumValueDescriptor(
+            name="TEXT_SEGMENT_TYPE_UNSPECIFIED",
+            index=0,
+            number=0,
+            serialized_options=None,
+            type=None,
+        ),
+        _descriptor.EnumValueDescriptor(
+            name="TOKEN", index=1, number=1, serialized_options=None, type=None
+        ),
+        _descriptor.EnumValueDescriptor(
+            name="PARAGRAPH", index=2, number=2, serialized_options=None, type=None
+        ),
+        _descriptor.EnumValueDescriptor(
+            name="FORM_FIELD", index=3, number=3, serialized_options=None, type=None
+        ),
+        _descriptor.EnumValueDescriptor(
+            name="FORM_FIELD_NAME",
+            index=4,
+            number=4,
+            serialized_options=None,
+            type=None,
+        ),
+        _descriptor.EnumValueDescriptor(
+            name="FORM_FIELD_CONTENTS",
+            index=5,
+            number=5,
+            serialized_options=None,
+            type=None,
+        ),
+        _descriptor.EnumValueDescriptor(
+            name="TABLE", index=6, number=6, serialized_options=None, type=None
+        ),
+        _descriptor.EnumValueDescriptor(
+            name="TABLE_HEADER", index=7, number=7, serialized_options=None, type=None
+        ),
+        _descriptor.EnumValueDescriptor(
+            name="TABLE_ROW", index=8, number=8, serialized_options=None, type=None
+        ),
+        _descriptor.EnumValueDescriptor(
+            name="TABLE_CELL", index=9, number=9, serialized_options=None, type=None
+        ),
+    ],
+    containing_type=None,
+    serialized_options=None,
+    serialized_start=1355,
+    serialized_end=1555,
+)
+_sym_db.RegisterEnumDescriptor(_DOCUMENT_LAYOUT_TEXTSEGMENTTYPE)
+
+
 _IMAGE = _descriptor.Descriptor(
     name="Image",
     full_name="google.cloud.automl.v1beta1.Image",
@@ -122,8 +217,8 @@
             fields=[],
         )
     ],
-    serialized_start=246,
-    serialized_end=373,
+    serialized_start=350,
+    serialized_end=477,
 )
 
 
@@ -197,11 +292,178 @@
     syntax="proto3",
     extension_ranges=[],
     oneofs=[],
-    serialized_start=375,
-    serialized_end=445,
+    serialized_start=479,
+    serialized_end=549,
+)
+
+
+_DOCUMENTDIMENSIONS = _descriptor.Descriptor(
+    name="DocumentDimensions",
+    full_name="google.cloud.automl.v1beta1.DocumentDimensions",
+    filename=None,
+    file=DESCRIPTOR,
+    containing_type=None,
+    fields=[
+        _descriptor.FieldDescriptor(
+            name="unit",
+            full_name="google.cloud.automl.v1beta1.DocumentDimensions.unit",
+            index=0,
+            number=1,
+            type=14,
+            cpp_type=8,
+            label=1,
+            has_default_value=False,
+            default_value=0,
+            message_type=None,
+            enum_type=None,
+            containing_type=None,
+            is_extension=False,
+            extension_scope=None,
+            serialized_options=None,
+            file=DESCRIPTOR,
+        ),
+        _descriptor.FieldDescriptor(
+            name="width",
+            full_name="google.cloud.automl.v1beta1.DocumentDimensions.width",
+            index=1,
+            number=2,
+            type=2,
+            cpp_type=6,
+            label=1,
+            has_default_value=False,
+            default_value=float(0),
+            message_type=None,
+            enum_type=None,
+            containing_type=None,
+            is_extension=False,
+            extension_scope=None,
+            serialized_options=None,
+            file=DESCRIPTOR,
+        ),
+        _descriptor.FieldDescriptor(
+            name="height",
+            full_name="google.cloud.automl.v1beta1.DocumentDimensions.height",
+            index=2,
+            number=3,
+            type=2,
+            cpp_type=6,
+            label=1,
+            has_default_value=False,
+            default_value=float(0),
+            message_type=None,
+            enum_type=None,
+            containing_type=None,
+            is_extension=False,
+            extension_scope=None,
+            serialized_options=None,
+            file=DESCRIPTOR,
+        ),
+    ],
+    extensions=[],
+    nested_types=[],
+    enum_types=[_DOCUMENTDIMENSIONS_DOCUMENTDIMENSIONUNIT],
+    serialized_options=None,
+    is_extendable=False,
+    syntax="proto3",
+    extension_ranges=[],
+    oneofs=[],
+    serialized_start=552,
+    serialized_end=791,
 )
 
 
+_DOCUMENT_LAYOUT = _descriptor.Descriptor(
+    name="Layout",
+    full_name="google.cloud.automl.v1beta1.Document.Layout",
+    filename=None,
+    file=DESCRIPTOR,
+    containing_type=None,
+    fields=[
+        _descriptor.FieldDescriptor(
+            name="text_segment",
+            full_name="google.cloud.automl.v1beta1.Document.Layout.text_segment",
+            index=0,
+            number=1,
+            type=11,
+            cpp_type=10,
+            label=1,
+            has_default_value=False,
+            default_value=None,
+            message_type=None,
+            enum_type=None,
+            containing_type=None,
+            is_extension=False,
+            extension_scope=None,
+            serialized_options=None,
+            file=DESCRIPTOR,
+        ),
+        _descriptor.FieldDescriptor(
+            name="page_number",
+            full_name="google.cloud.automl.v1beta1.Document.Layout.page_number",
+            index=1,
+            number=2,
+            type=5,
+            cpp_type=1,
+            label=1,
+            has_default_value=False,
+            default_value=0,
+            message_type=None,
+            enum_type=None,
+            containing_type=None,
+            is_extension=False,
+            extension_scope=None,
+            serialized_options=None,
+            file=DESCRIPTOR,
+        ),
+        _descriptor.FieldDescriptor(
+            name="bounding_poly",
+            full_name="google.cloud.automl.v1beta1.Document.Layout.bounding_poly",
+            index=2,
+            number=3,
+            type=11,
+            cpp_type=10,
+            label=1,
+            has_default_value=False,
+            default_value=None,
+            message_type=None,
+            enum_type=None,
+            containing_type=None,
+            is_extension=False,
+            extension_scope=None,
+            serialized_options=None,
+            file=DESCRIPTOR,
+        ),
+        _descriptor.FieldDescriptor(
+            name="text_segment_type",
+            full_name="google.cloud.automl.v1beta1.Document.Layout.text_segment_type",
+            index=3,
+            number=4,
+            type=14,
+            cpp_type=8,
+            label=1,
+            has_default_value=False,
+            default_value=0,
+            message_type=None,
+            enum_type=None,
+            containing_type=None,
+            is_extension=False,
+            extension_scope=None,
+            serialized_options=None,
+            file=DESCRIPTOR,
+        ),
+    ],
+    extensions=[],
+    nested_types=[],
+    enum_types=[_DOCUMENT_LAYOUT_TEXTSEGMENTTYPE],
+    serialized_options=None,
+    is_extendable=False,
+    syntax="proto3",
+    extension_ranges=[],
+    oneofs=[],
+    serialized_start=1104,
+    serialized_end=1555,
+)
+
 _DOCUMENT = _descriptor.Descriptor(
     name="Document",
     full_name="google.cloud.automl.v1beta1.Document",
@@ -226,18 +488,90 @@
             extension_scope=None,
             serialized_options=None,
             file=DESCRIPTOR,
-        )
+        ),
+        _descriptor.FieldDescriptor(
+            name="document_text",
+            full_name="google.cloud.automl.v1beta1.Document.document_text",
+            index=1,
+            number=2,
+            type=11,
+            cpp_type=10,
+            label=1,
+            has_default_value=False,
+            default_value=None,
+            message_type=None,
+            enum_type=None,
+            containing_type=None,
+            is_extension=False,
+            extension_scope=None,
+            serialized_options=None,
+            file=DESCRIPTOR,
+        ),
+        _descriptor.FieldDescriptor(
+            name="layout",
+            full_name="google.cloud.automl.v1beta1.Document.layout",
+            index=2,
+            number=3,
+            type=11,
+            cpp_type=10,
+            label=3,
+            has_default_value=False,
+            default_value=[],
+            message_type=None,
+            enum_type=None,
+            containing_type=None,
+            is_extension=False,
+            extension_scope=None,
+            serialized_options=None,
+            file=DESCRIPTOR,
+        ),
+        _descriptor.FieldDescriptor(
+            name="document_dimensions",
+            full_name="google.cloud.automl.v1beta1.Document.document_dimensions",
+            index=3,
+            number=4,
+            type=11,
+            cpp_type=10,
+            label=1,
+            has_default_value=False,
+            default_value=None,
+            message_type=None,
+            enum_type=None,
+            containing_type=None,
+            is_extension=False,
+            extension_scope=None,
+            serialized_options=None,
+            file=DESCRIPTOR,
+        ),
+        _descriptor.FieldDescriptor(
+            name="page_count",
+            full_name="google.cloud.automl.v1beta1.Document.page_count",
+            index=4,
+            number=5,
+            type=5,
+            cpp_type=1,
+            label=1,
+            has_default_value=False,
+            default_value=0,
+            message_type=None,
+            enum_type=None,
+            containing_type=None,
+            is_extension=False,
+            extension_scope=None,
+            serialized_options=None,
+            file=DESCRIPTOR,
+        ),
     ],
     extensions=[],
-    nested_types=[],
+    nested_types=[_DOCUMENT_LAYOUT],
     enum_types=[],
     serialized_options=None,
     is_extendable=False,
     syntax="proto3",
     extension_ranges=[],
     oneofs=[],
-    serialized_start=447,
-    serialized_end=529,
+    serialized_start=794,
+    serialized_end=1555,
 )
 
 
@@ -293,8 +627,8 @@
     syntax="proto3",
     extension_ranges=[],
     oneofs=[],
-    serialized_start=531,
-    serialized_end=601,
+    serialized_start=1557,
+    serialized_end=1627,
 )
 
 
@@ -394,8 +728,8 @@
             fields=[],
         )
     ],
-    serialized_start=604,
-    serialized_end=858,
+    serialized_start=1630,
+    serialized_end=1884,
 )
 
 _IMAGE.fields_by_name[
@@ -405,11 +739,33 @@
 _IMAGE.fields_by_name["image_bytes"].containing_oneof = _IMAGE.oneofs_by_name["data"]
 _IMAGE.oneofs_by_name["data"].fields.append(_IMAGE.fields_by_name["input_config"])
 _IMAGE.fields_by_name["input_config"].containing_oneof = _IMAGE.oneofs_by_name["data"]
+_DOCUMENTDIMENSIONS.fields_by_name[
+    "unit"
+].enum_type = _DOCUMENTDIMENSIONS_DOCUMENTDIMENSIONUNIT
+_DOCUMENTDIMENSIONS_DOCUMENTDIMENSIONUNIT.containing_type = _DOCUMENTDIMENSIONS
+_DOCUMENT_LAYOUT.fields_by_name[
+    "text_segment"
+].message_type = (
+    google_dot_cloud_dot_automl__v1beta1_dot_proto_dot_text__segment__pb2._TEXTSEGMENT
+)
+_DOCUMENT_LAYOUT.fields_by_name[
+    "bounding_poly"
+].message_type = (
+    google_dot_cloud_dot_automl__v1beta1_dot_proto_dot_geometry__pb2._BOUNDINGPOLY
+)
+_DOCUMENT_LAYOUT.fields_by_name[
+    "text_segment_type"
+].enum_type = _DOCUMENT_LAYOUT_TEXTSEGMENTTYPE
+_DOCUMENT_LAYOUT.containing_type = _DOCUMENT
+_DOCUMENT_LAYOUT_TEXTSEGMENTTYPE.containing_type = _DOCUMENT_LAYOUT
 _DOCUMENT.fields_by_name[
     "input_config"
 ].message_type = (
     google_dot_cloud_dot_automl__v1beta1_dot_proto_dot_io__pb2._DOCUMENTINPUTCONFIG
 )
+_DOCUMENT.fields_by_name["document_text"].message_type = _TEXTSNIPPET
+_DOCUMENT.fields_by_name["layout"].message_type = _DOCUMENT_LAYOUT
+_DOCUMENT.fields_by_name["document_dimensions"].message_type = _DOCUMENTDIMENSIONS
 _ROW.fields_by_name["values"].message_type = google_dot_protobuf_dot_struct__pb2._VALUE
 _EXAMPLEPAYLOAD.fields_by_name["image"].message_type = _IMAGE
 _EXAMPLEPAYLOAD.fields_by_name["text_snippet"].message_type = _TEXTSNIPPET
@@ -441,6 +797,7 @@
 ]
 DESCRIPTOR.message_types_by_name["Image"] = _IMAGE
 DESCRIPTOR.message_types_by_name["TextSnippet"] = _TEXTSNIPPET
+DESCRIPTOR.message_types_by_name["DocumentDimensions"] = _DOCUMENTDIMENSIONS
 DESCRIPTOR.message_types_by_name["Document"] = _DOCUMENT
 DESCRIPTOR.message_types_by_name["Row"] = _ROW
 DESCRIPTOR.message_types_by_name["ExamplePayload"] = _EXAMPLEPAYLOAD
@@ -505,10 +862,67 @@
 )
 _sym_db.RegisterMessage(TextSnippet)
 
+DocumentDimensions = _reflection.GeneratedProtocolMessageType(
+    "DocumentDimensions",
+    (_message.Message,),
+    dict(
+        DESCRIPTOR=_DOCUMENTDIMENSIONS,
+        __module__="google.cloud.automl_v1beta1.proto.data_items_pb2",
+        __doc__="""Message that describes dimension of a document.
+  
+  
+  Attributes:
+      unit:
+          Unit of the dimension.
+      width:
+          Width value of the document, works together with the unit.
+      height:
+          Height value of the document, works together with the unit.
+  """,
+        # @@protoc_insertion_point(class_scope:google.cloud.automl.v1beta1.DocumentDimensions)
+    ),
+)
+_sym_db.RegisterMessage(DocumentDimensions)
+
 Document = _reflection.GeneratedProtocolMessageType(
     "Document",
     (_message.Message,),
     dict(
+        Layout=_reflection.GeneratedProtocolMessageType(
+            "Layout",
+            (_message.Message,),
+            dict(
+                DESCRIPTOR=_DOCUMENT_LAYOUT,
+                __module__="google.cloud.automl_v1beta1.proto.data_items_pb2",
+                __doc__="""Describes the layout information of a
+    [text\_segment][google.cloud.automl.v1beta1.Document.Layout.text\_segment]
+    in the document.
+    
+    
+    Attributes:
+        text_segment:
+            Text Segment that represents a segment in [document\_text][goo
+            gle.cloud.automl.v1beta1.Document.document\_text].
+        page_number:
+            Page number of the [text\_segment][google.cloud.automl.v1beta1
+            .Document.Layout.text\_segment] in the original document,
+            starts from 1.
+        bounding_poly:
+            The position of the [text\_segment][google.cloud.automl.v1beta
+            1.Document.Layout.text\_segment] in the page. Contains exactly
+            4  [normalized\_vertices][google.cloud.automl.v1beta1.Bounding
+            Poly.normalized\_vertices] and they are connected by edges in
+            the order provided, which will represent a rectangle parallel
+            to the frame. The [NormalizedVertex-s][google.cloud.automl.v1b
+            eta1.NormalizedVertex] are relative to the page. Coordinates
+            are based on top-left as point (0,0).
+        text_segment_type:
+            The type of the [text\_segment][google.cloud.automl.v1beta1.Do
+            cument.Layout.text\_segment] in document.
+    """,
+                # @@protoc_insertion_point(class_scope:google.cloud.automl.v1beta1.Document.Layout)
+            ),
+        ),
         DESCRIPTOR=_DOCUMENT,
         __module__="google.cloud.automl_v1beta1.proto.data_items_pb2",
         __doc__="""A structured text document e.g. a PDF.
@@ -517,11 +931,21 @@
   Attributes:
       input_config:
           An input config specifying the content of the document.
+      document_text:
+          The plain text version of this document.
+      layout:
+          Describes the layout of the document. Sorted by
+          [page\_number][].
+      document_dimensions:
+          The dimensions of the page in the document.
+      page_count:
+          Number of pages in the document.
   """,
         # @@protoc_insertion_point(class_scope:google.cloud.automl.v1beta1.Document)
     ),
 )
 _sym_db.RegisterMessage(Document)
+_sym_db.RegisterMessage(Document.Layout)
 
 Row = _reflection.GeneratedProtocolMessageType(
     "Row",
diff --git a/automl/google/cloud/automl_v1beta1/proto/io.proto b/automl/google/cloud/automl_v1beta1/proto/io.proto
index c08edc277474..6f007f02a10d 100644
--- a/automl/google/cloud/automl_v1beta1/proto/io.proto
+++ b/automl/google/cloud/automl_v1beta1/proto/io.proto
@@ -140,7 +140,8 @@ option ruby_package = "Google::Cloud::AutoML::V1beta1";
 //         CSV file(s) with each line in format:
 //           ML_USE,GCS_FILE_PATH
 //           GCS_FILE_PATH leads to a .JSONL (that is, JSON Lines) file which
-//           either imports text in-line or as documents.
+//           either imports text in-line or as documents. Any given
+//           .JSONL file must be 100MB or smaller.
 //           The in-line .JSONL file contains, per line, a proto that wraps a
 //           TextSnippet proto (in json representation) followed by one or more
 //           AnnotationPayload protos (called annotations), which have
@@ -148,12 +149,16 @@ option ruby_package = "Google::Cloud::AutoML::V1beta1";
 //           is expected to be annotated exhaustively, for example, if you look
 //           for animals and text contains "dolphin" that is not labeled, then
 //           "dolphin" is assumed to not be an animal. Any given text snippet
-//           content must have 30,000 characters or less,  and also be UTF-8 NFC
-//           encoded (ASCII already is).           The document .JSONL file contains, per line, a proto that wraps a
-//           Document proto with input_config set. Only PDF documents are
-//           supported now, and each document may be up to 2MB large. Currently
-//           annotations on documents cannot be specified at import. Any given
-//           .JSONL file must be 100MB or smaller.
+//           content must be 10KB or smaller, and also be UTF-8 NFC encoded
+//           (ASCII already is).
+//           The document .JSONL file contains, per line, a proto that wraps a
+//           Document proto. The Document proto must have either document_text
+//           or input_config set. In document_text case, the Document proto may
+//           also contain the spatial information of the document, including
+//           layout, document dimensions and page number. In input_config case,
+//           only PDF documents are supported now, and each document may be up
+//           to 2MB large. Currently, annotations on documents cannot be
+//           specified at import.
 //         Three sample CSV rows:
 //           TRAIN,gs://folder/file1.jsonl
 //           VALIDATE,gs://folder/file2.jsonl
@@ -162,27 +167,61 @@ option ruby_package = "Google::Cloud::AutoML::V1beta1";
 //         with artificial line breaks, but the only actual line break is
 //         denoted by \n).:
 //           {
-//             "text_snippet": {
-//               "content": "dog car cat"
-//             }             "annotations": [
-//               {
-//                 "display_name": "animal",
-//                 "text_extraction": {
-//                   "text_segment": {"start_offset": 0, "end_offset": 3}
+//             "document": {
+//               "document_text": {"content": "dog cat"},
+//               "layout": [
+//                 {
+//                   "text_segment": {
+//                     "start_offset": 0,
+//                     "end_offset": 3,
+//                   },
+//                   "page_number": 1,
+//                   "bounding_poly": {
+//                     "normalized_vertices": [
+//                       {"x": 0.1, "y": 0.1},
+//                       {"x": 0.1, "y": 0.3},
+//                       {"x": 0.3, "y": 0.3},
+//                       {"x": 0.3, "y": 0.1},
+//                     ],
+//                   },
+//                   "text_segment_type": TOKEN,
+//                 },
+//                 {
+//                   "text_segment": {
+//                     "start_offset": 4,
+//                     "end_offset": 7,
+//                   },
+//                   "page_number": 1,
+//                   "bounding_poly": {
+//                     "normalized_vertices": [
+//                       {"x": 0.4, "y": 0.1},
+//                       {"x": 0.4, "y": 0.3},
+//                       {"x": 0.8, "y": 0.3},
+//                       {"x": 0.8, "y": 0.1},
+//                     ],
+//                   },
+//                   "text_segment_type": TOKEN,
 //                 }
-//               },
+//
+//               ],
+//               "document_dimensions": {
+//                 "width": 8.27,
+//                 "height": 11.69,
+//                 "unit": INCH,
+//               },
+//               "page_count": 1,
+//             },
+//             "annotations": [
 //               {
-//                 "display_name": "vehicle",
-//                 "text_extraction": {
-//                   "text_segment": {"start_offset": 4, "end_offset": 7}
-//                 }
+//                 "display_name": "animal",
+//                 "text_extraction": {"text_segment": {"start_offset": 0,
+//                 "end_offset": 3}}
 //               },
 //               {
 //                 "display_name": "animal",
-//                 "text_extraction": {
-//                   "text_segment": {"start_offset": 8, "end_offset": 11}
-//                 }
-//               },
+//                 "text_extraction": {"text_segment": {"start_offset": 4,
+//                 "end_offset": 7}}
+//               }
 //             ],
 //           }\n
 //           {
diff --git a/automl/google/cloud/automl_v1beta1/proto/io_pb2.py b/automl/google/cloud/automl_v1beta1/proto/io_pb2.py
index 161b9d25ad62..62cd25fdd121 100644
--- a/automl/google/cloud/automl_v1beta1/proto/io_pb2.py
+++ b/automl/google/cloud/automl_v1beta1/proto/io_pb2.py
@@ -1029,20 +1029,23 @@
   -  For Text Extraction: CSV file(s) with each line in format:
      ML\_USE,GCS\_FILE\_PATH GCS\_FILE\_PATH leads to a .JSONL (that is,
      JSON Lines) file which either imports text in-line or as documents.
-     The in-line .JSONL file contains, per line, a proto that wraps a
-     TextSnippet proto (in json representation) followed by one or more
-     AnnotationPayload protos (called annotations), which have
-     display\_name and text\_extraction detail populated. The given text
-     is expected to be annotated exhaustively, for example, if you look
-     for animals and text contains "dolphin" that is not labeled, then
-     "dolphin" is assumed to not be an animal. Any given text snippet
-     content must have 30,000 characters or less, and also be UTF-8 NFC
-     encoded (ASCII already is). The document .JSONL file contains, per
-     line, a proto that wraps a Document proto with input\_config set.
-     Only PDF documents are supported now, and each document may be up to
-     2MB large. Currently annotations on documents cannot be specified at
-     import. Any given .JSONL file must be 100MB or smaller. Three sample
-     CSV rows: TRAIN,gs://folder/file1.jsonl
+     Any given .JSONL file must be 100MB or smaller. The in-line .JSONL
+     file contains, per line, a proto that wraps a TextSnippet proto (in
+     json representation) followed by one or more AnnotationPayload protos
+     (called annotations), which have display\_name and text\_extraction
+     detail populated. The given text is expected to be annotated
+     exhaustively, for example, if you look for animals and text contains
+     "dolphin" that is not labeled, then "dolphin" is assumed to not be an
+     animal. Any given text snippet content must be 10KB or smaller, and
+     also be UTF-8 NFC encoded (ASCII already is). The document .JSONL
+     file contains, per line, a proto that wraps a Document proto. The
+     Document proto must have either document\_text or input\_config set.
+     In document\_text case, the Document proto may also contain the
+     spatial information of the document, including layout, document
+     dimension and page number. In input\_config case, only PDF documents
+     are supported now, and each document may be up to 2MB large.
+     Currently, annotations on documents cannot be specified at import.
+     Three sample CSV rows: TRAIN,gs://folder/file1.jsonl
      VALIDATE,gs://folder/file2.jsonl TEST,gs://folder/file3.jsonl 
   
   -  For Text Classification: CSV file(s) with each line in format:
diff --git a/automl/google/cloud/automl_v1beta1/proto/prediction_service.proto b/automl/google/cloud/automl_v1beta1/proto/prediction_service.proto
index 243849213e38..57f1b794e716 100644
--- a/automl/google/cloud/automl_v1beta1/proto/prediction_service.proto
+++ b/automl/google/cloud/automl_v1beta1/proto/prediction_service.proto
@@ -18,12 +18,12 @@ syntax = "proto3";
 package google.cloud.automl.v1beta1;
 
 import "google/api/annotations.proto";
+import "google/api/client.proto";
 import "google/cloud/automl/v1beta1/annotation_payload.proto";
 import "google/cloud/automl/v1beta1/data_items.proto";
 import "google/cloud/automl/v1beta1/io.proto";
 import "google/cloud/automl/v1beta1/operations.proto";
 import "google/longrunning/operations.proto";
-import "google/api/client.proto";
 
 option go_package = "google.golang.org/genproto/googleapis/cloud/automl/v1beta1;automl";
 option java_multiple_files = true;
@@ -38,7 +38,8 @@ option ruby_package = "Google::Cloud::AutoML::V1beta1";
 // snake_case or kebab-case, either of those cases is accepted.
 service PredictionService {
   option (google.api.default_host) = "automl.googleapis.com";
-  option (google.api.oauth_scopes) = "https://www.googleapis.com/auth/cloud-platform";
+  option (google.api.oauth_scopes) =
+      "https://www.googleapis.com/auth/cloud-platform";
 
   // Perform an online prediction. The prediction result will be directly
   // returned in the response.
@@ -66,12 +67,14 @@ service PredictionService {
     };
   }
 
-  // Perform a batch prediction. Unlike the online [Predict][google.cloud.automl.v1beta1.PredictionService.Predict], batch
+  // Perform a batch prediction. Unlike the online
+  // [Predict][google.cloud.automl.v1beta1.PredictionService.Predict], batch
   // prediction result won't be immediately available in the response. Instead,
   // a long running operation object is returned. User can poll the operation
   // result via [GetOperation][google.longrunning.Operations.GetOperation]
-  // method. Once the operation is done, [BatchPredictResult][google.cloud.automl.v1beta1.BatchPredictResult] is returned in
-  // the [response][google.longrunning.Operation.response] field.
+  // method. Once the operation is done,
+  // [BatchPredictResult][google.cloud.automl.v1beta1.BatchPredictResult] is
+  // returned in the [response][google.longrunning.Operation.response] field.
   // Available for following ML problems:
   // * Image Classification
   // * Image Object Detection
@@ -86,7 +89,8 @@ service PredictionService {
   }
 }
 
-// Request message for [PredictionService.Predict][google.cloud.automl.v1beta1.PredictionService.Predict].
+// Request message for
+// [PredictionService.Predict][google.cloud.automl.v1beta1.PredictionService.Predict].
 message PredictRequest {
   // Name of the model requested to serve the prediction.
   string name = 1;
@@ -122,12 +126,20 @@ message PredictRequest {
   map<string, string> params = 3;
 }
 
-// Response message for [PredictionService.Predict][google.cloud.automl.v1beta1.PredictionService.Predict].
+// Response message for
+// [PredictionService.Predict][google.cloud.automl.v1beta1.PredictionService.Predict].
 message PredictResponse {
   // Prediction result.
   // Translation and Text Sentiment will return precisely one payload.
   repeated AnnotationPayload payload = 1;
 
+  // The preprocessed example that AutoML actually makes predictions on.
+  // Empty if AutoML does not preprocess the input example.
+  // * For Text Extraction:
+  //   If the input is a .pdf file, the OCR'ed text will be provided in
+  //   [document_text][google.cloud.automl.v1beta1.Document.document_text].
+  ExamplePayload preprocessed_input = 3;
+
   // Additional domain-specific prediction response metadata.
   //
   // * For Image Object Detection:
@@ -146,7 +158,8 @@ message PredictResponse {
   map<string, string> metadata = 2;
 }
 
-// Request message for [PredictionService.BatchPredict][google.cloud.automl.v1beta1.PredictionService.BatchPredict].
+// Request message for
+// [PredictionService.BatchPredict][google.cloud.automl.v1beta1.PredictionService.BatchPredict].
 message BatchPredictRequest {
   // Name of the model requested to serve the batch prediction.
   string name = 1;
@@ -226,7 +239,8 @@ message BatchPredictRequest {
 
 // Result of the Batch Predict. This message is returned in
 // [response][google.longrunning.Operation.response] of the operation returned
-// by the [PredictionService.BatchPredict][google.cloud.automl.v1beta1.PredictionService.BatchPredict].
+// by the
+// [PredictionService.BatchPredict][google.cloud.automl.v1beta1.PredictionService.BatchPredict].
 message BatchPredictResult {
   // Additional domain-specific prediction response metadata.
   //
diff --git a/automl/google/cloud/automl_v1beta1/proto/prediction_service_pb2.py b/automl/google/cloud/automl_v1beta1/proto/prediction_service_pb2.py
index 589a74ba7fe4..751f16ef8f5b 100644
--- a/automl/google/cloud/automl_v1beta1/proto/prediction_service_pb2.py
+++ b/automl/google/cloud/automl_v1beta1/proto/prediction_service_pb2.py
@@ -16,6 +16,7 @@
 
 
 from google.api import annotations_pb2 as google_dot_api_dot_annotations__pb2
+from google.api import client_pb2 as google_dot_api_dot_client__pb2
 from google.cloud.automl_v1beta1.proto import (
     annotation_payload_pb2 as google_dot_cloud_dot_automl__v1beta1_dot_proto_dot_annotation__payload__pb2,
 )
@@ -31,7 +32,6 @@
 from google.longrunning import (
     operations_pb2 as google_dot_longrunning_dot_operations__pb2,
 )
-from google.api import client_pb2 as google_dot_api_dot_client__pb2
 
 
 DESCRIPTOR = _descriptor.FileDescriptor(
@@ -42,16 +42,16 @@
         "\n\037com.google.cloud.automl.v1beta1B\026PredictionServiceProtoP\001ZAgoogle.golang.org/genproto/googleapis/cloud/automl/v1beta1;automl\312\002\033Google\\Cloud\\AutoMl\\V1beta1\352\002\036Google::Cloud::AutoML::V1beta1"
     ),
     serialized_pb=_b(
-        '\n:google/cloud/automl_v1beta1/proto/prediction_service.proto\x12\x1bgoogle.cloud.automl.v1beta1\x1a\x1cgoogle/api/annotations.proto\x1a:google/cloud/automl_v1beta1/proto/annotation_payload.proto\x1a\x32google/cloud/automl_v1beta1/proto/data_items.proto\x1a*google/cloud/automl_v1beta1/proto/io.proto\x1a\x32google/cloud/automl_v1beta1/proto/operations.proto\x1a#google/longrunning/operations.proto\x1a\x17google/api/client.proto"\xd4\x01\n\x0ePredictRequest\x12\x0c\n\x04name\x18\x01 \x01(\t\x12<\n\x07payload\x18\x02 \x01(\x0b\x32+.google.cloud.automl.v1beta1.ExamplePayload\x12G\n\x06params\x18\x03 \x03(\x0b\x32\x37.google.cloud.automl.v1beta1.PredictRequest.ParamsEntry\x1a-\n\x0bParamsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01"\xd1\x01\n\x0fPredictResponse\x12?\n\x07payload\x18\x01 \x03(\x0b\x32..google.cloud.automl.v1beta1.AnnotationPayload\x12L\n\x08metadata\x18\x02 \x03(\x0b\x32:.google.cloud.automl.v1beta1.PredictResponse.MetadataEntry\x1a/\n\rMetadataEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01"\xba\x02\n\x13\x42\x61tchPredictRequest\x12\x0c\n\x04name\x18\x01 \x01(\t\x12J\n\x0cinput_config\x18\x03 \x01(\x0b\x32\x34.google.cloud.automl.v1beta1.BatchPredictInputConfig\x12L\n\routput_config\x18\x04 \x01(\x0b\x32\x35.google.cloud.automl.v1beta1.BatchPredictOutputConfig\x12L\n\x06params\x18\x05 \x03(\x0b\x32<.google.cloud.automl.v1beta1.BatchPredictRequest.ParamsEntry\x1a-\n\x0bParamsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01"\x96\x01\n\x12\x42\x61tchPredictResult\x12O\n\x08metadata\x18\x01 \x03(\x0b\x32=.google.cloud.automl.v1beta1.BatchPredictResult.MetadataEntry\x1a/\n\rMetadataEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 
\x01(\t:\x02\x38\x01\x32\xb4\x03\n\x11PredictionService\x12\xa8\x01\n\x07Predict\x12+.google.cloud.automl.v1beta1.PredictRequest\x1a,.google.cloud.automl.v1beta1.PredictResponse"B\x82\xd3\xe4\x93\x02<"7/v1beta1/{name=projects/*/locations/*/models/*}:predict:\x01*\x12\xa8\x01\n\x0c\x42\x61tchPredict\x12\x30.google.cloud.automl.v1beta1.BatchPredictRequest\x1a\x1d.google.longrunning.Operation"G\x82\xd3\xe4\x93\x02\x41"</v1beta1/{name=projects/*/locations/*/models/*}:batchPredict:\x01*\x1aI\xca\x41\x15\x61utoml.googleapis.com\xd2\x41.https://www.googleapis.com/auth/cloud-platformB\xbd\x01\n\x1f\x63om.google.cloud.automl.v1beta1B\x16PredictionServiceProtoP\x01ZAgoogle.golang.org/genproto/googleapis/cloud/automl/v1beta1;automl\xca\x02\x1bGoogle\\Cloud\\AutoMl\\V1beta1\xea\x02\x1eGoogle::Cloud::AutoML::V1beta1b\x06proto3'
+        '\n:google/cloud/automl_v1beta1/proto/prediction_service.proto\x12\x1bgoogle.cloud.automl.v1beta1\x1a\x1cgoogle/api/annotations.proto\x1a\x17google/api/client.proto\x1a:google/cloud/automl_v1beta1/proto/annotation_payload.proto\x1a\x32google/cloud/automl_v1beta1/proto/data_items.proto\x1a*google/cloud/automl_v1beta1/proto/io.proto\x1a\x32google/cloud/automl_v1beta1/proto/operations.proto\x1a#google/longrunning/operations.proto"\xd4\x01\n\x0ePredictRequest\x12\x0c\n\x04name\x18\x01 \x01(\t\x12<\n\x07payload\x18\x02 \x01(\x0b\x32+.google.cloud.automl.v1beta1.ExamplePayload\x12G\n\x06params\x18\x03 \x03(\x0b\x32\x37.google.cloud.automl.v1beta1.PredictRequest.ParamsEntry\x1a-\n\x0bParamsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01"\x9a\x02\n\x0fPredictResponse\x12?\n\x07payload\x18\x01 \x03(\x0b\x32..google.cloud.automl.v1beta1.AnnotationPayload\x12G\n\x12preprocessed_input\x18\x03 \x01(\x0b\x32+.google.cloud.automl.v1beta1.ExamplePayload\x12L\n\x08metadata\x18\x02 \x03(\x0b\x32:.google.cloud.automl.v1beta1.PredictResponse.MetadataEntry\x1a/\n\rMetadataEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01"\xba\x02\n\x13\x42\x61tchPredictRequest\x12\x0c\n\x04name\x18\x01 \x01(\t\x12J\n\x0cinput_config\x18\x03 \x01(\x0b\x32\x34.google.cloud.automl.v1beta1.BatchPredictInputConfig\x12L\n\routput_config\x18\x04 \x01(\x0b\x32\x35.google.cloud.automl.v1beta1.BatchPredictOutputConfig\x12L\n\x06params\x18\x05 \x03(\x0b\x32<.google.cloud.automl.v1beta1.BatchPredictRequest.ParamsEntry\x1a-\n\x0bParamsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01"\x96\x01\n\x12\x42\x61tchPredictResult\x12O\n\x08metadata\x18\x01 \x03(\x0b\x32=.google.cloud.automl.v1beta1.BatchPredictResult.MetadataEntry\x1a/\n\rMetadataEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 
\x01(\t:\x02\x38\x01\x32\xb4\x03\n\x11PredictionService\x12\xa8\x01\n\x07Predict\x12+.google.cloud.automl.v1beta1.PredictRequest\x1a,.google.cloud.automl.v1beta1.PredictResponse"B\x82\xd3\xe4\x93\x02<"7/v1beta1/{name=projects/*/locations/*/models/*}:predict:\x01*\x12\xa8\x01\n\x0c\x42\x61tchPredict\x12\x30.google.cloud.automl.v1beta1.BatchPredictRequest\x1a\x1d.google.longrunning.Operation"G\x82\xd3\xe4\x93\x02\x41"</v1beta1/{name=projects/*/locations/*/models/*}:batchPredict:\x01*\x1aI\xca\x41\x15\x61utoml.googleapis.com\xd2\x41.https://www.googleapis.com/auth/cloud-platformB\xbd\x01\n\x1f\x63om.google.cloud.automl.v1beta1B\x16PredictionServiceProtoP\x01ZAgoogle.golang.org/genproto/googleapis/cloud/automl/v1beta1;automl\xca\x02\x1bGoogle\\Cloud\\AutoMl\\V1beta1\xea\x02\x1eGoogle::Cloud::AutoML::V1beta1b\x06proto3'
     ),
     dependencies=[
         google_dot_api_dot_annotations__pb2.DESCRIPTOR,
+        google_dot_api_dot_client__pb2.DESCRIPTOR,
         google_dot_cloud_dot_automl__v1beta1_dot_proto_dot_annotation__payload__pb2.DESCRIPTOR,
         google_dot_cloud_dot_automl__v1beta1_dot_proto_dot_data__items__pb2.DESCRIPTOR,
         google_dot_cloud_dot_automl__v1beta1_dot_proto_dot_io__pb2.DESCRIPTOR,
         google_dot_cloud_dot_automl__v1beta1_dot_proto_dot_operations__pb2.DESCRIPTOR,
         google_dot_longrunning_dot_operations__pb2.DESCRIPTOR,
-        google_dot_api_dot_client__pb2.DESCRIPTOR,
     ],
 )
 
@@ -239,8 +239,8 @@
     syntax="proto3",
     extension_ranges=[],
     oneofs=[],
-    serialized_start=769,
-    serialized_end=816,
+    serialized_start=842,
+    serialized_end=889,
 )
 
 _PREDICTRESPONSE = _descriptor.Descriptor(
@@ -268,10 +268,28 @@
             serialized_options=None,
             file=DESCRIPTOR,
         ),
+        _descriptor.FieldDescriptor(
+            name="preprocessed_input",
+            full_name="google.cloud.automl.v1beta1.PredictResponse.preprocessed_input",
+            index=1,
+            number=3,
+            type=11,
+            cpp_type=10,
+            label=1,
+            has_default_value=False,
+            default_value=None,
+            message_type=None,
+            enum_type=None,
+            containing_type=None,
+            is_extension=False,
+            extension_scope=None,
+            serialized_options=None,
+            file=DESCRIPTOR,
+        ),
         _descriptor.FieldDescriptor(
             name="metadata",
             full_name="google.cloud.automl.v1beta1.PredictResponse.metadata",
-            index=1,
+            index=2,
             number=2,
             type=11,
             cpp_type=10,
@@ -296,7 +314,7 @@
     extension_ranges=[],
     oneofs=[],
     serialized_start=607,
-    serialized_end=816,
+    serialized_end=889,
 )
 
 
@@ -444,8 +462,8 @@
     syntax="proto3",
     extension_ranges=[],
     oneofs=[],
-    serialized_start=819,
-    serialized_end=1133,
+    serialized_start=892,
+    serialized_end=1206,
 )
 
 
@@ -501,8 +519,8 @@
     syntax="proto3",
     extension_ranges=[],
     oneofs=[],
-    serialized_start=769,
-    serialized_end=816,
+    serialized_start=842,
+    serialized_end=889,
 )
 
 _BATCHPREDICTRESULT = _descriptor.Descriptor(
@@ -539,8 +557,8 @@
     syntax="proto3",
     extension_ranges=[],
     oneofs=[],
-    serialized_start=1136,
-    serialized_end=1286,
+    serialized_start=1209,
+    serialized_end=1359,
 )
 
 _PREDICTREQUEST_PARAMSENTRY.containing_type = _PREDICTREQUEST
@@ -556,6 +574,11 @@
 ].message_type = (
     google_dot_cloud_dot_automl__v1beta1_dot_proto_dot_annotation__payload__pb2._ANNOTATIONPAYLOAD
 )
+_PREDICTRESPONSE.fields_by_name[
+    "preprocessed_input"
+].message_type = (
+    google_dot_cloud_dot_automl__v1beta1_dot_proto_dot_data__items__pb2._EXAMPLEPAYLOAD
+)
 _PREDICTRESPONSE.fields_by_name[
     "metadata"
 ].message_type = _PREDICTRESPONSE_METADATAENTRY
@@ -656,6 +679,12 @@
       payload:
           Prediction result. Translation and Text Sentiment will return
           precisely one payload.
+      preprocessed_input:
+          The preprocessed example that AutoML actually makes prediction
+          on. Empty if AutoML does not preprocess the input example. \*
+          For Text Extraction: If the input is a .pdf file, the OCR'ed
+          text will be provided in [document\_text][google.cloud.automl.
+          v1beta1.Document.document\_text].
       metadata:
           Additional domain-specific prediction response metadata.  -
           For Image Object Detection: ``max_bounding_box_count`` -
@@ -817,8 +846,8 @@
     serialized_options=_b(
         "\312A\025automl.googleapis.com\322A.https://www.googleapis.com/auth/cloud-platform"
     ),
-    serialized_start=1289,
-    serialized_end=1725,
+    serialized_start=1362,
+    serialized_end=1798,
     methods=[
         _descriptor.MethodDescriptor(
             name="Predict",
diff --git a/automl/google/cloud/automl_v1beta1/proto/prediction_service_pb2_grpc.py b/automl/google/cloud/automl_v1beta1/proto/prediction_service_pb2_grpc.py
index 9f6bf57d385f..8049017e1bcb 100644
--- a/automl/google/cloud/automl_v1beta1/proto/prediction_service_pb2_grpc.py
+++ b/automl/google/cloud/automl_v1beta1/proto/prediction_service_pb2_grpc.py
@@ -67,12 +67,14 @@ def Predict(self, request, context):
         raise NotImplementedError("Method not implemented!")
 
     def BatchPredict(self, request, context):
-        """Perform a batch prediction. Unlike the online [Predict][google.cloud.automl.v1beta1.PredictionService.Predict], batch
+        """Perform a batch prediction. Unlike the online
+    [Predict][google.cloud.automl.v1beta1.PredictionService.Predict], batch
     prediction result won't be immediately available in the response. Instead,
     a long running operation object is returned. User can poll the operation
     result via [GetOperation][google.longrunning.Operations.GetOperation]
-    method. Once the operation is done, [BatchPredictResult][google.cloud.automl.v1beta1.BatchPredictResult] is returned in
-    the [response][google.longrunning.Operation.response] field.
+    method. Once the operation is done,
+    [BatchPredictResult][google.cloud.automl.v1beta1.BatchPredictResult] is
+    returned in the [response][google.longrunning.Operation.response] field.
     Available for following ML problems:
     * Image Classification
     * Image Object Detection
diff --git a/automl/synth.metadata b/automl/synth.metadata
index 86203594b3f4..7cf7214eecc8 100644
--- a/automl/synth.metadata
+++ b/automl/synth.metadata
@@ -1,19 +1,19 @@
 {
-  "updateTime": "2019-08-15T16:50:20.093543Z",
+  "updateTime": "2019-08-26T23:45:30.964132Z",
   "sources": [
     {
       "generator": {
         "name": "artman",
-        "version": "0.33.0",
-        "dockerImage": "googleapis/artman@sha256:c6231efb525569736226b1f7af7565dbc84248efafb3692a5bb1d2d8a7975d53"
+        "version": "0.35.0",
+        "dockerImage": "googleapis/artman@sha256:97ef134b6b1cc2c21868960d3b4352524023fb25b61fc137ca0783ce3c08c2cd"
       }
     },
     {
       "git": {
         "name": "googleapis",
         "remote": "https://github.com/googleapis/googleapis.git",
-        "sha": "3406d1e899f1f41123b3fa9210ad4bef25c9a720",
-        "internalRef": "263234709"
+        "sha": "650caad718bb063f189405c23972dc9818886358",
+        "internalRef": "265565344"
       }
     },
     {
diff --git a/automl/synth.py b/automl/synth.py
index aad3d09483fc..5a4c81241de3 100644
--- a/automl/synth.py
+++ b/automl/synth.py
@@ -91,9 +91,9 @@
 s.replace(
     "google/cloud/**/io_pb2.py",
     r"""Sample
-     in-line JSON Lines file.*?\}`\n""",
-    "\n",
-    flags=re.DOTALL,
+     in-line JSON Lines.*?(\n\s+-\s+For Text Classification.*\n)""",
+    r"\g<1>",  # raw string: \g<1> is a regex group reference, not a str escape
+    flags=re.DOTALL,
 )
 
 # Replace docstring with no summary line