Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow the Type override to effect downstream types #1550

Merged
merged 6 commits into from
Dec 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion pkg/tfbridge/info.go
Original file line number Diff line number Diff line change
Expand Up @@ -416,6 +416,10 @@ type SchemaInfo struct {
CSharpName string

// a type to override the default; "" uses the default.
//
// If the type overridden is an object type, `Type: newName` is interpreted as a
// move operation, pointing the property to a type called `newName` and creating
// `newName` with the schema described by this type.
Type tokens.Type

// alternative types that can be used instead of the override.
Expand All @@ -424,7 +428,7 @@ type SchemaInfo struct {
// a type to override when the property is a nested structure.
NestedType tokens.Type

// an optional idemponent transformation, applied before passing to TF.
// an optional idempotent transformation, applied before passing to TF.
Transform Transformer

// a schema override for elements for arrays, maps, and sets.
Expand Down
107 changes: 84 additions & 23 deletions pkg/tfgen/generate_schema.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,17 +67,19 @@ type schemaNestedType struct {
}

type schemaNestedTypes struct {
nameToType map[string]*schemaNestedType
nameToType map[string]*schemaNestedType
pathCorrections map[string]paths.TypePath
}

func gatherSchemaNestedTypesForModule(mod *module) map[string]*schemaNestedType {
func gatherSchemaNestedTypesForModule(mod *module) (map[string]*schemaNestedType, map[string]paths.TypePath) {
nt := &schemaNestedTypes{
nameToType: make(map[string]*schemaNestedType),
nameToType: make(map[string]*schemaNestedType),
pathCorrections: make(map[string]paths.TypePath),
}
for _, member := range mod.members {
nt.gatherFromMember(member)
}
return nt.nameToType
return nt.nameToType, nt.pathCorrections
}

func gatherSchemaNestedTypesForMember(member moduleMember) map[string]*schemaNestedType {
Expand Down Expand Up @@ -112,9 +114,7 @@ type declarer interface {
Name() string
}

func (nt *schemaNestedTypes) declareType(typePath paths.TypePath, declarer declarer, namePrefix, name string,
typ *propertyType, isInput bool) string {

func (nt *schemaNestedTypes) typeName(namePrefix, name string, typ *propertyType) string {
// Generate a name for this nested type.
typeName := namePrefix + cases.Title(language.Und, cases.NoLower).String(name)

Expand All @@ -123,7 +123,21 @@ func (nt *schemaNestedTypes) declareType(typePath paths.TypePath, declarer decla
typeName = typ.nestedType.Name().String()
}

if typ.typ != "" && typ.kind == kindObject {
typeName = typ.typ.Name().String()
}

typ.name = typeName
return typ.name
}

// declareType emits the declared type into nt.
//
// declareType requires that:
// 1. nt.typeName(..., typ) has been called previously on this type.
// 2. All nested types have already been declare.
func (nt *schemaNestedTypes) declareType(typePath paths.TypePath, declarer declarer,
typ *propertyType, isInput bool) {

required := codegen.StringSet{}
for _, p := range typ.properties {
Expand All @@ -140,8 +154,9 @@ func (nt *schemaNestedTypes) declareType(typePath paths.TypePath, declarer decla
}

// Merging makes sure that structurally identical types are shared and not generated more than once.
if existing, ok := nt.nameToType[typeName]; ok {
contract.Assertf(existing.declarer == declarer || existing.typ.equals(typ), "duplicate type %v", typeName)
if existing, ok := nt.nameToType[typ.name]; ok {
contract.Assertf(existing.declarer == declarer || existing.typ.equals(typ),
"%s declared twice with different values", typ.name)

// Remember that existing type is now also seen at the current typePath.
existing.typePaths.Add(typePath)
Expand All @@ -156,18 +171,17 @@ func (nt *schemaNestedTypes) declareType(typePath paths.TypePath, declarer decla
}

existing.typ, existing.required = typ, required
return typeName
return
}

nt.nameToType[typeName] = &schemaNestedType{
nt.nameToType[typ.name] = &schemaNestedType{
typ: typ,
declarer: declarer,
required: required,
requiredInputs: requiredInputs,
requiredOutputs: requiredOutputs,
typePaths: paths.SingletonTypePathSet(typePath),
}
return typeName
}

func (nt *schemaNestedTypes) gatherFromProperties(pathContext paths.TypePath,
Expand All @@ -180,11 +194,23 @@ func (nt *schemaNestedTypes) gatherFromProperties(pathContext paths.TypePath,
name = inflector.Singularize(name)
}

nt.gatherFromPropertyType(paths.NewProperyPath(pathContext, p.propertyName),
declarer, namePrefix, name, p.typ, isInput)
var path paths.TypePath = paths.NewProperyPath(pathContext, p.propertyName)
if t := p.typ.typ; t != "" && p.typ.kind == kindObject {
namePrefix = ""
newPath := paths.NewRawPath(t, path)
nt.correctPath(path, newPath)
path = newPath
}

nt.gatherFromPropertyType(path, declarer, namePrefix, name, p.typ, isInput)
}
}

// Insert a type path redirect, setting src to point to dst.
func (nt *schemaNestedTypes) correctPath(src, dst paths.TypePath) {
nt.pathCorrections[src.UniqueKey()] = dst
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IMPL nit: I'm losing track a little bit of the lifetime of this table and the need to track it. Is this to power through failing renames functionality? Does it not overlap with info already stored in paths.NewRawPath , e.g. could we get by with not using it, or else not using paths.NewRawPath, or indeed not using either that'd make me the most happy.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The rename builder logic expects that it can derive the name of an object type from its structural path. This is true up to these corrections, so we need to store a mapping from the old (fully structural) name to the new (user supplied & arbitrary) path.

When the rename builder walks a resource: res_r1.prop1.nested1, it needs to build the associated property path. It builds [resource: res_r1][property: prop1][property: nested1]. If prop1 was renamed, we need to recover the correction during the rename traversal so we store {[resource: res_r1][property: prop1]: [raw: userRename]} in nt.pathCorrections.

This is then used in

func (r renamesBuilder) correctPath(path paths.TypePath) paths.TypePath {
.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See comment above. The name sems to be a function from TypePath and top-level map[string]SchemaInfo but this may be a good form of caching. No matter, I'm leaning toward removing renames from the codebase altogether and if we make that happen we can probably drop this bit as well.

}

func (nt *schemaNestedTypes) gatherFromPropertyType(typePath paths.TypePath, declarer declarer, namePrefix,
name string, typ *propertyType, isInput bool) {

Expand All @@ -195,8 +221,19 @@ func (nt *schemaNestedTypes) gatherFromPropertyType(typePath paths.TypePath, dec
declarer, namePrefix, name, typ.element, isInput)
}
case kindObject:
baseName := nt.declareType(typePath, declarer, namePrefix, name, typ, isInput)
if typ.typ != "" {
namePrefix = ""
typePath = paths.NewRawPath(typ.typ, typePath)
}

// Because nt.declareType expects the declared type to be fully formed (to
// allow an equality comparison against other fully formed types), all
// nested types must themselves be fully formed.
//
// gatherFromProperties must be called before declareType.
baseName := nt.typeName(namePrefix, name, typ)
nt.gatherFromProperties(typePath, declarer, baseName, typ.properties, isInput)
nt.declareType(typePath, declarer, typ, isInput)
}
}

Expand Down Expand Up @@ -247,13 +284,33 @@ func (g *schemaGenerator) genPackageSpec(pack *pkg) (pschema.PackageSpec, error)
spec.Attribution = fmt.Sprintf(attributionFormatString, g.info.Name, g.info.GetGitHubOrg(), g.info.GetGitHubHost())

var config []*variable
declaredTypes := map[string]*schemaNestedType{}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I like this deconfliction check! QQ. what if there's conflicts via some "other" keys in spec.Types than the ones generated for schemaNestedTypes or this is a non-sequitur (they're one and the same)? Thanks!

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

At this point in the generation process, they are one and the same. Other types might be added later via ProviderInfo.ExtraTypes.

for _, mod := range pack.modules.values() {
// Generate nested types.
for _, t := range gatherSchemaNestedTypesForModule(mod) {
nestedTypes, pathCorrections := gatherSchemaNestedTypesForModule(mod)
if b := g.renamesBuilder; b != nil {
if b.pathCorrections == nil {
b.pathCorrections = pathCorrections
} else {
for k, v := range pathCorrections {
b.pathCorrections[k] = v
}
}
}
for _, t := range nestedTypes {
tok := g.genObjectTypeToken(t)
ts := g.genObjectType(t, false)
spec.Types[tok] = pschema.ComplexTypeSpec{
ObjectTypeSpec: ts,

if existing, ok := declaredTypes[tok]; ok {
if !t.typ.equals(existing.typ) {
return pschema.PackageSpec{},
fmt.Errorf("%s: conflicting type definitions", tok)
}
} else {
declaredTypes[tok] = t
spec.Types[tok] = pschema.ComplexTypeSpec{
ObjectTypeSpec: ts,
}
}
}

Expand Down Expand Up @@ -764,11 +821,13 @@ func (g *schemaGenerator) genObjectTypeToken(typInfo *schemaNestedType) string {
}

mod := modulePlacementForTypeSet(g.pkg, typInfo.typePaths)
token := fmt.Sprintf("%s/%s:%s", mod.String(), name, name)

g.renamesBuilder.registerNamedObjectType(typInfo.typePaths, tokens.Type(token))
token := tokens.Type(fmt.Sprintf("%s/%s:%s", mod, name, name))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IMPL nit: naively this function is the key function that I'd expected to change in this PR. It freely decides where to put a TF type (in conjunction with the helper modulePlacementFor*). The placement is based on module and name. I'd expect we are not changing the type name. In that case we are down to:

modulePlacementForType

The way this worked before the change is looking at prefixes of paths.TypePath. So when allocating for TypePath = "res_r1.prop_1.element.foo_bar" it would recur upward "res_r1.prop_1.element", "res1_r1.prop_1", "res_r1" until it realized it's in the context of allocating a type for resource "res_r1" and pick the appropriate module based on that.

Presumably this traversal could stop early at "res_r1.prop_1" if prop_1 was given an explicit token Type by the user, and decide to allocate the module according to the user's Type token? I guess what I am saying is why does this necessitate a new clause in TypePath when this information could be looked up from map[string]*SchemaInfo and existing TypePath as written?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

naively this function is the key function that I'd expected to change in this PR. It freely decides where to put a TF type (in conjunction with the helper modulePlacementFor*). The placement is based on module and name. I'd expect we are not changing the type name.

We are changing both the module and the name. Consider the following scenario:

resource res_r1:
  prop:
    sub:
     str: string

With SchemaInfo{Type: "res:custom:Obj"} applied to prop.

Direct name change:
The schema object generated for prop needs to have its name changed from R1Prop to Obj and its module changed from index to custom.

Nested name change:
The schema object generated for sub needs to have its name changed from R1PropSub to ObjSub and its module changed from index to custom.

The way this worked before the change is looking at prefixes of paths.TypePath. So when allocating for TypePath = "res_r1.prop_1.element.foo_bar" it would recur upward "res_r1.prop_1.element", "res1_r1.prop_1", "res_r1" until it realized it's in the context of allocating a type for resource "res_r1" and pick the appropriate module based on that.

The way I conceptualize this, when we allocate a type path, we see a path from a root anchor to a property. Root anchors determine the stem of the type name and the module. Previously, a root object was a resource, datasource or config property. My change adds a new root object: RawTypePath, which also determines the name stem and module for types nested below.

Presumably this traversal could stop early at "res_r1.prop_1" if prop_1 was given an explicit token Type by the user, and decide to allocate the module according to the user's Type token?

In effect, this is what happens. We just encode the stop within the path itself, instead of a look aside map.

I guess what I am saying is why does this necessitate a new clause in TypePath when this information could be looked up from map[string]*SchemaInfo and existing TypePath as written?

We need more information than is contained in map[string]*SchemaInfo, we need the full path of the object.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah this is a great clarification that names like ObjSub get affected as well, thank you! I missed that detail.

I'm still not seeing what you'd be missing from map[string]*SchemaInfo. Slightly hand-wavy purely functional definition:

func computeName(infos map[string]SchemaInfo, path) Name {
  schemaInfo := at(infos, path)
  if schemaInfo.Type != "" {
     return schemaInfo.Type.Name()
  }
  if isEmptyPath(path) {
     return ""
  }
  return computeName(infos, parentPath(path)) + capitalize(path.LastFragment)
}

at(infos, "res1.prop") == &SchemaInfo{Type: "res:custom:Obj"}

computeName(infos, "res1.prop.sub") =
computeName(infos, "res1.prop") + "Sub" =
"Obj" + "Sub" =
"ObjSub"

Compute module placement likewise.

You don't need to cache at(infos, "res1.prop") inside a new clause of TypePath seemingly because it's still there alright in the SchemaInfo tree, addressable by TypePath.

It's inefficient, yes (but that can be fixed up with recursion patterns), but what I find super appealing about factoring things out into pure functions if we can is that it makes it extremely obvious that it's independent of the traversal order through the schema tree and the order in which effects are executed against maps etc.

As things stand with the code I'm getting a slight suspicion that I'm still missing something and there is indeed some importance in the order of traversal and so forth.

This is a litlte bit in the code golf territory though so I don't want to block on it, apologies. I don't see any obvious problems where current code is incorrect.

if typ.typ != "" {
token = typ.typ
}

return token
g.renamesBuilder.registerNamedObjectType(typInfo.typePaths, token)
return token.String()
}

func (g *schemaGenerator) genObjectType(typInfo *schemaNestedType, isTopLevel bool) pschema.ObjectTypeSpec {
Expand Down Expand Up @@ -905,7 +964,7 @@ func (g *schemaGenerator) schemaType(path paths.TypePath, typ *propertyType, out
return pschema.TypeSpec{Type: "object", AdditionalProperties: &additionalProperties}
case kindObject:
mod := modulePlacementForType(g.pkg, path)
ref := fmt.Sprintf("#/types/%s/%s:%s", mod.String(), typ.name, typ.name)
ref := fmt.Sprintf("#/types/%s/%s:%s", mod, typ.name, typ.name)
return pschema.TypeSpec{Ref: ref}
default:
contract.Failf("Unrecognized type kind: %v", typ.kind)
Expand Down Expand Up @@ -1128,6 +1187,8 @@ func modulePlacementForType(pkg tokens.Package, path paths.TypePath) tokens.Modu
return parentModuleOrSelf(m)
case *paths.ConfigPath:
return tokens.NewModuleToken(pkg, configMod)
case *paths.RawTypePath:
return pp.Raw().Module()
default:
contract.Assertf(false, "invalid ParentKind")
return ""
Expand Down
Loading