Skip to content

Commit

Permalink
PyOpenCL target: Add, test overflow of large argument counts into SVM…
Browse files Browse the repository at this point in the history
… struct
  • Loading branch information
inducer committed Jul 2, 2022
1 parent 90b0303 commit ae9eb7c
Show file tree
Hide file tree
Showing 9 changed files with 420 additions and 79 deletions.
21 changes: 18 additions & 3 deletions loopy/codegen/result.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,16 @@
THE SOFTWARE.
"""

from typing import Any, Sequence, Mapping, Tuple, Optional
from typing import Any, Sequence, Mapping, Tuple, Optional, TYPE_CHECKING
from dataclasses import dataclass, replace

import islpy as isl


if TYPE_CHECKING:
from loopy.codegen import CodeGenerationState


def process_preambles(preambles: Sequence[Tuple[int, str]]) -> Sequence[str]:
seen_preamble_tags = set()
dedup_preambles = []
Expand Down Expand Up @@ -170,7 +174,8 @@ def all_code(self):
+ "\n\n"
+ str(self.host_program.ast))

def current_program(self, codegen_state):
def current_program(
self, codegen_state: "CodeGenerationState") -> GeneratedProgram:
if codegen_state.is_generating_device_code:
if self.device_programs:
result = self.device_programs[-1]
Expand Down Expand Up @@ -329,13 +334,23 @@ def generate_host_or_device_program(codegen_state, schedule_index):

cur_prog = codegen_result.current_program(codegen_state)
body_ast = cur_prog.ast
fdecl_ast = ast_builder.get_function_declaration(
fdef_preambles, fdecl_ast = ast_builder.get_function_declaration(
codegen_state, codegen_result, schedule_index)

fdef_ast = ast_builder.get_function_definition(
codegen_state, codegen_result,
schedule_index, fdecl_ast, body_ast)

if fdef_preambles:
if codegen_state.is_generating_device_code:
codegen_result = codegen_result.copy(
device_preambles=(
codegen_result.device_preambles + tuple(fdef_preambles)))
else:
codegen_result = codegen_result.copy(
host_preambles=(
codegen_result.host_preambles + tuple(fdef_preambles)))

codegen_result = codegen_result.with_new_program(
codegen_state,
cur_prog.copy(
Expand Down
13 changes: 8 additions & 5 deletions loopy/target/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,8 @@ def get_function_definition(
def get_function_declaration(
self, codegen_state: CodeGenerationState,
codegen_result: CodeGenerationResult, schedule_index: int
) -> ASTType:
) -> Tuple[Sequence[Tuple[str, str]], ASTType]:
"""Returns preambles and the AST for the function declaration."""
raise NotImplementedError

def generate_top_of_body(
Expand Down Expand Up @@ -289,14 +290,16 @@ def __str__(self):
return ""


class DummyHostASTBuilder(ASTBuilderBase):
class DummyHostASTBuilder(ASTBuilderBase[None]):
def get_function_definition(self, codegen_state, codegen_result,
schedule_index, function_decl, function_body):
return function_body

def get_function_declaration(self, codegen_state, codegen_result,
schedule_index):
return None
def get_function_declaration(
self, codegen_state, codegen_result,
schedule_index,
) -> Tuple[Sequence[Tuple[str, str]], None]:
return [], None

def get_temporary_decls(self, codegen_state, schedule_index):
return []
Expand Down
10 changes: 6 additions & 4 deletions loopy/target/c/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
THE SOFTWARE.
"""

from typing import cast, Tuple, Optional
from typing import cast, Tuple, Optional, Sequence
import re

import numpy as np # noqa
Expand Down Expand Up @@ -817,8 +817,10 @@ def get_function_definition(
else:
return Collection(result+[Line(), fbody])

def get_function_declaration(self, codegen_state: CodeGenerationState,
codegen_result: CodeGenerationResult, schedule_index: int) -> Generable:
def get_function_declaration(
self, codegen_state: CodeGenerationState,
codegen_result: CodeGenerationResult, schedule_index: int
) -> Tuple[Sequence[Tuple[str, str]], Generable]:
kernel = codegen_state.kernel

assert codegen_state.kernel.linearization is not None
Expand Down Expand Up @@ -846,7 +848,7 @@ def get_function_declaration(self, codegen_state: CodeGenerationState,
passed_names = [arg.name for arg in kernel.args]
written_names = kernel.get_written_variables()

return FunctionDeclarationWrapper(
return [], FunctionDeclarationWrapper(
FunctionDeclaration(
name,
[self.arg_to_cgen_declarator(
Expand Down
16 changes: 11 additions & 5 deletions loopy/target/cuda.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,12 @@
THE SOFTWARE.
"""

from typing import Tuple, Sequence

import numpy as np
from pymbolic import var
from pytools import memoize_method
from cgen import Declarator, Const
from cgen import Declarator, Const, Generable

from loopy.target.c import CFamilyTarget, CFamilyASTBuilder
from loopy.target.c.codegen.expression import ExpressionToCExpressionMapper
Expand All @@ -35,6 +37,8 @@
from loopy.kernel.array import ArrayBase, FixedStrideArrayDimTag, VectorArrayDimTag
from loopy.kernel.data import AddressSpace, ImageArg, ConstantArg, ArrayArg
from loopy.kernel.function_interface import ScalarCallable
from loopy.codegen.result import CodeGenerationResult
from loopy.codegen import CodeGenerationState


# {{{ vector types
Expand Down Expand Up @@ -320,9 +324,11 @@ def known_callables(self):

# {{{ top-level codegen

def get_function_declaration(self, codegen_state, codegen_result,
schedule_index):
fdecl = super().get_function_declaration(
def get_function_declaration(
self, codegen_state: CodeGenerationState,
codegen_result: CodeGenerationResult, schedule_index: int
) -> Tuple[Sequence[Tuple[str, str]], Generable]:
preambles, fdecl = super().get_function_declaration(
codegen_state, codegen_result, schedule_index)

from loopy.target.c import FunctionDeclarationWrapper
Expand Down Expand Up @@ -352,7 +358,7 @@ def get_function_declaration(self, codegen_state, codegen_result,

fdecl = CudaLaunchBounds(nthreads, fdecl)

return FunctionDeclarationWrapper(fdecl)
return preambles, FunctionDeclarationWrapper(fdecl)

def preamble_generators(self):

Expand Down
10 changes: 6 additions & 4 deletions loopy/target/ispc.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
"""


from typing import cast, Tuple
from typing import cast, Tuple, Sequence

import numpy as np # noqa
import pymbolic.primitives as p
Expand Down Expand Up @@ -202,8 +202,10 @@ def get_dtype_registry(self):
class ISPCASTBuilder(CFamilyASTBuilder):
# {{{ top-level codegen

def get_function_declaration(self, codegen_state: CodeGenerationState,
codegen_result: CodeGenerationResult, schedule_index: int) -> Generable:
def get_function_declaration(
self, codegen_state: CodeGenerationState,
codegen_result: CodeGenerationResult, schedule_index: int
) -> Tuple[Sequence[Tuple[str, str]], Generable]:
name = codegen_result.current_program(codegen_state).name
kernel = codegen_state.kernel

Expand Down Expand Up @@ -243,7 +245,7 @@ def get_function_declaration(self, codegen_state: CodeGenerationState,
arg_decls))

from loopy.target.c import FunctionDeclarationWrapper
return FunctionDeclarationWrapper(result)
return [], FunctionDeclarationWrapper(result)

def get_kernel_call(self, codegen_state: CodeGenerationState,
subkernel_name: str,
Expand Down
24 changes: 17 additions & 7 deletions loopy/target/opencl.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,12 @@
THE SOFTWARE.
"""

from typing import Tuple, Sequence

import numpy as np
from pymbolic import var
from pytools import memoize_method
from cgen import Declarator
from cgen import Declarator, Generable

from loopy.target.c import CFamilyTarget, CFamilyASTBuilder
from loopy.target.c.codegen.expression import ExpressionToCExpressionMapper
Expand All @@ -36,6 +38,8 @@
from loopy.kernel.array import VectorArrayDimTag, FixedStrideArrayDimTag, ArrayBase
from loopy.kernel.data import AddressSpace, ImageArg, ConstantArg
from loopy.kernel.function_interface import ScalarCallable
from loopy.codegen import CodeGenerationState
from loopy.codegen.result import CodeGenerationResult


# {{{ dtype registry wrappers
Expand Down Expand Up @@ -624,20 +628,26 @@ def preamble_generators(self):

# {{{ top-level codegen

def get_function_declaration(self, codegen_state, codegen_result,
schedule_index):
fdecl = super().get_function_declaration(
def get_function_declaration(
self, codegen_state: CodeGenerationState,
codegen_result: CodeGenerationResult, schedule_index: int
) -> Tuple[Sequence[Tuple[str, str]], Generable]:
preambles, fdecl = super().get_function_declaration(
codegen_state, codegen_result, schedule_index)

from loopy.target.c import FunctionDeclarationWrapper
assert isinstance(fdecl, FunctionDeclarationWrapper)
if not codegen_state.is_entrypoint:
# auxiliary kernels need not mention OpenCL-specific qualifiers
# in a function's signature
return fdecl
return preambles, fdecl

fdecl = fdecl.subdecl
return preambles, FunctionDeclarationWrapper(
self._wrap_kernel_decl(codegen_state, schedule_index, fdecl.subdecl))

def _wrap_kernel_decl(
self, codegen_state: CodeGenerationState, schedule_index: int,
fdecl: Declarator) -> Declarator:
from cgen.opencl import CLKernel, CLRequiredWorkGroupSize
fdecl = CLKernel(fdecl)

Expand All @@ -654,7 +664,7 @@ def get_function_declaration(self, codegen_state, codegen_result,

fdecl = CLRequiredWorkGroupSize(local_sizes, fdecl)

return FunctionDeclarationWrapper(fdecl)
return fdecl

def generate_top_of_body(self, codegen_state):
from loopy.kernel.data import ImageArg
Expand Down
Loading

0 comments on commit ae9eb7c

Please sign in to comment.