diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 6b427ed1da834..fac607f4621a8 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -898,23 +898,94 @@ def str_join(arr, sep): def str_findall(arr, pat, flags=0): """ - Find all occurrences of pattern or regular expression in the - Series/Index. Equivalent to :func:`re.findall`. + Find all occurrences of pattern or regular expression in the Series/Index. + + Equivalent to applying :func:`re.findall` to all the elements in the + Series/Index. Parameters ---------- pat : string - Pattern or regular expression - flags : int, default 0 (no flags) - re module flags, e.g. re.IGNORECASE + Pattern or regular expression. + flags : int, default 0 + ``re`` module flags, e.g. `re.IGNORECASE` (default is 0, which means + no flags). Returns ------- - matches : Series/Index of lists + Series/Index of lists of strings + All non-overlapping matches of pattern or regular expression in each + string of this Series/Index. See Also -------- - extractall : returns DataFrame with one column per capture group + count : Count occurrences of pattern or regular expression in each string + of the Series/Index. + extractall : For each string in the Series, extract groups from all matches + of regular expression and return a DataFrame with one row for each + match and one column for each group. + re.findall : The equivalent ``re`` function to all non-overlapping matches + of pattern or regular expression in string, as a list of strings. + + Examples + -------- + + >>> s = pd.Series(['Lion', 'Monkey', 'Rabbit']) + + The search for the pattern 'Monkey' returns one match: + + >>> s.str.findall('Monkey') + 0 [] + 1 [Monkey] + 2 [] + dtype: object + + On the other hand, the search for the pattern 'MONKEY' doesn't return any + match: + + >>> s.str.findall('MONKEY') + 0 [] + 1 [] + 2 [] + dtype: object + + Flags can be added to the pattern or regular expression. For instance, + to find the pattern 'MONKEY' ignoring the case: + + >>> import re + >>> s.str.findall('MONKEY', flags=re.IGNORECASE) + 0 [] + 1 [Monkey] + 2 [] + dtype: object + + When the pattern matches more than one string in the Series, all matches + are returned: + + >>> s.str.findall('on') + 0 [on] + 1 [on] + 2 [] + dtype: object + + Regular expressions are supported too. For instance, the search for all the + strings ending with the word 'on' is shown next: + + >>> s.str.findall('on$') + 0 [on] + 1 [] + 2 [] + dtype: object + + If the pattern is found more than once in the same string, then a list of + multiple strings is returned: + + >>> s.str.findall('b') + 0 [] + 1 [] + 2 [b, b] + dtype: object + """ regex = re.compile(pat, flags=flags) return _na_map(regex.findall, arr)