Skip to content

Commit

Permalink
Create hadoop plugin for hadoop common and hdfs, add hdfs plugin and tests
Browse files: browse the repository at this point in the history
  • Loading branch information
simisimon committed Sep 28, 2024
1 parent 04bbb45 commit 729316c
Show file tree
Hide file tree
Showing 8 changed files with 286 additions and 134 deletions.
2 changes: 1 addition & 1 deletion src/cfgnet/config_types/config_type_inferer.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ def get_config_type( # noqa: C901
return ConfigType.USERNAME

if bool(
re.match(ConfigTypeInferer.regex_size_option, option_name)
re.search(ConfigTypeInferer.regex_size_option, option_name)
) and bool(re.fullmatch(ConfigTypeInferer.regex_size_value, value)):
return ConfigType.SIZE

Expand Down
134 changes: 2 additions & 132 deletions src/cfgnet/plugins/concept/hadoop_common_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,144 +12,14 @@
#
# You should have received a copy of the GNU General Public License along with
# this program. If not, see <https://www.gnu.org/licenses/>.

import logging
import os

from typing import Optional
from lxml import etree as ET
from lxml.etree import _Element

from cfgnet.config_types.config_type_inferer import ConfigTypeInferer
from cfgnet.config_types.config_types import ConfigType
from cfgnet.network.nodes import (
ArtifactNode,
Node,
OptionNode,
ProjectNode,
ValueNode,
)
from cfgnet.plugins.plugin import Plugin
from cfgnet.plugins.file_type.hadoop_plugin import HadoopPlugin


class HadoopCommonPlugin(Plugin):
class HadoopCommonPlugin(HadoopPlugin):
def __init__(self):
super().__init__("hadoop-common")

def _parse_config_file(
self,
abs_file_path: str,
rel_file_path: str,
root: Optional[ProjectNode],
) -> ArtifactNode:
artifact = ArtifactNode(
file_path=abs_file_path,
rel_file_path=rel_file_path,
concept_name=self.concept_name,
project_root=root,
)

try:
tree = ET.parse(abs_file_path)
tree_root = tree.getroot()

# Remove namespace prefixes
for elem in tree_root.getiterator():
if elem.tag is not ET.Comment:
elem.tag = ET.QName(elem).localname
# Remove unused namespace declarations
ET.cleanup_namespaces(tree_root)

option_root = OptionNode(
tree_root.tag,
tree_root.sourceline,
ConfigTypeInferer.get_config_type(tree_root.tag, ""),
)
artifact.add_child(option_root)
for child in tree_root:
if child.tag is not ET.Comment:
self.parse_tree(child, parent_node=option_root)

except ET.Error as error:
logging.warning(
'Failed to parse xml file "%s" due to %s', rel_file_path, error
)

return artifact

def is_responsible(self, abs_file_path: str) -> bool:
file_name = os.path.basename(abs_file_path)
return file_name == "core-site.xml"

def parse_tree(self, subtree: _Element, parent_node: Node):
name = subtree.tag

if name:
if name == "property":
config_type = ConfigTypeInferer.get_config_type(name, "")
property_option = OptionNode(
name, subtree.sourceline, config_type
)
parent_node.add_child(property_option)

property_name = None
property_value = None
property_description = None

# Capture property details
for child in subtree:
if child.tag == "name":
property_name = child.text.strip()
elif child.tag == "value":
property_value = child.text.strip()
elif child.tag == "description":
property_description = child.text.strip()

if property_name:
print(property_name, config_type)
option = OptionNode(
property_name, subtree.sourceline, ConfigType.UNKNOWN
)
property_option.add_child(option)

# Add the value node, under the property name
if property_value:
config_type = ConfigTypeInferer.get_config_type(
property_name, property_value
)
option_value = OptionNode(
"value", subtree.sourceline, config_type
)
option.add_child(option_value)
value_node = ValueNode(name=property_value)
option_value.add_child(value_node)

# Add the description node, under the property name
if property_description:
option_desc = OptionNode(
"description",
subtree.sourceline,
ConfigType.UNKNOWN,
)
option.add_child(option_desc)
description_node = ValueNode(name=property_description)
option_desc.add_child(description_node)

else:
config_type = ConfigTypeInferer.get_config_type(name, "")
option = OptionNode(name, subtree.sourceline, config_type)
parent_node.add_child(option)

value_name = subtree.text.strip()

if value_name:
value_node = ValueNode(name=value_name)
option.add_child(value_node)
else:
for child in subtree:
if child.tag is not ET.Comment:
self.parse_tree(child, option)

# remove option nodes without children
if not option.children:
parent_node.children.remove(option)
13 changes: 13 additions & 0 deletions src/cfgnet/plugins/concept/hadoop_hdfs_plugin.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import os
from cfgnet.plugins.file_type.hadoop_plugin import HadoopPlugin


class HadoopHdfsPlugin(HadoopPlugin):
    """Hadoop plugin variant registered under the name ``hadoop-hdfs``."""

    def __init__(self):
        """Initialise the parent plugin with the name ``hadoop-hdfs``."""
        super().__init__("hadoop-hdfs")

    def is_responsible(self, abs_file_path: str) -> bool:
        """Tell whether this plugin handles the given file.

        :param abs_file_path: path of the candidate file
        :return: True if the file's base name is ``hdfs-site.xml`` or
            ``hdfs-default.xml``, False otherwise
        """
        handled_names = ("hdfs-site.xml", "hdfs-default.xml")
        return os.path.basename(abs_file_path) in handled_names
156 changes: 156 additions & 0 deletions src/cfgnet/plugins/file_type/hadoop_plugin.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
# This file is part of the CfgNet module.
#
# This program is free software: you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
# Foundation, either version 3 of the License, or (at your option) any later
# version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along with
# this program. If not, see <https://www.gnu.org/licenses/>.

import logging

from typing import Optional
from lxml import etree as ET
from lxml.etree import _Element

from cfgnet.config_types.config_type_inferer import ConfigTypeInferer
from cfgnet.config_types.config_types import ConfigType
from cfgnet.network.nodes import (
ArtifactNode,
Node,
OptionNode,
ProjectNode,
ValueNode,
)
from cfgnet.plugins.plugin import Plugin


class HadoopPlugin(Plugin):
    """Parse Hadoop-style XML configuration files into network nodes.

    The parser walks the XML tree and builds ``OptionNode``/``ValueNode``
    objects below an ``ArtifactNode``.  ``<property>`` elements get special
    treatment: their ``<name>``, ``<value>`` and ``<description>`` children
    are turned into a nested option structure (see ``_parse_property``).
    """

    def __init__(self, name=None):
        """Initialise the plugin.

        :param name: name passed to the parent ``Plugin``; ``"hadoop"`` is
            used when ``name`` is None
        """
        super().__init__("hadoop" if name is None else name)

    def _parse_config_file(
        self,
        abs_file_path: str,
        rel_file_path: str,
        root: Optional[ProjectNode],
    ) -> ArtifactNode:
        """Parse one XML file and return the artifact node built for it.

        XML parse errors are logged as warnings and do not propagate; in
        that case the artifact is returned with whatever was attached
        before the error occurred.

        :param abs_file_path: absolute path of the file to parse
        :param rel_file_path: relative path, used for the artifact and in
            the warning message
        :param root: project node handed to the artifact as project root
        :return: the artifact node created for the file
        """
        artifact = ArtifactNode(
            file_path=abs_file_path,
            rel_file_path=rel_file_path,
            concept_name=self.concept_name,
            project_root=root,
        )

        try:
            tree = ET.parse(abs_file_path)
            tree_root = tree.getroot()

            # Remove namespace prefixes.  ``iter()`` replaces the deprecated
            # ``getiterator()``; non-element nodes (comments, processing
            # instructions, entities) have no string tag and are skipped.
            for elem in tree_root.iter():
                if self._is_element(elem):
                    elem.tag = ET.QName(elem).localname
            # Remove unused namespace declarations
            ET.cleanup_namespaces(tree_root)

            option_root = OptionNode(
                tree_root.tag,
                tree_root.sourceline,
                ConfigTypeInferer.get_config_type(tree_root.tag, ""),
            )
            artifact.add_child(option_root)
            for child in tree_root:
                if self._is_element(child):
                    self.parse_tree(child, parent_node=option_root)

        except ET.Error as error:
            logging.warning(
                'Failed to parse xml file "%s" due to %s', rel_file_path, error
            )

        return artifact

    def is_responsible(self, abs_file_path: str) -> bool:
        """Return True when ``abs_file_path`` ends with ``.json``.

        :param abs_file_path: path of the candidate file
        :return: True if the path ends with ``.json``
        """
        # NOTE(review): this plugin parses XML, so matching ".json" looks
        # like a copy-paste slip -- confirm the intended suffix.  Behaviour
        # is left unchanged here because callers/registration are not
        # visible from this file.
        return abs_file_path.endswith(".json")

    def parse_tree(self, subtree: _Element, parent_node: Node):
        """Convert ``subtree`` into nodes attached below ``parent_node``.

        ``<property>`` elements are handled by ``_parse_property``; every
        other element is handled by ``_parse_element``, which recurses into
        child elements through this method.

        :param subtree: XML node to convert
        :param parent_node: node that receives the created option node
        """
        name = subtree.tag

        # Comments and processing instructions carry a callable instead of
        # a string tag; there is nothing to convert for them.
        if not isinstance(name, str) or not name:
            return

        if name == "property":
            self._parse_property(subtree, parent_node)
        else:
            self._parse_element(subtree, parent_node)

    def _parse_property(self, subtree: _Element, parent_node: Node):
        """Build the option structure for a single ``<property>`` element."""
        config_type = ConfigTypeInferer.get_config_type(subtree.tag, "")
        property_option = OptionNode(
            subtree.tag, subtree.sourceline, config_type
        )
        parent_node.add_child(property_option)

        # Capture property details; if a tag occurs more than once, the
        # last occurrence wins.
        details = {"name": "", "value": "", "description": ""}
        for child in subtree:
            if child.tag in details:
                details[child.tag] = self._stripped_text(child)

        property_name = details["name"]
        if not property_name:
            return

        option = OptionNode(
            property_name, subtree.sourceline, ConfigType.UNKNOWN
        )
        property_option.add_child(option)

        # Add the value node, under the property name
        property_value = details["value"]
        if property_value:
            value_type = ConfigTypeInferer.get_config_type(
                property_name, property_value
            )
            option_value = OptionNode(
                "value", subtree.sourceline, value_type
            )
            option.add_child(option_value)
            option_value.add_child(ValueNode(name=property_value))

        # Add the description node, under the property name
        property_description = details["description"]
        if property_description:
            option_desc = OptionNode(
                "description",
                subtree.sourceline,
                ConfigType.NAME,
            )
            option.add_child(option_desc)
            option_desc.add_child(ValueNode(name=property_description))

    def _parse_element(self, subtree: _Element, parent_node: Node):
        """Build an option node for any element other than ``<property>``."""
        config_type = ConfigTypeInferer.get_config_type(subtree.tag, "")
        option = OptionNode(subtree.tag, subtree.sourceline, config_type)
        parent_node.add_child(option)

        value_name = self._stripped_text(subtree)

        if value_name:
            option.add_child(ValueNode(name=value_name))
        else:
            for child in subtree:
                if self._is_element(child):
                    self.parse_tree(child, option)

        # remove option nodes without children
        if not option.children:
            parent_node.children.remove(option)

    @staticmethod
    def _is_element(node: _Element) -> bool:
        """Return True for real elements, i.e. nodes with a string tag."""
        return isinstance(node.tag, str)

    @staticmethod
    def _stripped_text(element: _Element) -> str:
        """Return the element's text without surrounding whitespace.

        ``element.text`` is None for empty elements such as
        ``<value></value>``; treat that as an empty string instead of
        failing on ``None.strip()``.
        """
        return (element.text or "").strip()
2 changes: 2 additions & 0 deletions src/cfgnet/plugins/plugin_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
from cfgnet.plugins.concept.zookeeper_plugin import ZookeeperPlugin
from cfgnet.plugins.concept.alluxio_plugin import AlluxioPlugin
from cfgnet.plugins.concept.hadoop_common_plugin import HadoopCommonPlugin
from cfgnet.plugins.concept.hadoop_hdfs_plugin import HadoopHdfsPlugin


class PluginManager:
Expand All @@ -67,6 +68,7 @@ class PluginManager:
ZookeeperPlugin(),
AlluxioPlugin(),
HadoopCommonPlugin(),
HadoopHdfsPlugin(),
]

file_type_plugins: List[Plugin] = [
Expand Down
Loading

0 comments on commit 729316c

Please sign in to comment.