Skip to content

Commit

Permalink
Added Tomato Leaf Disease Dataset
Browse files Browse the repository at this point in the history
  • Loading branch information
varunUCDavis committed Oct 12, 2024
1 parent 725f7d4 commit 3ec461f
Show file tree
Hide file tree
Showing 3 changed files with 78 additions and 0 deletions.
29 changes: 29 additions & 0 deletions agml/_assets/public_datasources.json
Original file line number Diff line number Diff line change
Expand Up @@ -1338,6 +1338,35 @@
]
}
},
"tomato_leaf_disease": {
"classes": {
"1": "Bacterial Spot",
"2": "Early Blight",
"3": "Healthy",
"4": "Late Blight",
"5": "Leaf Mold",
"6": "Septoria Leaf Spot",
"7": "Spider Mites Two-spotted Spider Mite",
"8": "Target Spot",
"9": "Tomato Mosaic Virus",
"10": "Tomato Yellow Leaf Curl Virus"
},
"ml_task": "image_classification",
"ag_task": "disease_classification",
"location": {
"continent": "worldwide",
"country": "worldwide"
},
"sensor_modality": "rgb",
"real_synthetic": "real",
"platform": "handheld",
"input_data_format": [
"jpeg"
],
"annotation_format": "directory_names",
"n_images": "11000",
"docs_url": "https://www.kaggle.com/datasets/kaustubhb999/tomatoleaf?resource=download"
},
"vine_virus_photo_dataset": {
"classes": {
"1": "Leafroll 3",
Expand Down
4 changes: 4 additions & 0 deletions agml/_assets/source_citations.json
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,10 @@
"license": "",
"citation": "Singh D, Jain N, Jain P, Kayal P, Kumawat S, Batra N. PlantDoc: a dataset for visual plant disease detection. In Proceedings of the 7th ACM IKDD CoDS and 25th COMAD 2020 Jan 5 (pp. 249-253)."
},
"tomato_leaf_disease": {
"license": "CC0: Public Domain",
"citation": ""
},
"vine_virus_photo_dataset": {
"license": "Apache 2.0",
"citation": ""
Expand Down
45 changes: 45 additions & 0 deletions agml/_internal/preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,51 @@ def corn_maize_leaf_disease(self, dataset_name):
processed_image.save(os.path.join(output_path, cls, img))

print(f"Dataset {dataset_name} has been preprocessed and saved to {output_path}")

def tomato_leaf_disease(self, dataset_name):
    """Preprocess the Tomato Leaf Disease Dataset.

    Every sub-directory of ``self.data_dir`` is treated as one class. For
    each class a directory of the same name is created under
    ``<self.data_processed_dir>/<dataset_name>``, and every image file in
    the class directory is re-saved there as 8-bit data under its
    original file name.

    Args:
        dataset_name: Name of the output directory created inside
            ``self.data_processed_dir``.

    Returns:
        None. Images are written to disk and a summary line is printed.
    """
    # Get the dataset directory directly (no need for 'original' directory)
    base_path = self.data_dir
    classes = sorted(
        d for d in os.listdir(base_path)
        if os.path.isdir(os.path.join(base_path, d))
    )

    # Create the output directory; `exist_ok` makes re-runs safe and avoids
    # the check-then-create race of `os.path.exists` + `os.makedirs`.
    output_path = os.path.join(self.data_processed_dir, dataset_name)
    os.makedirs(output_path, exist_ok=True)

    # Create subdirectories for each class inside the output directory
    for cls in classes:
        os.makedirs(os.path.join(output_path, cls), exist_ok=True)

    # Compared case-insensitively and with the leading dot, so 'x.JPEG' and
    # 'x.PNG' are picked up while a name that merely ends in the letters
    # 'jpg' (no extension) is not mistaken for an image.
    image_extensions = ('.jpg', '.jpeg', '.png')

    # Process and copy the dataset images to the processed directory
    for cls in classes:
        class_path = os.path.join(base_path, cls)
        class_output_path = os.path.join(output_path, cls)
        for img in os.listdir(class_path):
            if not img.lower().endswith(image_extensions):
                continue
            img_path = os.path.join(class_path, img)

            with Image.open(img_path) as image:
                # A palette image stores colour-table indices, not colours;
                # expand it first so the array round-trip keeps the colours.
                pixels = image.convert('RGB') if image.mode == 'P' else image
                img_array = np.array(pixels)

                # Ensure the image is in range [0, 255] and dtype is uint8
                if img_array.dtype != np.uint8:
                    if img_array.dtype == np.bool_:
                        # 1-bit images: False/True -> 0/255
                        img_array = img_array.astype(np.uint8) * 255
                    elif np.issubdtype(img_array.dtype, np.floating):
                        # Floats are assumed to be in [0, 1]; clip first so
                        # out-of-range values saturate instead of wrapping.
                        img_array = (
                            np.clip(img_array, 0.0, 1.0) * 255
                        ).astype(np.uint8)
                    elif img_array.dtype == np.uint16:
                        # 16-bit samples: keep the most significant byte
                        img_array = (img_array >> 8).astype(np.uint8)
                    else:
                        # Other integer types: saturate into the 8-bit range
                        # (multiplying by 255 here would overflow).
                        img_array = np.clip(img_array, 0, 255).astype(np.uint8)

                # Convert back to PIL Image to save
                processed_image = Image.fromarray(img_array)

                if processed_image.mode == 'RGBA':
                    processed_image = processed_image.convert('RGB')

                # Save the processed image to the output directory
                processed_image.save(os.path.join(class_output_path, img))

    print(f"Dataset {dataset_name} has been preprocessed and saved to {output_path}")

def bean_disease_uganda(self, dataset_name):
# Get the dataset classes and paths
Expand Down

0 comments on commit 3ec461f

Please sign in to comment.