Skip to content

Commit

Permalink
[Syosetu] Extract all chapters
Browse files: browse the repository at this point in the history
  • Loading branch information
jacktheripper19 authored Nov 29, 2024
1 parent 57adb36 commit 16518cc
Showing 1 changed file with 95 additions and 22 deletions.
117 changes: 95 additions & 22 deletions src/plugins/japanese/Syosetu.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import * as cheerio from 'cheerio';
import { load as loadCheerio } from 'cheerio';
import { fetchApi } from '@libs/fetch';
import { Plugin } from '@typings/plugin';
Expand Down Expand Up @@ -69,15 +70,42 @@ class Syosetu implements Plugin.PluginBase {
const novels = await getNovelsFromPage(pageNo);
return novels;
}
async parseNovel(novelPath: string): Promise<Plugin.SourceNovel> {
private async parseChaptersFromPage(
loadedCheerio: cheerio.CheerioAPI,
): Promise<Plugin.ChapterItem[]> {
const chapters: Plugin.ChapterItem[] = [];

loadedCheerio('.p-eplist__sublist').each((_, element) => {
const chapterLink = loadedCheerio(element).find('a');
const chapterUrl = chapterLink.attr('href');
const chapterName = chapterLink.text().trim();
const releaseDate = loadedCheerio(element)
.find('.p-eplist__update')
.text()
.trim()
.split(' ')[0]
.replace(/\//g, '-');

if (chapterUrl) {
chapters.push({
name: chapterName,
releaseTime: releaseDate,
path: chapterUrl.replace(this.novelPrefix, ''),
});
}
});

return chapters;
}
async parseNovel(novelPath: string): Promise<Plugin.SourceNovel> {
// First fetch main page
const result = await fetchApi(this.novelPrefix + novelPath, {
headers: this.headers,
});
const body = await result.text();
const loadedCheerio = loadCheerio(body, { decodeEntities: false });

// Parse novel status
// Parse status
let status = 'Unknown';
if (loadedCheerio('.c-announce').text().includes('連載中')) {
status = 'Ongoing';
Expand All @@ -87,7 +115,7 @@ class Syosetu implements Plugin.PluginBase {
status = 'On Hiatus';
}

// Create novel object with basic metadata
// Create novel object with metadata
const novel: Plugin.SourceNovel = {
path: novelPath,
name: loadedCheerio('.p-novel__title').text(),
Expand All @@ -96,32 +124,77 @@ class Syosetu implements Plugin.PluginBase {
.replace('作者:', '')
.trim(),
status: status,
artist: '', // Not available on syosetu
artist: '',
cover: defaultCover,
chapters: [],
};

// Get summary if available
novel.summary = loadedCheerio('#novel_ex').text().trim();

// Parse chapters using the correct selectors
loadedCheerio('.p-eplist__sublist').each((_, element) => {
const chapterLink = loadedCheerio(element).find('a');
const chapterUrl = chapterLink.attr('href');
const chapterName = chapterLink.text().trim();
const releaseDate = loadedCheerio(element)
.find('.p-eplist__update')
.text()
.trim()
.split(' ')[0] // Get just the date part
.replace(/\//g, '-'); // Format date as YYYY-MM-DD
if (chapterUrl) {
chapters.push({
name: chapterName,
releaseTime: releaseDate,
path: chapterUrl.replace(this.novelPrefix, ''),
const chapters: Plugin.ChapterItem[] = [];

// Get last page URL first
const lastPageLink = loadedCheerio('.c-pager__item--last').attr('href');

if (!lastPageLink) {
// If no pagination, just parse chapters from the current page
loadedCheerio('.p-eplist__sublist').each((_, element) => {
const chapterLink = loadedCheerio(element).find('a');
const chapterUrl = chapterLink.attr('href');
const chapterName = chapterLink.text().trim();
const releaseDate = loadedCheerio(element)
.find('.p-eplist__update')
.text()
.trim()
.split(' ')[0]
.replace(/\//g, '-');

if (chapterUrl) {
chapters.push({
name: chapterName,
releaseTime: releaseDate,
path: chapterUrl.replace(this.novelPrefix, ''),
});
}
});
} else {
const lastPageMatch = lastPageLink.match(/\?p=(\d+)/);
const totalPages = lastPageMatch ? parseInt(lastPageMatch[1]) : 1;

// Fetch all pages in parallel for better performance
const pagePromises = Array.from({ length: totalPages }, (_, i) =>
fetchApi(`${this.novelPrefix}${novelPath}?p=${i + 1}`).then(r =>
r.text(),
),
);

const pageResults = await Promise.all(pagePromises);

// Process each page's chapters
pageResults.forEach(pageBody => {
const pageCheerio = loadCheerio(pageBody, { decodeEntities: false });
pageCheerio('.p-eplist__sublist').each((_, element) => {
const chapterLink = pageCheerio(element).find('a');
const chapterUrl = chapterLink.attr('href');
const chapterName = chapterLink.text().trim();
const releaseDate = pageCheerio(element)
.find('.p-eplist__update')
.text()
.trim()
.split(' ')[0]
.replace(/\//g, '-');

if (chapterUrl) {
chapters.push({
name: chapterName,
releaseTime: releaseDate,
path: chapterUrl.replace(this.novelPrefix, ''),
});
}
});
}
});
});
}

novel.chapters = chapters;
return novel;
Expand Down

0 comments on commit 16518cc

Please sign in to comment.