From ca8b9f8ae5491aaf7242363043e1daf0c5449157 Mon Sep 17 00:00:00 2001 From: sangwonsheep Date: Tue, 17 Sep 2024 17:35:19 +0900 Subject: [PATCH 1/2] =?UTF-8?q?feat:=20rss=20=EA=B8=B0=EB=8A=A5=20?= =?UTF-8?q?=EA=B5=AC=ED=98=84,=20=EC=8A=A4=EC=BC=80=EC=A4=84=EB=A7=81=20?= =?UTF-8?q?=EC=B2=98=EB=A6=AC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/build.gradle | 60 +++++++------ .../techpick/config/RestTemplateConfig.java | 23 +++++ .../techpick/entity/article/Article.java | 4 - .../entity/article/RawCrawledArticle.java | 31 +++++-- .../rss/controller/RssController.java | 63 +++++++++++++ .../techpick/rss/dto/RssResponse.java | 43 +++++++++ .../java/kernel360/techpick/rss/dto/Url.java | 22 +++++ .../rss/repository/RssRepository.java | 14 +++ .../techpick/rss/service/RssService.java | 89 +++++++++++++++++++ 9 files changed, 311 insertions(+), 38 deletions(-) create mode 100644 backend/src/main/java/kernel360/techpick/config/RestTemplateConfig.java create mode 100644 backend/src/main/java/kernel360/techpick/rss/controller/RssController.java create mode 100644 backend/src/main/java/kernel360/techpick/rss/dto/RssResponse.java create mode 100644 backend/src/main/java/kernel360/techpick/rss/dto/Url.java create mode 100644 backend/src/main/java/kernel360/techpick/rss/repository/RssRepository.java create mode 100644 backend/src/main/java/kernel360/techpick/rss/service/RssService.java diff --git a/backend/build.gradle b/backend/build.gradle index aa135552..eb1a27ed 100644 --- a/backend/build.gradle +++ b/backend/build.gradle @@ -1,55 +1,57 @@ plugins { - id 'java' - id 'org.springframework.boot' version '3.3.3' - id 'io.spring.dependency-management' version '1.1.6' + id 'java' + id 'org.springframework.boot' version '3.3.3' + id 'io.spring.dependency-management' version '1.1.6' } group = 'kernel360' version = '0.0.1-SNAPSHOT' java { - toolchain { - languageVersion = JavaLanguageVersion.of(17) - } + toolchain { + languageVersion = JavaLanguageVersion.of(17) + } } configurations { - compileOnly { - extendsFrom annotationProcessor - } + compileOnly { + extendsFrom annotationProcessor + } } repositories { - mavenCentral() + mavenCentral() } dependencies { - // spring boot - implementation 'org.springframework.boot:spring-boot-starter-data-jpa' - implementation 'org.springframework.boot:spring-boot-starter-web' - runtimeOnly 'com.mysql:mysql-connector-j' + // spring boot + implementation 'org.springframework.boot:spring-boot-starter-data-jpa' + implementation 'org.springframework.boot:spring-boot-starter-web' + implementation 'com.fasterxml.jackson.dataformat:jackson-dataformat-xml:2.17.2' - // lombok annotation - compileOnly 'org.projectlombok:lombok' - annotationProcessor 'org.projectlombok:lombok' + runtimeOnly 'com.mysql:mysql-connector-j' - // Sql logging formatter - // reference: https://www.baeldung.com/java-p6spy-intercept-sql-logging - implementation 'com.github.gavlyukovskiy:p6spy-spring-boot-starter:1.9.2' //이쁘게 + // lombok annotation + compileOnly 'org.projectlombok:lombok' + annotationProcessor 'org.projectlombok:lombok' - // logback logger - implementation 'ch.qos.logback:logback-classic:1.4.12' - implementation 'org.slf4j:slf4j-api:2.0.3' + // Sql logging formatter + // reference: https://www.baeldung.com/java-p6spy-intercept-sql-logging + implementation 'com.github.gavlyukovskiy:p6spy-spring-boot-starter:1.9.2' //이쁘게 - // test environment - testImplementation 'org.springframework.boot:spring-boot-starter-test' - testRuntimeOnly 'org.junit.platform:junit-platform-launcher' + // logback logger + implementation 'ch.qos.logback:logback-classic:1.4.12' + implementation 'org.slf4j:slf4j-api:2.0.3' - // springdoc swagger dependency - implementation 'org.springdoc:springdoc-openapi-starter-webmvc-ui:2.6.0' + // test environment + testImplementation 'org.springframework.boot:spring-boot-starter-test' + testRuntimeOnly 'org.junit.platform:junit-platform-launcher' + + // springdoc swagger dependency + implementation 'org.springdoc:springdoc-openapi-starter-webmvc-ui:2.6.0' } tasks.named('test') { - useJUnitPlatform() + useJUnitPlatform() } diff --git a/backend/src/main/java/kernel360/techpick/config/RestTemplateConfig.java b/backend/src/main/java/kernel360/techpick/config/RestTemplateConfig.java new file mode 100644 index 00000000..249e1749 --- /dev/null +++ b/backend/src/main/java/kernel360/techpick/config/RestTemplateConfig.java @@ -0,0 +1,23 @@ +package kernel360.techpick.config; + +import java.time.Duration; + +import org.springframework.boot.web.client.RestTemplateBuilder; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; +import org.springframework.http.converter.xml.MappingJackson2XmlHttpMessageConverter; +import org.springframework.web.client.RestTemplate; + +@Configuration +public class RestTemplateConfig { + + @Bean + public RestTemplate restTemplate(RestTemplateBuilder builder) { + return builder + .additionalMessageConverters(new MappingJackson2XmlHttpMessageConverter()) + .defaultHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64)") + .setConnectTimeout(Duration.ofSeconds(5)) + .setReadTimeout(Duration.ofSeconds(5)) + .build(); + } +} diff --git a/backend/src/main/java/kernel360/techpick/entity/article/Article.java b/backend/src/main/java/kernel360/techpick/entity/article/Article.java index b2fff928..7a80ebe4 100644 --- a/backend/src/main/java/kernel360/techpick/entity/article/Article.java +++ b/backend/src/main/java/kernel360/techpick/entity/article/Article.java @@ -53,10 +53,6 @@ public class Article extends CreatedAndUpdatedTimeColumn { @JoinColumn(name = "blog_id", nullable = false) private Blog blog; - // 수집 날짜 (크롤링 시각) - @Column(name = "crawled_at", nullable = false) - private LocalDateTime crawled_at; - // 대표 이미지 CDN url @Column(name = "image_url") private String imageUrl; // nullable diff --git a/backend/src/main/java/kernel360/techpick/entity/article/RawCrawledArticle.java b/backend/src/main/java/kernel360/techpick/entity/article/RawCrawledArticle.java index 7fd412b6..e4c658de 100644 --- a/backend/src/main/java/kernel360/techpick/entity/article/RawCrawledArticle.java +++ b/backend/src/main/java/kernel360/techpick/entity/article/RawCrawledArticle.java @@ -1,10 +1,13 @@ package kernel360.techpick.entity.article; +import java.util.List; + import jakarta.persistence.Column; import jakarta.persistence.Entity; import jakarta.persistence.GeneratedValue; import jakarta.persistence.GenerationType; import jakarta.persistence.Id; +import jakarta.persistence.Lob; import jakarta.persistence.Table; import kernel360.techpick.entity.common.CreatedAndUpdatedTimeColumn; import lombok.AccessLevel; @@ -19,7 +22,6 @@ @Table(name = "raw_crawled_article") @Entity @Getter -@AllArgsConstructor @NoArgsConstructor(access = AccessLevel.PROTECTED) public class RawCrawledArticle extends CreatedAndUpdatedTimeColumn { @@ -28,8 +30,27 @@ public class RawCrawledArticle extends CreatedAndUpdatedTimeColumn { @Column(name = "raw_crawled_article_id") private Long rawCrawledArticleId; - // TODO: 아래 크롤링 데이터 칼럼은 토의 후 바뀔 예정 입니다. - // 크롤링 데이터 - @Column(name = "data", nullable = false) - private String data; + // TODO: 변수명은 변경 될 수 있습니다. + @Column(name = "title") + private String title; + + @Column(name = "link", columnDefinition = "LONGBLOB") + private String link; + + @Column(name = "pubDate") + private String pubDate; + + @Column(name = "creator") + private String creator; + + @Column(name = "joinedCategories") + private String joinedCategories; + + public RawCrawledArticle(String title, String link, String pubDate, String creator, String joinedCategories) { + this.title = title; + this.link = link; + this.pubDate = pubDate; + this.creator = creator; + this.joinedCategories = joinedCategories; + } } diff --git a/backend/src/main/java/kernel360/techpick/rss/controller/RssController.java b/backend/src/main/java/kernel360/techpick/rss/controller/RssController.java new file mode 100644 index 00000000..66dc4cd9 --- /dev/null +++ b/backend/src/main/java/kernel360/techpick/rss/controller/RssController.java @@ -0,0 +1,63 @@ +package kernel360.techpick.rss.controller; + +import java.util.List; + +import org.springframework.http.ResponseEntity; +import org.springframework.web.bind.annotation.GetMapping; +import org.springframework.web.bind.annotation.RequestMapping; +import org.springframework.web.bind.annotation.RestController; + +import kernel360.techpick.rss.dto.RssResponse; +import kernel360.techpick.rss.service.RssService; +import lombok.RequiredArgsConstructor; + +@RestController +@RequestMapping("/rss") +@RequiredArgsConstructor +public class RssController { + + private final RssService rssService; + + @GetMapping + public ResponseEntity getNewRss() { + List rss = rssService.getNewRss(); + return ResponseEntity.ok(rss); + } + + @GetMapping("/baeldung") + public ResponseEntity getBaeldung() { + RssResponse.Channel rss = rssService.getRssByBaeldung(); + return ResponseEntity.ok(rss); + } + + @GetMapping("/toss") + public ResponseEntity getToss() { + RssResponse.Channel rss = rssService.getRssByToss(); + return ResponseEntity.ok(rss); + } + + @GetMapping("/woowa") + public ResponseEntity getWoowa() { + RssResponse.Channel rss = rssService.getRssByWoowa(); + return ResponseEntity.ok(rss); + } + + @GetMapping("/kakao") + public ResponseEntity getKakao() { + RssResponse.Channel rss = rssService.getRssByKakao(); + return ResponseEntity.ok(rss); + } + + @GetMapping("/daangn") + public ResponseEntity getDaangn() { + RssResponse.Channel rss = rssService.getRssByDaangn(); + return ResponseEntity.ok(rss); + } + + @GetMapping("/line") + public ResponseEntity getLine() { + RssResponse.Channel rss = rssService.getRssByLine(); + return ResponseEntity.ok(rss); + } + +} diff --git a/backend/src/main/java/kernel360/techpick/rss/dto/RssResponse.java b/backend/src/main/java/kernel360/techpick/rss/dto/RssResponse.java new file mode 100644 index 00000000..3e710a70 --- /dev/null +++ b/backend/src/main/java/kernel360/techpick/rss/dto/RssResponse.java @@ -0,0 +1,43 @@ +package kernel360.techpick.rss.dto; + +import java.util.List; + +import com.fasterxml.jackson.dataformat.xml.annotation.JacksonXmlElementWrapper; +import com.fasterxml.jackson.dataformat.xml.annotation.JacksonXmlProperty; + +import lombok.Getter; + +@Getter +public class RssResponse { + + @JacksonXmlProperty(localName = "channel") + private Channel channel; + + @Getter + public static class Channel { + + @JacksonXmlElementWrapper(useWrapping = false) + @JacksonXmlProperty(localName = "item") + private List item; + } + + @Getter + public static class Item { + + @JacksonXmlProperty(localName = "title") + private String title; + + @JacksonXmlProperty(localName = "link") + private String link; + + @JacksonXmlProperty(localName = "pubDate") + private String pubDate; + + @JacksonXmlProperty(localName = "creator") + private String creator; + + @JacksonXmlElementWrapper(useWrapping = false) + @JacksonXmlProperty(localName = "category") + private List category; + } +} diff --git a/backend/src/main/java/kernel360/techpick/rss/dto/Url.java b/backend/src/main/java/kernel360/techpick/rss/dto/Url.java new file mode 100644 index 00000000..4d63118c --- /dev/null +++ b/backend/src/main/java/kernel360/techpick/rss/dto/Url.java @@ -0,0 +1,22 @@ +package kernel360.techpick.rss.dto; + +import lombok.Getter; + +@Getter +public enum Url { + + // TODO: Blog Entity에 rssUrl로 넣어야 함. + BAELDUNG_URL("https://www.baeldung.com/feed"), + TOSS_URL("https://toss.tech/rss.xml"), + WOOWA_URL("https://techblog.woowahan.com/feed/"), + KAKAO_URL("https://tech.kakao.com/posts/feed"), + DAANGN_URL("https://medium.com/feed/daangn"), + LINE_URL("https://techblog.lycorp.co.jp/ko/feed/index.xml"); + + private final String url; + + Url(String url) { + this.url = url; + } + +} diff --git a/backend/src/main/java/kernel360/techpick/rss/repository/RssRepository.java b/backend/src/main/java/kernel360/techpick/rss/repository/RssRepository.java new file mode 100644 index 00000000..edb76233 --- /dev/null +++ b/backend/src/main/java/kernel360/techpick/rss/repository/RssRepository.java @@ -0,0 +1,14 @@ +package kernel360.techpick.rss.repository; + +import java.util.List; + +import org.springframework.data.jpa.repository.JpaRepository; +import org.springframework.data.jpa.repository.Query; + +import kernel360.techpick.entity.article.RawCrawledArticle; + +public interface RssRepository extends JpaRepository { + + @Query("SELECT r.link FROM RawCrawledArticle r") + List findAllLinks(); +} diff --git a/backend/src/main/java/kernel360/techpick/rss/service/RssService.java b/backend/src/main/java/kernel360/techpick/rss/service/RssService.java new file mode 100644 index 00000000..e50f841d --- /dev/null +++ b/backend/src/main/java/kernel360/techpick/rss/service/RssService.java @@ -0,0 +1,89 @@ +package kernel360.techpick.rss.service; + +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Objects; +import java.util.Set; + +import org.springframework.scheduling.annotation.Scheduled; +import org.springframework.stereotype.Service; +import org.springframework.web.client.RestTemplate; + +import kernel360.techpick.entity.article.RawCrawledArticle; +import kernel360.techpick.rss.dto.RssResponse; +import kernel360.techpick.rss.dto.Url; +import kernel360.techpick.rss.repository.RssRepository; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; + +@Slf4j +@Service +@RequiredArgsConstructor +public class RssService { + + private final RestTemplate restTemplate; + private final RssRepository rssRepository; + + // 새로운 글 탐지 + 초기 데이터 수집 + @Scheduled(cron = "0 0 3 * * *") + public List getNewRss() { + Set links = new HashSet<>(rssRepository.findAllLinks()); + List rssList = new ArrayList<>(); + for (Url url : Url.values()) { + RssResponse rss = restTemplate.getForObject(url.getUrl(), RssResponse.class); + rssList.add(Objects.requireNonNull(rss).getChannel()); + } + + List articles = rssList.stream() + .flatMap(channel -> channel.getItem().stream()) + .filter(item -> !links.contains(item.getLink())) + .map(item -> { + String joinedCategories = null; + if (Objects.nonNull(item.getCategory())) { + joinedCategories = String.join(",", item.getCategory()); + } + RawCrawledArticle article = new RawCrawledArticle(item.getTitle(), item.getLink(), + item.getPubDate(), + item.getCreator(), joinedCategories); + + log.info(article.getLink()); + return article; + }) + .toList(); + + rssRepository.saveAll(articles); + return rssList; + } + + public RssResponse.Channel getRssByBaeldung() { + RssResponse rss = restTemplate.getForObject(Url.BAELDUNG_URL.getUrl(), RssResponse.class); + return Objects.requireNonNull(rss).getChannel(); + } + + public RssResponse.Channel getRssByToss() { + RssResponse rss = restTemplate.getForObject(Url.TOSS_URL.getUrl(), RssResponse.class); + return Objects.requireNonNull(rss).getChannel(); + } + + public RssResponse.Channel getRssByWoowa() { + RssResponse rss = restTemplate.getForObject(Url.WOOWA_URL.getUrl(), RssResponse.class); + return Objects.requireNonNull(rss).getChannel(); + } + + public RssResponse.Channel getRssByKakao() { + RssResponse rss = restTemplate.getForObject(Url.KAKAO_URL.getUrl(), RssResponse.class); + return Objects.requireNonNull(rss).getChannel(); + } + + public RssResponse.Channel getRssByDaangn() { + RssResponse rss = restTemplate.getForObject(Url.DAANGN_URL.getUrl(), RssResponse.class); + return Objects.requireNonNull(rss).getChannel(); + } + + public RssResponse.Channel getRssByLine() { + RssResponse rss = restTemplate.getForObject(Url.LINE_URL.getUrl(), RssResponse.class); + return Objects.requireNonNull(rss).getChannel(); + } + +} From 5549279d7d598b346a60bdd76f0ebb8a113fc404 Mon Sep 17 00:00:00 2001 From: sangwonsheep Date: Tue, 17 Sep 2024 18:40:05 +0900 Subject: [PATCH 2/2] =?UTF-8?q?refactor:=20=EC=A3=BC=EC=84=9D=20=EC=B6=94?= =?UTF-8?q?=EA=B0=80,=20RssService=20=EB=A9=94=EC=84=9C=EB=93=9C=20?= =?UTF-8?q?=EB=B6=84=EB=A6=AC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../rss/controller/RssController.java | 3 ++ .../rss/repository/RssRepository.java | 3 ++ .../techpick/rss/service/RssService.java | 37 ++++++++++--------- 3 files changed, 26 insertions(+), 17 deletions(-) diff --git a/backend/src/main/java/kernel360/techpick/rss/controller/RssController.java b/backend/src/main/java/kernel360/techpick/rss/controller/RssController.java index 66dc4cd9..d4d54ea2 100644 --- a/backend/src/main/java/kernel360/techpick/rss/controller/RssController.java +++ b/backend/src/main/java/kernel360/techpick/rss/controller/RssController.java @@ -11,6 +11,9 @@ import kernel360.techpick.rss.service.RssService; import lombok.RequiredArgsConstructor; +/** + * RSS 테스트용 컨트롤러 + */ @RestController @RequestMapping("/rss") @RequiredArgsConstructor diff --git a/backend/src/main/java/kernel360/techpick/rss/repository/RssRepository.java b/backend/src/main/java/kernel360/techpick/rss/repository/RssRepository.java index edb76233..07ee351b 100644 --- a/backend/src/main/java/kernel360/techpick/rss/repository/RssRepository.java +++ b/backend/src/main/java/kernel360/techpick/rss/repository/RssRepository.java @@ -7,6 +7,9 @@ import kernel360.techpick.entity.article.RawCrawledArticle; +/** + * 파이썬 서버 구현 시 삭제 예정 + */ public interface RssRepository extends JpaRepository { @Query("SELECT r.link FROM RawCrawledArticle r") diff --git a/backend/src/main/java/kernel360/techpick/rss/service/RssService.java b/backend/src/main/java/kernel360/techpick/rss/service/RssService.java index e50f841d..b4712e40 100644 --- a/backend/src/main/java/kernel360/techpick/rss/service/RssService.java +++ b/backend/src/main/java/kernel360/techpick/rss/service/RssService.java @@ -35,23 +35,7 @@ public List getNewRss() { rssList.add(Objects.requireNonNull(rss).getChannel()); } - List articles = rssList.stream() - .flatMap(channel -> channel.getItem().stream()) - .filter(item -> !links.contains(item.getLink())) - .map(item -> { - String joinedCategories = null; - if (Objects.nonNull(item.getCategory())) { - joinedCategories = String.join(",", item.getCategory()); - } - RawCrawledArticle article = new RawCrawledArticle(item.getTitle(), item.getLink(), - item.getPubDate(), - item.getCreator(), joinedCategories); - - log.info(article.getLink()); - return article; - }) - .toList(); - + List articles = getCrawledArticleList(rssList, links); rssRepository.saveAll(articles); return rssList; } @@ -86,4 +70,23 @@ public RssResponse.Channel getRssByLine() { return Objects.requireNonNull(rss).getChannel(); } + private List getCrawledArticleList(List rssList, Set links) { + return rssList.stream() + .flatMap(channel -> channel.getItem().stream()) + .filter(item -> !links.contains(item.getLink())) + .map(this::getCrawledArticle) + .toList(); + } + + private RawCrawledArticle getCrawledArticle(RssResponse.Item item) { + String joinedCategories = null; + if (Objects.nonNull(item.getCategory())) { + joinedCategories = String.join(",", item.getCategory()); + } + + return new RawCrawledArticle(item.getTitle(), item.getLink(), + item.getPubDate(), + item.getCreator(), joinedCategories); + } + }