// TaskReference.java (8.9 KB)
package com.aigeo.article.entity;

import com.fasterxml.jackson.annotation.JsonFormat;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
import org.hibernate.annotations.CreationTimestamp;

import jakarta.persistence.*;
import jakarta.validation.constraints.NotBlank;
import jakarta.validation.constraints.NotNull;
import java.time.Duration;
import java.time.LocalDateTime;
import java.util.Locale;

/**
 * Task reference entity.
 * Maps to the database table {@code ai_task_references}.
 *
 * <p>Stores the reference material attached to an article-generation task, including:
 * <ul>
 *   <li>the reference URL and title</li>
 *   <li>a summary of the referenced content</li>
 *   <li>weight, credibility, relevance and quality scores</li>
 *   <li>processing/usage state and free-form notes</li>
 * </ul>
 *
 * <p>The score fields are documented as 1-10 values, but this class does not
 * validate that range.
 *
 * @author AIGEO Team
 * @since 1.0.0
 */
@Data
@Entity
@Builder
@NoArgsConstructor
@AllArgsConstructor
@Table(name = "ai_task_references", indexes = {
    @Index(name = "idx_task_refs_task_id", columnList = "task_id"),
    @Index(name = "idx_task_refs_url", columnList = "reference_url")
})
public class TaskReference {

    /** Maximum length of the {@code source_domain} column. */
    private static final int SOURCE_DOMAIN_MAX_LENGTH = 100;

    /** Score assumed whenever a weight/credibility/relevance/quality value is missing. */
    private static final int DEFAULT_SCORE = 5;

    /** Minimum score each dimension must reach for {@link #isHighQuality()}. */
    private static final int HIGH_QUALITY_THRESHOLD = 7;

    /** Minimum credibility score that makes a source authoritative on its own. */
    private static final int AUTHORITATIVE_CREDIBILITY_THRESHOLD = 8;

    /** Characters that terminate the authority part of a URL. */
    private static final String AUTHORITY_TERMINATORS = "/?#";

    /**
     * Primary key.
     */
    @Id
    @GeneratedValue(strategy = GenerationType.IDENTITY)
    @Column(name = "id", nullable = false, updatable = false)
    private Integer id;

    /**
     * ID of the generation task this reference belongs to.
     */
    @NotNull(message = "任务ID不能为空")
    @Column(name = "task_id", nullable = false)
    private Integer taskId;

    /**
     * URL of the reference material.
     */
    @NotBlank(message = "参考资料URL不能为空")
    @Column(name = "reference_url", nullable = false, length = 500)
    private String referenceUrl;

    /**
     * Title of the reference material.
     */
    @Column(name = "reference_title", length = 500)
    private String referenceTitle;

    /**
     * Summary of the referenced content.
     */
    @Column(name = "content_summary", length = 2000)
    private String contentSummary;

    /**
     * Domain the material comes from. When left unset it is derived from
     * {@link #referenceUrl} just before the entity is first persisted.
     */
    @Column(name = "source_domain", length = SOURCE_DOMAIN_MAX_LENGTH)
    private String sourceDomain;

    /**
     * Content type (e.g. article, blog, news, academic).
     */
    @Column(name = "content_type", length = 50)
    @Builder.Default
    private String contentType = "article";

    /**
     * Weight of the material, i.e. how strongly it should influence the
     * generated result (1-10).
     */
    @Column(name = "weight")
    @Builder.Default
    private Integer weight = DEFAULT_SCORE;

    /**
     * Credibility score (1-10).
     */
    @Column(name = "credibility_score")
    @Builder.Default
    private Integer credibilityScore = DEFAULT_SCORE;

    /**
     * Relevance score: how closely the material relates to the target topic (1-10).
     */
    @Column(name = "relevance_score")
    @Builder.Default
    private Integer relevanceScore = DEFAULT_SCORE;

    /**
     * Content quality score (1-10). Has no default and may be {@code null}.
     */
    @Column(name = "quality_score")
    private Integer qualityScore;

    /**
     * Original publication time of the reference material.
     */
    @Column(name = "published_at")
    @JsonFormat(pattern = "yyyy-MM-dd HH:mm:ss")
    private LocalDateTime publishedAt;

    /**
     * Language tag.
     */
    @Column(name = "language", length = 10)
    @Builder.Default
    private String language = "zh-CN";

    /**
     * Word count.
     */
    @Column(name = "word_count")
    private Integer wordCount;

    /**
     * Whether the material has already been analysed by the AI.
     */
    @Column(name = "is_processed")
    @Builder.Default
    private Boolean isProcessed = false;

    /**
     * Processing status (pending, processing, completed, failed).
     */
    @Column(name = "processing_status", length = 20)
    @Builder.Default
    private String processingStatus = "pending";

    /**
     * Whether the material was actually used during generation.
     */
    @Column(name = "is_used")
    @Builder.Default
    private Boolean isUsed = false;

    /**
     * Error description recorded when processing fails.
     */
    @Column(name = "error_message", length = 1000)
    private String errorMessage;

    /**
     * Extracted keywords (JSON array or comma-separated list).
     */
    @Column(name = "extracted_keywords", length = 1000)
    private String extractedKeywords;

    /**
     * Extracted entities (people, places, organisations, ...) in JSON format.
     */
    @Column(name = "extracted_entities", columnDefinition = "JSON")
    private String extractedEntities;

    /**
     * Content classification tags.
     */
    @Column(name = "content_tags", length = 500)
    private String contentTags;

    /**
     * Free-form notes.
     */
    @Column(name = "notes", length = 500)
    private String notes;

    /**
     * Additional metadata stored as JSON.
     */
    @Column(name = "metadata", columnDefinition = "JSON")
    private String metadata;

    /**
     * Creation time; annotated with {@code @CreationTimestamp} and never updated.
     */
    @CreationTimestamp
    @Column(name = "created_at", updatable = false)
    @JsonFormat(pattern = "yyyy-MM-dd HH:mm:ss")
    private LocalDateTime createdAt;

    /**
     * Pre-persist callback: fills in defaults for any field that is still
     * {@code null} and derives {@link #sourceDomain} from {@link #referenceUrl}
     * when no domain was supplied.
     */
    @PrePersist
    protected void onCreate() {
        if (contentType == null) {
            contentType = "article";
        }
        if (weight == null) {
            weight = DEFAULT_SCORE;
        }
        if (credibilityScore == null) {
            credibilityScore = DEFAULT_SCORE;
        }
        if (relevanceScore == null) {
            relevanceScore = DEFAULT_SCORE;
        }
        if (language == null) {
            language = "zh-CN";
        }
        if (isProcessed == null) {
            isProcessed = false;
        }
        if (processingStatus == null) {
            processingStatus = "pending";
        }
        if (isUsed == null) {
            isUsed = false;
        }

        // Derive the domain from the URL only when the caller did not set one.
        if (sourceDomain == null && referenceUrl != null) {
            this.sourceDomain = extractDomainFromUrl(referenceUrl);
        }
    }

    /**
     * Extracts the host name from a URL.
     *
     * <p>Handles URLs with any scheme (case-insensitive), protocol-relative
     * URLs ({@code //host/path}) and scheme-less input ({@code host/path}).
     * User info ({@code user:pass@}), the port, and any path, query or
     * fragment are removed, and the host is lower-cased. For example
     * {@code "HTTPS://User@Example.COM:8443/a?b#c"} yields {@code "example.com"}.
     *
     * @param url the full URL; may be {@code null}
     * @return the lower-cased host, or {@code null} when {@code url} is
     *         {@code null}, contains no host, or the host would not fit into
     *         the {@code source_domain} column
     */
    private String extractDomainFromUrl(String url) {
        if (url == null) {
            return null;
        }
        String authority = url.trim();

        // Strip the scheme. The "://" only counts as a scheme separator when
        // nothing that ends an authority precedes it; otherwise it belongs to
        // a nested URL inside the path or query (e.g. "a.com/r?u=http://b").
        int schemeSeparator = authority.indexOf("://");
        if (schemeSeparator >= 0
                && indexOfAny(authority, AUTHORITY_TERMINATORS) == schemeSeparator + 1) {
            authority = authority.substring(schemeSeparator + 3);
        } else if (authority.startsWith("//")) {
            authority = authority.substring(2);
        }

        // The authority ends at the first path, query or fragment delimiter.
        int authorityEnd = indexOfAny(authority, AUTHORITY_TERMINATORS);
        if (authorityEnd >= 0) {
            authority = authority.substring(0, authorityEnd);
        }

        // Drop "user:password@" so credentials never end up in the domain column.
        int userInfoEnd = authority.lastIndexOf('@');
        if (userInfoEnd >= 0) {
            authority = authority.substring(userInfoEnd + 1);
        }

        // Drop the port. Bracketed IPv6 literals contain ':' themselves, so
        // for those everything after the closing bracket is discarded instead.
        String host;
        if (authority.startsWith("[")) {
            int closingBracket = authority.indexOf(']');
            host = closingBracket >= 0 ? authority.substring(0, closingBracket + 1) : authority;
        } else {
            int portSeparator = authority.indexOf(':');
            host = portSeparator >= 0 ? authority.substring(0, portSeparator) : authority;
        }

        // Host names are case-insensitive; store one canonical form.
        host = host.toLowerCase(Locale.ROOT);

        // An empty or over-long value is treated as "unknown" rather than
        // being written to a column it cannot fit into.
        if (host.isEmpty() || host.length() > SOURCE_DOMAIN_MAX_LENGTH) {
            return null;
        }
        return host;
    }

    /**
     * Returns the index of the first character of {@code text} that occurs in
     * {@code chars}, or {@code -1} when there is none.
     */
    private static int indexOfAny(String text, String chars) {
        for (int i = 0; i < text.length(); i++) {
            if (chars.indexOf(text.charAt(i)) >= 0) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Returns the given score as a double, or {@link #DEFAULT_SCORE} when it is {@code null}.
     */
    private static double scoreOrDefault(Integer score) {
        return score != null ? score : DEFAULT_SCORE;
    }

    /**
     * Checks whether this is high-quality reference material.
     *
     * @return {@code true} when credibility and relevance are both set and at
     *         least 7, and the quality score is either unset or at least 7
     */
    public boolean isHighQuality() {
        boolean credible = credibilityScore != null && credibilityScore >= HIGH_QUALITY_THRESHOLD;
        boolean relevant = relevanceScore != null && relevanceScore >= HIGH_QUALITY_THRESHOLD;
        // A missing quality score does not disqualify the material.
        boolean goodQuality = qualityScore == null || qualityScore >= HIGH_QUALITY_THRESHOLD;
        return credible && relevant && goodQuality;
    }

    /**
     * Checks whether the material comes from an authoritative source.
     *
     * <p>A source counts as authoritative when its domain ends in
     * {@code .edu}, {@code .gov} or {@code .org} (compared case-insensitively),
     * or when its credibility score is at least 8.
     *
     * @return {@code true} for an authoritative source; always {@code false}
     *         when no source domain is recorded, regardless of credibility
     */
    public boolean isAuthoritativeSource() {
        if (sourceDomain == null) {
            return false;
        }

        // The domain may have been supplied by a caller in any case.
        String domain = sourceDomain.toLowerCase(Locale.ROOT);
        boolean trustedSuffix = domain.endsWith(".edu")
                || domain.endsWith(".gov")
                || domain.endsWith(".org");
        boolean highlyCredible = credibilityScore != null
                && credibilityScore >= AUTHORITATIVE_CREDIBILITY_THRESHOLD;
        return trustedSuffix || highlyCredible;
    }

    /**
     * Computes the overall score as a weighted sum: relevance 40%,
     * credibility 30%, quality 20%, weight 10%. Any score that is
     * {@code null} is counted as 5.
     *
     * @return the weighted overall score
     */
    public double getOverallScore() {
        double relevance = scoreOrDefault(relevanceScore);
        double credibility = scoreOrDefault(credibilityScore);
        double quality = scoreOrDefault(qualityScore);
        double importance = scoreOrDefault(weight);

        return relevance * 0.4 + credibility * 0.3 + quality * 0.2 + importance * 0.1;
    }

    /**
     * Checks whether the content was published recently.
     *
     * @return {@code true} when the publication time is set and lies after
     *         the instant six months before now
     */
    public boolean isRecentContent() {
        if (publishedAt == null) {
            return false;
        }

        LocalDateTime sixMonthsAgo = LocalDateTime.now().minusMonths(6);
        return publishedAt.isAfter(sixMonthsAgo);
    }

    /**
     * Computes a freshness score; newer content scores higher.
     *
     * @return 5 when the publication time is unknown; otherwise 10, 9, 8, 7,
     *         6 or 4 for content at most 7, 30, 90, 180, 365 or 730 days old,
     *         and 2 for anything older. A publication time in the future
     *         yields a negative age and therefore also scores 10.
     */
    public int getFreshnessScore() {
        if (publishedAt == null) {
            return DEFAULT_SCORE; // unknown age: neutral score
        }

        long ageInDays = Duration.between(publishedAt, LocalDateTime.now()).toDays();

        if (ageInDays <= 7) {
            return 10; // within a week
        }
        if (ageInDays <= 30) {
            return 9; // within a month
        }
        if (ageInDays <= 90) {
            return 8; // within three months
        }
        if (ageInDays <= 180) {
            return 7; // within half a year
        }
        if (ageInDays <= 365) {
            return 6; // within a year
        }
        if (ageInDays <= 730) {
            return 4; // within two years
        }
        return 2; // older than two years
    }

    /**
     * Marks the reference as successfully processed.
     */
    public void markAsProcessed() {
        this.isProcessed = true;
        this.processingStatus = "completed";
        // Clear the message of any earlier failed attempt so a completed
        // record does not carry a stale error.
        this.errorMessage = null;
    }

    /**
     * Marks processing as failed and records the reason.
     *
     * @param errorMessage description of the failure
     */
    public void markAsFailed(String errorMessage) {
        this.isProcessed = false;
        this.processingStatus = "failed";
        this.errorMessage = errorMessage;
    }

    /**
     * Marks the reference as used during generation.
     */
    public void markAsUsed() {
        this.isUsed = true;
    }
}