PatoisSMEH/backend/server.js

import express from 'express'
import cors from 'cors'
import fs from 'fs/promises'
import path from 'path'
import { fileURLToPath } from 'url'
import dotenv from 'dotenv'

// ES modules do not define __filename / __dirname, so derive them from import.meta.url.
const __filename = fileURLToPath(import.meta.url)
const __dirname = path.dirname(__filename)

/** dotenv configuration
 * Loads environment variables from a .env file.
 * Must run before the process.env reads below.
 * */
dotenv.config()

const app = express()
// No fallback: PORT is undefined when the environment does not provide it.
const PORT = process.env.PORT

// Middleware
app.use(cors())
app.use(express.json())

// Root directory holding one sub-directory per text; TEXTS_PATH overrides the default.
// NOTE(review): the default is an absolute path on one developer's Windows machine and
// __dirname above is not used anywhere in this file — presumably the default was meant
// to be resolved relative to this file; confirm before deploying without TEXTS_PATH.
const TEXTS_DIR = process.env.TEXTS_PATH || "C:\\Users\\paulf\\Documents\\texts"

/**
 * Service that discovers, loads, caches and searches the texts stored under TEXTS_DIR.
 *
 * Each text is a direct sub-directory of TEXTS_DIR containing `fr.txt`, `pt.txt`,
 * `metadata.txt` and, optionally, `audio.mp3`. The directory name is the text id.
 */
class TextService {
  constructor() {
    // text id -> loaded text object; replaced wholesale on every full scan
    this.cache = new Map()
    // Timestamp (ms) of the last completed scan, null until the first one
    this.lastScan = null
    this.CACHE_DURATION = 5 * 60 * 1000 // 5 minutes

    // word -> Set of ids of the texts containing that word
    this.searchIndex = new Map()
    this.indexBuilt = false
  }

  /**
   * Parses the content of a metadata.txt file made of `key=value` lines.
   *
   * Blank lines and lines without `=` are ignored. Only the first `=` separates the
   * key from the value, so values may themselves contain `=`.
   *
   * @param {string} content - Raw file content.
   * @returns {Object<string, string>} Trimmed keys mapped to trimmed values.
   */
  parseMetadata(content) {
    const metadata = {}
    const lines = content.split('\n').filter(line => line.trim())

    for (const line of lines) {
      const [key, ...valueParts] = line.split('=')
      if (key && valueParts.length > 0) {
        metadata[key.trim()] = valueParts.join('=').trim()
      }
    }

    return metadata
  }

  /**
   * Tells whether a path is accessible on disk.
   *
   * @param {string} filePath - File or directory path.
   * @returns {Promise<boolean>} true when fs.access succeeds, false otherwise.
   */
  async fileExists(filePath) {
    try {
      await fs.access(filePath)
      return true
    } catch {
      return false
    }
  }

  /**
   * Maps a text id to its directory, refusing ids that are not a plain directory name.
   *
   * Ids reach this service from the URL, so anything containing a path separator, or
   * equal to `.` / `..`, is rejected to keep every lookup inside TEXTS_DIR.
   *
   * @param {string} textId - Candidate text id.
   * @returns {string} Path of the text directory.
   * @throws {Error} Same "not found" error as for a missing text when the id is unsafe.
   */
  resolveTextDir(textId) {
    const isPlainName =
      typeof textId === 'string' &&
      textId !== '' &&
      textId !== '.' &&
      textId !== '..' &&
      textId === path.basename(textId)

    if (!isPlainName) {
      throw new Error(`Texte "${String(textId)}" non trouvé`)
    }

    return path.join(TEXTS_DIR, textId)
  }

  /**
   * Splits free text into the normalized words used by the search index.
   *
   * The same routine is applied to indexed content and to queries so both sides agree.
   * Text is NFC-normalized and lower-cased, words shorter than 3 characters are
   * dropped, and everything except Unicode letters, marks, digits and `_` is removed
   * (the ASCII-only `\w` class would delete accented letters).
   *
   * @param {string} content - Text to tokenize.
   * @returns {string[]} Non-empty normalized words (duplicates preserved).
   */
  tokenize(content) {
    return content
      .normalize('NFC')
      .toLowerCase()
      .split(/\s+/)
      .filter(word => word.length > 2)
      .map(word => word.replace(/[^\p{L}\p{M}\p{N}_]/gu, ''))
      .filter(word => word.length > 0)
  }

  /**
   * Loads one text by id.
   *
   * @param {string} textId - Name of the text directory under TEXTS_DIR.
   * @returns {Promise<{id: string, frenchText: string, patoisText: string, metadata: Object, hasAudio: boolean}>}
   * @throws {Error} When the id is unsafe, the directory is missing, one of the three
   *   required files is missing, or a file cannot be read. The error is logged and rethrown.
   */
  async loadText(textId) {
    try {
      const textDir = this.resolveTextDir(textId)

      // Make sure the directory exists
      const dirExists = await this.fileExists(textDir)
      if (!dirExists) {
        throw new Error(`Texte "${textId}" non trouvé`)
      }

      // Required files
      const frPath = path.join(textDir, 'fr.txt')
      const ptPath = path.join(textDir, 'pt.txt')
      const metadataPath = path.join(textDir, 'metadata.txt')

      const [frExists, ptExists, metadataExists] = await Promise.all([
        this.fileExists(frPath),
        this.fileExists(ptPath),
        this.fileExists(metadataPath)
      ])

      if (!frExists || !ptExists || !metadataExists) {
        throw new Error(`Fichiers manquants pour le texte "${textId}"`)
      }

      // Read the three files and probe for the optional audio file in parallel
      const [frenchText, patoisText, metadataContent, hasAudio] = await Promise.all([
        fs.readFile(frPath, 'utf-8'),
        fs.readFile(ptPath, 'utf-8'),
        fs.readFile(metadataPath, 'utf-8'),
        this.fileExists(path.join(textDir, 'audio.mp3'))
      ])

      return {
        id: textId,
        frenchText: frenchText.trim(),
        patoisText: patoisText.trim(),
        metadata: this.parseMetadata(metadataContent),
        hasAudio
      }
    } catch (error) {
      console.error(`Erreur lors du chargement du texte ${textId}:`, error)
      throw error
    }
  }

  /**
   * Scans TEXTS_DIR and loads every text it contains.
   *
   * Results are served from the cache while the last scan is younger than
   * CACHE_DURATION. Directories that fail to load are skipped with a warning.
   * After a scan the search index is rebuilt.
   *
   * @returns {Promise<Object[]>} Loaded texts (empty when TEXTS_DIR does not exist).
   * @throws {Error} When listing TEXTS_DIR fails; the error is logged and rethrown.
   */
  async scanTexts() {
    try {
      // Serve from the cache while it is still fresh
      if (this.lastScan && (Date.now() - this.lastScan) < this.CACHE_DURATION) {
        return Array.from(this.cache.values())
      }

      const dirExists = await this.fileExists(TEXTS_DIR)
      if (!dirExists) {
        console.warn(`Dossier texts/ non trouvé: ${TEXTS_DIR}`)
        return []
      }

      const entries = await fs.readdir(TEXTS_DIR, { withFileTypes: true })
      const textDirs = entries.filter(entry => entry.isDirectory()).map(entry => entry.name)

      // Fill a fresh map and swap it in at the end, so texts deleted from disk since
      // the previous scan do not linger in the cached responses.
      const freshCache = new Map()
      const texts = []
      for (const textId of textDirs) {
        try {
          const textData = await this.loadText(textId)
          texts.push(textData)
          freshCache.set(textId, textData)
        } catch (error) {
          console.warn(`Impossible de charger le texte ${textId}:`, error.message)
        }
      }

      this.cache = freshCache
      this.lastScan = Date.now()

      // Rebuild the search index from the texts just loaded
      this.buildSearchIndex(texts)

      return texts
    } catch (error) {
      console.error('Erreur lors du scan des textes:', error)
      throw error
    }
  }

  /**
   * Rebuilds the word -> text ids index from scratch.
   *
   * Indexed fields: French and patois titles, author, category, and both text bodies.
   *
   * @param {Object[]} texts - Texts as returned by loadText.
   */
  buildSearchIndex(texts) {
    console.log('🔍 Construction de l\'index de recherche...')
    this.searchIndex.clear()

    for (const text of texts) {
      const searchableContent = [
        text.metadata.titre_fr || '',
        text.metadata.titre_pt || '',
        text.metadata.auteur || '',
        text.metadata.categorie || '',
        text.frenchText || '',
        text.patoisText || ''
      ].join(' ')

      for (const word of this.tokenize(searchableContent)) {
        if (!this.searchIndex.has(word)) {
          this.searchIndex.set(word, new Set())
        }
        this.searchIndex.get(word).add(text.id)
      }
    }

    this.indexBuilt = true
    console.log(`✅ Index construit : ${this.searchIndex.size} mots uniques indexés`)
  }

  /**
   * Searches the index for texts matching every term of the query.
   *
   * A term matches a text when it is a substring of at least one word indexed for that
   * text; terms are combined with a logical AND.
   *
   * @param {string} query - Raw user query.
   * @param {Object[]} allTexts - Candidate texts to filter.
   * @returns {Object[]} Matching texts, or allTexts when the query has no usable term.
   */
  searchWithIndex(query, allTexts) {
    if (!query || !query.trim()) return allTexts

    const searchTerms = this.tokenize(query)
    if (searchTerms.length === 0) return allTexts

    let matchingIds = null

    for (const term of searchTerms) {
      const idsForTerm = new Set()

      // Substring match against every indexed word
      for (const [indexedWord, ids] of this.searchIndex) {
        if (indexedWord.includes(term)) {
          for (const id of ids) {
            idsForTerm.add(id)
          }
        }
      }

      // Intersect with the previous terms (logical AND)
      if (matchingIds === null) {
        matchingIds = idsForTerm
      } else {
        matchingIds = new Set([...matchingIds].filter(id => idsForTerm.has(id)))
      }

      // Nothing left to match: stop early
      if (matchingIds.size === 0) break
    }

    return allTexts.filter(text => matchingIds.has(text.id))
  }

  /**
   * Searches and filters the collection.
   *
   * @param {string} [query] - Free-text query; ignored when empty or blank.
   * @param {Object} [filters]
   * @param {string} [filters.category] - Exact match on metadata.categorie.
   * @param {string} [filters.difficulty] - Exact match on metadata.difficulte.
   * @param {string} [filters.onlyWithAudio] - Only the string 'true' enables this filter.
   * @returns {Promise<Object[]>} Matching texts.
   */
  async searchTexts(query, filters = {}) {
    const allTexts = await this.scanTexts()
    let results = [...allTexts]

    if (query && query.trim()) {
      if (this.indexBuilt) {
        results = this.searchWithIndex(query, results)
      } else {
        // Fallback used while no index has been built: plain substring search
        const searchTerm = query.toLowerCase().trim()
        results = results.filter(text => {
          return (
            text.metadata.titre_fr?.toLowerCase().includes(searchTerm) ||
            text.metadata.titre_pt?.toLowerCase().includes(searchTerm) ||
            text.metadata.auteur?.toLowerCase().includes(searchTerm) ||
            text.frenchText?.toLowerCase().includes(searchTerm) ||
            text.patoisText?.toLowerCase().includes(searchTerm)
          )
        })
      }
    }

    if (filters.category) {
      results = results.filter(text => text.metadata.categorie === filters.category)
    }

    if (filters.difficulty) {
      results = results.filter(text => text.metadata.difficulte === filters.difficulty)
    }

    if (filters.onlyWithAudio === 'true') {
      results = results.filter(text => text.hasAudio)
    }

    return results
  }

  /**
   * Lists the distinct categories found in the collection.
   *
   * @returns {Promise<string[]>} Sorted category names.
   */
  async getCategories() {
    const allTexts = await this.scanTexts()
    const categories = new Set()

    for (const text of allTexts) {
      if (text.metadata.categorie) {
        categories.add(text.metadata.categorie)
      }
    }

    return Array.from(categories).sort()
  }

  /**
   * Computes collection-wide counters.
   *
   * @returns {Promise<{totalTexts: number, withAudio: number, authors: number, categories: number}>}
   */
  async getStats() {
    const allTexts = await this.scanTexts()
    const authors = new Set()
    const categories = new Set()
    let withAudio = 0

    for (const text of allTexts) {
      if (text.metadata.auteur) authors.add(text.metadata.auteur)
      if (text.metadata.categorie) categories.add(text.metadata.categorie)
      if (text.hasAudio) withAudio++
    }

    return {
      totalTexts: allTexts.length,
      withAudio,
      authors: authors.size,
      categories: categories.size
    }
  }

  /**
   * Picks one text uniformly at random.
   *
   * @returns {Promise<Object>} A loaded text.
   * @throws {Error} When the collection is empty.
   */
  async getRandomText() {
    const allTexts = await this.scanTexts()
    if (allTexts.length === 0) {
      throw new Error('Aucun texte disponible')
    }
    const randomIndex = Math.floor(Math.random() * allTexts.length)
    return allTexts[randomIndex]
  }
}

// Shared service instance used by the route handlers below
const textService = new TextService()

// ==================== ROUTES API ====================

/**
 * GET /api/texts - List the texts, optionally narrowed by query-string parameters.
 *
 * Optional query parameters: `search`, `category`, `difficulty`, `onlyWithAudio`.
 * Responds with a JSON array of texts, or 500 with a generic error message.
 */
app.get('/api/texts', async (req, res) => {
  try {
    // Repeated (?search=a&search=b) or nested (?search[x]=y) parameters are parsed into
    // arrays/objects, while the service expects plain strings. Keep the first string
    // value and ignore anything else instead of crashing into a 500.
    const pickString = (value) => {
      const candidate = Array.isArray(value) ? value[0] : value
      return typeof candidate === 'string' ? candidate : undefined
    }

    const search = pickString(req.query.search)
    const category = pickString(req.query.category)
    const difficulty = pickString(req.query.difficulty)
    const onlyWithAudio = pickString(req.query.onlyWithAudio)

    if (search || category || difficulty || onlyWithAudio) {
      // Search with filters
      const results = await textService.searchTexts(search, {
        category,
        difficulty,
        onlyWithAudio
      })
      res.json(results)
    } else {
      // Full list
      const texts = await textService.scanTexts()
      res.json(texts)
    }
  } catch (error) {
    console.error('Erreur GET /api/texts:', error)
    res.status(500).json({ error: 'Erreur lors du chargement des textes' })
  }
})

/**
 * GET /api/texts/:id - Details of one text.
 *
 * Responds with the loaded text, 404 with the service's message when the text (or one
 * of its required files) does not exist, or 500 with a generic message on a system error.
 */
app.get('/api/texts/:id', async (req, res) => {
  try {
    const { id } = req.params
    const text = await textService.loadText(id)
    res.json(text)
  } catch (error) {
    console.error(`Erreur GET /api/texts/${req.params.id}:`, error)

    // Errors raised by Node itself (filesystem, invalid arguments) carry a string
    // `code`; the service's own "not found" errors do not. Keep system failures out of
    // the 404 path so their messages, which embed server paths, are not sent to clients.
    if (typeof error?.code === 'string') {
      return res.status(500).json({ error: 'Erreur lors du chargement du texte' })
    }

    res.status(404).json({ error: error.message })
  }
})

/**
 * GET /api/random - One text picked at random.
 *
 * Responds with the text returned by textService.getRandomText(), or 500 carrying the
 * error's message when that call fails.
 */
app.get('/api/random', async (_req, res) => {
  try {
    res.json(await textService.getRandomText())
  } catch (failure) {
    console.error('Erreur GET /api/random:', failure)
    const payload = { error: failure.message }
    res.status(500).json(payload)
  }
})

/**
 * GET /api/stats - Statistics about the collection.
 *
 * Responds with the object returned by textService.getStats(), or 500 with a generic
 * error message when that call fails.
 */
app.get('/api/stats', async (_req, res) => {
  try {
    res.json(await textService.getStats())
  } catch (failure) {
    console.error('Erreur GET /api/stats:', failure)
    const payload = { error: 'Erreur lors du chargement des statistiques' }
    res.status(500).json(payload)
  }
})

/**
 * GET /api/texts/:id/audio - Audio file of one text.
 *
 * Streams `<TEXTS_DIR>/<id>/audio.mp3`. Responds 404 when the id is not a plain
 * directory name or the file does not exist, 500 on an unexpected failure.
 */
app.get('/api/texts/:id/audio', async (req, res) => {
  try {
    const { id } = req.params

    // The id comes straight from the URL (encoded separators such as %2F are decoded
    // before reaching req.params), so refuse anything that is not a single path
    // segment: otherwise `..` sequences could reach files outside TEXTS_DIR.
    const isPlainName = id !== '.' && id !== '..' && id === path.basename(id)
    if (!isPlainName) {
      return res.status(404).json({ error: 'Fichier audio non trouvé' })
    }

    // res.sendFile requires an absolute path; TEXTS_DIR may be relative when it comes
    // from the environment.
    const audioPath = path.resolve(TEXTS_DIR, id, 'audio.mp3')

    const exists = await textService.fileExists(audioPath)
    if (!exists) {
      return res.status(404).json({ error: 'Fichier audio non trouvé' })
    }

    // Serve the audio file
    res.sendFile(audioPath)
  } catch (error) {
    console.error(`Erreur GET /api/texts/${req.params.id}/audio:`, error)
    res.status(500).json({ error: 'Erreur lors du chargement du fichier audio' })
  }
})

// Health check: always answers with status "OK" and the current time in ISO 8601 format
app.get('/api/health', (_req, res) => {
  const timestamp = new Date().toISOString()
  res.json({ status: 'OK', timestamp })
})

// Start the server
const server = app.listen(PORT, () => {
  // Log the port that was actually bound. PORT has no fallback, so when it is unset the
  // OS picks a free port and printing PORT itself would show "localhost:undefined".
  // server.address() is an object for TCP listeners; otherwise fall back to PORT.
  const address = server.address()
  const boundPort = address !== null && typeof address === 'object' ? address.port : PORT

  console.log(`🚀 Serveur API Patois démarré sur http://localhost:${boundPort}`)
  console.log(`📁 Dossier texts: ${TEXTS_DIR}`)
  console.log(`🔍 Endpoints disponibles:`)
  console.log(`   GET /api/texts - Liste des textes`)
  console.log(`   GET /api/texts/:id - Détails d'un texte`)
  console.log(`   GET /api/random - Texte aléatoire`)
  console.log(`   GET /api/stats - Statistiques`)
  console.log(`   GET /api/texts/:id/audio - Fichier audio`)
})

export default app