add slop filter
This commit is contained in:
parent
a6e49c9643
commit
e479eac246
|
|
@ -7,9 +7,10 @@ import { validateVideo, validateChannel } from '@/utils/regex'
|
|||
import { checkCaptcha, createDatabaseVideo } from '@/utils/common';
|
||||
import { downloadVideo } from '@/utils/download';
|
||||
import { uploadVideo } from '@/utils/upload';
|
||||
import { getChannelVideos } from '@/utils/metadata';
|
||||
import { getChannelVideos, getVideo } from '@/utils/metadata';
|
||||
import { error } from '@/utils/html'
|
||||
import redis from '@/utils/redis';
|
||||
import { parseSlop } from '@/utils/slop';
|
||||
|
||||
const app = new Elysia()
|
||||
const videoIds: Record<string, string> = {}
|
||||
|
|
@ -128,6 +129,15 @@ app.ws('/save', {
|
|||
ws.send('DATA - Captcha validated. Starting download...');
|
||||
}
|
||||
|
||||
const data = await getVideo(videoId)
|
||||
const slopScore = await parseSlop(videoId, data.videoDetails.title,
|
||||
(data.microformat.playerMicroformatRenderer.description?.simpleText || '').replaceAll('\n', '<br>'))
|
||||
|
||||
if (slopScore >= 4) {
|
||||
sendError(ws, 'Filters can always be wrong. Is the rating wrong? Email me at admin@preservetube.com');
|
||||
return sendError(ws, 'Your download has been rejected by our slop filter.');
|
||||
}
|
||||
|
||||
const downloadResult = await downloadVideo(ws, videoId);
|
||||
if (downloadResult.fail) {
|
||||
await cleanup(ws, videoId);
|
||||
|
|
@ -201,6 +211,14 @@ app.ws('/savechannel', {
|
|||
break;
|
||||
}
|
||||
|
||||
const slopScore = await parseSlop(video.video_id, video.title.text, video.description_snippet.text)
|
||||
|
||||
if (slopScore >= 4) {
|
||||
sendError(ws, 'Filters can always be wrong. Is the rating wrong? Email me at admin@preservetube.com');
|
||||
sendError(ws, 'Your download has been rejected by our slop filter.');
|
||||
continue;
|
||||
}
|
||||
|
||||
ws.send(`DATA - Processing video: ${video.title.text}`);
|
||||
await redis.set(video.video_id, 'downloading', 'EX', 300);
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,64 @@
|
|||
import redis from '@/utils/redis';
|
||||
|
||||
/**
 * Pulls the `{score, reasoning}` verdict out of the raw text returned by the model.
 *
 * Markdown code fences are stripped and any chatter surrounding the JSON object
 * is ignored. A numeric string score is coerced to a number and the result is
 * clamped to the 0-5 range the prompt asks for.
 *
 * @param content Raw message content produced by the model.
 * @returns The validated verdict.
 * @throws Error when no JSON object can be found or parsed, or when the score
 *   is missing or not numeric.
 */
function extractSlopVerdict(content: string): { score: number, reasoning: string } {
  const unfenced = content.replace(/```json|```/g, '').trim()
  const start = unfenced.indexOf('{')
  const end = unfenced.lastIndexOf('}')
  if (start === -1 || end < start) {
    throw new Error('Slop detector reply contained no JSON object')
  }

  let verdict: unknown
  try {
    verdict = JSON.parse(unfenced.slice(start, end + 1))
  } catch (e: unknown) {
    const detail = e instanceof Error ? e.message : String(e)
    throw new Error(`Slop detector reply was not valid JSON: ${detail}`)
  }
  if (typeof verdict !== 'object' || verdict === null) {
    throw new Error('Slop detector reply was not a JSON object')
  }

  const { score: rawScore, reasoning } = verdict as { score?: unknown, reasoning?: unknown }
  // Models occasionally quote the number; accept "4" as 4 but reject empty strings.
  const score = typeof rawScore === 'string' && rawScore.trim() !== '' ? Number(rawScore) : rawScore
  if (typeof score !== 'number' || !Number.isFinite(score)) {
    throw new Error('Slop detector reply had a missing or non-numeric score')
  }

  return {
    score: Math.min(5, Math.max(0, score)),
    reasoning: typeof reasoning === 'string' ? reasoning : ''
  }
}

/**
 * Asks the hosted LLM to rate a video's title/description on the 0-5 "Slop Score".
 *
 * Only the first 100 characters of the description are sent. The parsed verdict
 * is logged together with the video id.
 *
 * @param id Video id, used only for the log line.
 * @param title Video title, interpolated into the prompt.
 * @param description Video description; a non-string value is treated as empty.
 * @returns The validated `{score, reasoning}` verdict.
 * @throws Error when the HTTP request is not successful, the response has no
 *   message content, or the content cannot be validated as a verdict.
 */
async function analyseSlop(id: string, title: string, description: string) {
  // Callers may hand over an absent description at runtime; do not crash on it.
  const snippet = typeof description === 'string' ? description.slice(0, 100) : ''

  const response = await fetch('https://nano-gpt.com/api/v1/chat/completions', {
    method: 'POST',
    headers: {
      'Authorization': 'Bearer ' + process.env.NANOGPT_API,
      'Content-Type': 'application/json'
    },
    body: JSON.stringify({
      model: 'gemini-2.5-flash-lite',
      messages: [
        { role: 'user', content: `Role: You are "The Slop Detector." Analyze video titles and rate them 0-5 on the "Slop Score." Slop is derivative content using popular movie/TV footage + trendy audio + basic editing.

### SLOP SCORE (0-5)
- **0:** Original, complex, artistic. No slop signals.
- **1:** Fan edit with unique perspective, non-obvious choices.
- **2:** Well-made but predictable, adds nothing new.
- **3:** Some slop signals present, minimal effort visible.
- **4:** Textbook slop — popular character + overused/slowed song + 4K tag + "Edit."
- **5:** Maximum slop density — every possible signal stacked.

### SLOP SIGNALS
Use these as intuition guides, not a checklist. Weight them by how many stack together.

- **Emoji in title** — strong signal, especially 😂🤣😎😍🥺. Multiple emoji = very strong.
- **Title is entirely hashtags** — near-instant slop.
- **Unicode styled text** (𝐋𝐢𝐤𝐞 𝐓𝐡𝐢𝐬) — common in character edits.
- **Pipe separators** (|) splitting title into: caption | source | song — classic slop structure.
- **4K / [4K] tag** — almost always slop when paired with anything else.
- **"Edit" / "OneShot" / "Morphosis"** suffix or delimiter usage (║ Edit ║, 「Edit」).
- **Slowed / Reverb / Slowed+Reverb / MONTAGEM** — audio slop markers.
- **Known slop franchises:** Johnny English, Mr. Bean, Breaking Bad, Peaky Blinders, American Psycho, Patrick Bateman, The Boys, Homelander, Dexter, Joe Goldberg, Squid Game, Rick Grimes, Thomas Shelby, John Wick, Kingsman, and similar.
- **Song name explicitly in title** — especially if slowed/remixed.
- **Description** — if it contains hashtag spam, "subscribe," "no copyright," or music credits it reinforces slop signals from the title.
- **Tutorials and how-to videos** — score 0, always.

### OUTPUT
Valid JSON only. No other text. Reasoning max one sentence, and brief.

{"score": 0, "reasoning": "..."}

User Title: ${title}
User Description: ${snippet}` }
      ]
    })
  })

  // Surface API failures explicitly instead of tripping over a missing `choices` array.
  if (!response.ok) {
    throw new Error(`Slop detector request failed with HTTP ${response.status}`)
  }

  const llmResponse = (await response.json()) as { choices?: Array<{ message?: { content?: unknown } }> }
  const content = llmResponse?.choices?.[0]?.message?.content
  if (typeof content !== 'string') {
    throw new Error('Slop detector response did not include any message content')
  }

  const parsedResponse = extractSlopVerdict(content)
  console.log(`parsed ${id} - ${JSON.stringify(parsedResponse)}`)

  return parsedResponse
}
|
||||
|
||||
/**
 * Returns the slop score for a video, using the `slop:<id>` cache entry when present.
 *
 * On a cache miss the score is obtained from `analyseSlop` and stored for seven days.
 *
 * @param id Video id; also the cache key suffix.
 * @param title Video title forwarded to the analyser.
 * @param description Video description forwarded to the analyser.
 * @returns The cached or freshly computed score.
 */
async function parseSlop(id: string, title: string, description: string): Promise<number> {
  const cacheKey = `slop:${id}`
  const cachedSlop = await redis.get(cacheKey)

  // NOTE(review): assumes the client returns a string for a hit — confirm against '@/utils/redis'.
  if (typeof cachedSlop === 'string' && cachedSlop.trim() !== '') {
    // Number() keeps fractional scores intact, so a cached call returns the same value as the first call.
    const cachedScore = Number(cachedSlop)
    // An unparsable entry would become NaN, which compares false against any threshold; re-analyse instead.
    if (Number.isFinite(cachedScore)) return cachedScore
  }

  const { score } = await analyseSlop(id, title, description)

  // Never pin a malformed verdict in the cache for a week.
  if (Number.isFinite(score)) {
    await redis.set(cacheKey, score, 'EX', 60 * 60 * 24 * 7)
  }

  return score
}
|
||||
|
||||
export { parseSlop }
|
||||
Loading…
Reference in New Issue