Source from repo

Deep Research

Enterprise-grade research with multi-source synthesis, citation tracking, and verification. 8-phase pipeline with auto-continuation.

199-biotechnologies · GitHub: 199-biotechnologies · Source repo · Original GitHub link

Files

Skill

n/a

Size

221.7 KB

Entrypoint

SKILL.md

Format

git-repo

Open file

scripts/source_evaluator.py

Syntax-highlighted preview of this file as included in the skill package.

Rendered Source

code · 293 lines · Free

scripts/source_evaluator.py

1#!/usr/bin/env python3
2"""
3Source Credibility Evaluator
4Assesses source quality, credibility, and potential biases
5"""
6 
7from dataclasses import dataclass
8from typing import List, Dict, Optional
9from urllib.parse import urlparse
10from datetime import datetime, timedelta
11import re
12 
13 
@dataclass
class CredibilityScore:
    """Outcome of a single source credibility evaluation.

    Every numeric field is expressed on a 0-100 scale.
    """
    # Combined score across all components
    overall_score: float
    # Reputation of the hosting domain
    domain_authority: float
    # Freshness of the publication date
    recency: float
    # Strength of the expertise signals
    expertise: float
    # Neutrality: a higher value means a more neutral source
    bias_score: float
    # Short human-readable notes, keyed by component name
    factors: Dict[str, str]
    # One of "high_trust", "moderate_trust", "low_trust", "verify"
    recommendation: str
24 
25 
class SourceEvaluator:
    """Heuristic credibility scorer for web sources.

    Four component scores (domain authority, recency, expertise signals and
    neutrality) are each computed on a 0-100 scale, blended into a weighted
    overall score, and mapped to a trust recommendation. Every check is a
    static heuristic over the URL, title, author string and optional body
    text that the caller passes in.
    """

    # Academic publishers and research repositories. One shared list keeps the
    # authority, expertise and bias checks in agreement about what counts as
    # an academic source.
    ACADEMIC_DOMAINS = {
        'arxiv.org', 'nature.com', 'science.org', 'cell.com', 'nejm.org',
        'thelancet.com', 'springer.com', 'sciencedirect.com', 'plos.org',
        'ieee.org', 'acm.org', 'pubmed.ncbi.nlm.nih.gov',
    }

    # Domain reputation tiers. Entries are bare host names: only the host part
    # of a URL is matched, so an entry containing a path could never match
    # (nature.com already covers its news section).
    HIGH_AUTHORITY_DOMAINS = ACADEMIC_DOMAINS | {
        # Government & International Organizations
        'nih.gov', 'cdc.gov', 'who.int', 'fda.gov', 'nasa.gov',
        'gov.uk', 'europa.eu', 'un.org',

        # Established Tech Documentation
        'docs.python.org', 'developer.mozilla.org', 'docs.microsoft.com',
        'cloud.google.com', 'aws.amazon.com', 'kubernetes.io',

        # Reputable News (Fact-check verified)
        'reuters.com', 'apnews.com', 'bbc.com', 'economist.com',
        'scientificamerican.com',
    }

    MODERATE_AUTHORITY_DOMAINS = {
        # Tech News & Analysis
        'techcrunch.com', 'theverge.com', 'arstechnica.com', 'wired.com',
        'zdnet.com', 'cnet.com',

        # Industry Publications
        'forbes.com', 'bloomberg.com', 'wsj.com', 'ft.com',

        # Educational
        'wikipedia.org', 'britannica.com', 'khanacademy.org',

        # Tech Blogs (established)
        'medium.com', 'dev.to', 'stackoverflow.com', 'github.com'
    }

    LOW_AUTHORITY_INDICATORS = [
        'blogspot.com', 'wordpress.com', 'wix.com', 'substack.com'
    ]

    # Title fragments treated as sensationalism (compared against the
    # lower-cased title).
    _SENSATIONAL_INDICATORS = (
        '!', 'shocking', 'unbelievable', 'you won\'t believe',
        'secret', 'they don\'t want you to know',
    )

    # Phrases in the body text that suggest more than one viewpoint is covered.
    _BALANCED_INDICATORS = (
        'however', 'although', 'on the other hand', 'critics argue',
    )

    # Author credentials. The leading \b stops "dr." from matching inside a
    # longer word such as "Alexandr.".
    _CREDENTIAL_PATTERN = re.compile(r'\b(?:dr\.|ph\.?d|professor)', re.IGNORECASE)

    # (factor key, high cutoff, high note, low cutoff, low note)
    _FACTOR_RULES = (
        ('domain', 85, "High authority domain",
         45, "Low authority domain - verify claims"),
        ('recency', 85, "Recent information",
         40, "Outdated information - verify currency"),
        ('expertise', 80, "Expert source",
         45, "Limited expertise indicators"),
        ('bias', 80, "Balanced perspective",
         50, "Potential bias detected"),
    )

    def evaluate_source(
        self,
        url: str,
        title: str,
        content: Optional[str] = None,
        publication_date: Optional[str] = None,
        author: Optional[str] = None
    ) -> "CredibilityScore":
        """Evaluate the credibility of one source.

        Args:
            url: Address of the source; only its host name is used.
            title: Headline or page title.
            content: Optional body text, used for the balance check.
            publication_date: Optional ISO-8601 date or timestamp string.
            author: Optional author string, scanned for credentials.

        Returns:
            A CredibilityScore with the four component scores, the weighted
            overall score (all rounded to two decimals), explanatory factors
            and a trust recommendation.
        """
        domain = self._extract_domain(url)

        # Calculate component scores
        domain_score = self._evaluate_domain_authority(domain)
        recency_score = self._evaluate_recency(publication_date)
        expertise_score = self._evaluate_expertise(domain, title, author)
        bias_score = self._evaluate_bias(domain, title, content)

        # Weighted average; the weights sum to 1.0
        overall = (
            domain_score * 0.35 +
            recency_score * 0.20 +
            expertise_score * 0.25 +
            bias_score * 0.20
        )

        factors = self._identify_factors(
            domain, domain_score, recency_score, expertise_score, bias_score
        )
        recommendation = self._generate_recommendation(overall)

        return CredibilityScore(
            overall_score=round(overall, 2),
            domain_authority=round(domain_score, 2),
            recency=round(recency_score, 2),
            expertise=round(expertise_score, 2),
            bias_score=round(bias_score, 2),
            factors=factors,
            recommendation=recommendation
        )

    @staticmethod
    def _matches_domain(domain: str, candidates) -> bool:
        """Return True if domain equals, or is a subdomain of, any candidate.

        Matching is anchored on a label boundary, so 'en.wikipedia.org'
        matches 'wikipedia.org' but 'notwikipedia.org' does not.
        """
        return any(
            domain == candidate or domain.endswith('.' + candidate)
            for candidate in candidates
        )

    def _extract_domain(self, url: str) -> str:
        """Extract the lower-cased host name from a URL.

        Port and userinfo are dropped, a single leading 'www.' is removed,
        and scheme-less input such as 'example.com/page' is accepted.
        Returns an empty string when no host can be found.
        """
        candidate = url.strip()
        # urlparse only recognises a host after '//'; add it for bare hosts
        if '://' not in candidate:
            candidate = '//' + candidate.lstrip('/')
        # .hostname is already lower-cased and excludes port and credentials
        host = (urlparse(candidate).hostname or '').rstrip('.')
        # Strip only a leading 'www.', never an occurrence inside the name
        if host.startswith('www.'):
            host = host[len('www.'):]
        return host

    def _evaluate_domain_authority(self, domain: str) -> float:
        """Evaluate domain authority (0-100).

        Returns 90 for the high tier, 70 for the moderate tier, 40 for
        known self-publishing platforms and 55 for anything unrecognised.
        Subdomains inherit the tier of their parent domain.
        """
        if self._matches_domain(domain, self.HIGH_AUTHORITY_DOMAINS):
            return 90.0
        if self._matches_domain(domain, self.MODERATE_AUTHORITY_DOMAINS):
            return 70.0
        if self._matches_domain(domain, self.LOW_AUTHORITY_INDICATORS):
            return 40.0
        # Unknown domain - moderate skepticism
        return 55.0

    def _evaluate_recency(self, publication_date: Optional[str]) -> float:
        """Evaluate information recency (0-100).

        Accepts ISO-8601 dates or timestamps, with or without a UTC offset
        or trailing 'Z'. Missing or unparseable dates score a neutral 50.
        """
        if not publication_date:
            return 50.0  # Unknown date

        try:
            text = publication_date.strip()
            # fromisoformat() on older Pythons rejects the 'Z' suffix
            if text.endswith(('Z', 'z')):
                text = text[:-1] + '+00:00'
            pub_date = datetime.fromisoformat(text)
        except (ValueError, TypeError, AttributeError):
            return 50.0

        # Compare like with like: an aware "now" for aware timestamps and a
        # naive "now" for naive ones. Mixing the two raises TypeError.
        age = datetime.now(tz=pub_date.tzinfo) - pub_date

        # A future date gives a negative age and lands in the freshest bucket
        if age < timedelta(days=90):  # < 3 months
            return 100.0
        if age < timedelta(days=365):  # < 1 year
            return 85.0
        if age < timedelta(days=730):  # < 2 years
            return 70.0
        if age < timedelta(days=1825):  # < 5 years
            return 50.0
        return 30.0

    def _is_government(self, domain: str) -> bool:
        """Heuristically decide whether a host is a government/official site.

        True for the '.gov' TLD, for 'gov' as the second-level label under a
        two-letter country code (gov.uk, service.gov.au, ...) and for who.int.
        """
        labels = domain.split('.')
        if labels[-1] == 'gov':
            return True
        if len(labels) >= 2 and labels[-2] == 'gov' and len(labels[-1]) == 2:
            return True
        return self._matches_domain(domain, ('who.int',))

    def _evaluate_expertise(
        self,
        domain: str,
        title: str,
        author: Optional[str]
    ) -> float:
        """Evaluate source expertise (0-100).

        Starts at 50 and adds 30 for academic domains, 25 for government or
        official domains, 20 for technical documentation and 15 for an
        author string carrying a credential. The result is capped at 100.
        """
        score = 50.0
        title = title or ''

        # Academic/research domains get high expertise
        if self._matches_domain(domain, self.ACADEMIC_DOMAINS):
            score += 30

        # Government/official sources
        if self._is_government(domain):
            score += 25

        # Technical documentation: a 'docs' host label or an explicit title
        if 'docs' in domain.split('.') or 'documentation' in title.lower():
            score += 20

        # Author credentials (if available)
        if author and self._CREDENTIAL_PATTERN.search(author):
            score += 15

        return min(score, 100.0)

    def _evaluate_bias(
        self,
        domain: str,
        title: str,
        content: Optional[str]
    ) -> float:
        """Evaluate potential bias (0-100, higher = more neutral).

        Starts at 70, subtracts 20 for a sensational title, adds 20 for
        academic domains and adds 10 when the body text uses balancing
        language. The result is clamped to the 0-100 range.
        """
        score = 70.0  # Start neutral

        # Check for sensationalism in title
        title_lower = (title or '').lower()
        if any(marker in title_lower for marker in self._SENSATIONAL_INDICATORS):
            score -= 20

        # Academic sources are typically less biased
        if self._matches_domain(domain, self.ACADEMIC_DOMAINS):
            score += 20

        # Check for balance in content (if available)
        if content:
            content_lower = content.lower()
            if any(phrase in content_lower for phrase in self._BALANCED_INDICATORS):
                score += 10

        return max(0.0, min(score, 100.0))

    def _identify_factors(
        self,
        domain: str,
        domain_score: float,
        recency_score: float,
        expertise_score: float,
        bias_score: float
    ) -> Dict[str, str]:
        """Identify key credibility factors.

        Each component contributes a note only when its score is at or above
        its high cutoff or at or below its low cutoff; mid-range scores are
        omitted. The `domain` argument is accepted but not consulted.
        """
        scores = {
            'domain': domain_score,
            'recency': recency_score,
            'expertise': expertise_score,
            'bias': bias_score,
        }
        factors = {}
        for key, high_cutoff, high_note, low_cutoff, low_note in self._FACTOR_RULES:
            value = scores[key]
            if value >= high_cutoff:
                factors[key] = high_note
            elif value <= low_cutoff:
                factors[key] = low_note
        return factors

    def _generate_recommendation(self, overall_score: float) -> str:
        """Map the overall score to a trust recommendation.

        80 and above is "high_trust", 60 and above "moderate_trust", 40 and
        above "low_trust", and anything lower "verify".
        """
        if overall_score >= 80:
            return "high_trust"
        if overall_score >= 60:
            return "moderate_trust"
        if overall_score >= 40:
            return "low_trust"
        return "verify"
261 
262 
# Demo: score a few hand-picked sources and print a short report for each
if __name__ == '__main__':
    demo_sources = (
        dict(
            url='https://www.nature.com/articles/s41586-2025-12345',
            title='Breakthrough in Quantum Computing',
            publication_date='2025-10-15',
        ),
        dict(
            url='https://someblog.wordpress.com/shocking-discovery',
            title="SHOCKING! You Won't Believe This Discovery!",
            publication_date='2020-01-01',
        ),
        dict(
            url='https://docs.python.org/3/library/asyncio.html',
            title='asyncio — Asynchronous I/O',
            publication_date='2025-11-01',
        ),
    )

    scorer = SourceEvaluator()
    for entry in demo_sources:
        result = scorer.evaluate_source(**entry)
        # One blank line separates consecutive reports
        report_lines = [
            f"\nSource: {entry['title']}",
            f"URL: {entry['url']}",
            f"Overall Score: {result.overall_score}/100",
            f"Recommendation: {result.recommendation}",
            f"Factors: {result.factors}",
        ]
        print("\n".join(report_lines))
293

Marketplace

Source from repo

Deep Research

Enterprise-grade research with multi-source synthesis, citation tracking, and verification. 8-phase pipeline with auto-continuation.

199-biotechnologies · GitHub: 199-biotechnologies · Source repo · Original GitHub link

Files

Skill

n/a

Size

221.7 KB

Entrypoint

SKILL.md

Format

git-repo

Open file

scripts/source_evaluator.py

Syntax-highlighted preview of this file as included in the skill package.

Rendered Source

code · 293 lines · Free

scripts/source_evaluator.py

1#!/usr/bin/env python3
2"""
3Source Credibility Evaluator
4Assesses source quality, credibility, and potential biases
5"""
6 
7from dataclasses import dataclass
8from typing import List, Dict, Optional
9from urllib.parse import urlparse
10from datetime import datetime, timedelta
11import re
12 
13 
@dataclass
class CredibilityScore:
    """Outcome of a single source credibility evaluation.

    Every numeric field is expressed on a 0-100 scale.
    """
    # Combined score across all components
    overall_score: float
    # Reputation of the hosting domain
    domain_authority: float
    # Freshness of the publication date
    recency: float
    # Strength of the expertise signals
    expertise: float
    # Neutrality: a higher value means a more neutral source
    bias_score: float
    # Short human-readable notes, keyed by component name
    factors: Dict[str, str]
    # One of "high_trust", "moderate_trust", "low_trust", "verify"
    recommendation: str
24 
25 
class SourceEvaluator:
    """Heuristic credibility scorer for web sources.

    Four component scores (domain authority, recency, expertise signals and
    neutrality) are each computed on a 0-100 scale, blended into a weighted
    overall score, and mapped to a trust recommendation. Every check is a
    static heuristic over the URL, title, author string and optional body
    text that the caller passes in.
    """

    # Academic publishers and research repositories. One shared list keeps the
    # authority, expertise and bias checks in agreement about what counts as
    # an academic source.
    ACADEMIC_DOMAINS = {
        'arxiv.org', 'nature.com', 'science.org', 'cell.com', 'nejm.org',
        'thelancet.com', 'springer.com', 'sciencedirect.com', 'plos.org',
        'ieee.org', 'acm.org', 'pubmed.ncbi.nlm.nih.gov',
    }

    # Domain reputation tiers. Entries are bare host names: only the host part
    # of a URL is matched, so an entry containing a path could never match
    # (nature.com already covers its news section).
    HIGH_AUTHORITY_DOMAINS = ACADEMIC_DOMAINS | {
        # Government & International Organizations
        'nih.gov', 'cdc.gov', 'who.int', 'fda.gov', 'nasa.gov',
        'gov.uk', 'europa.eu', 'un.org',

        # Established Tech Documentation
        'docs.python.org', 'developer.mozilla.org', 'docs.microsoft.com',
        'cloud.google.com', 'aws.amazon.com', 'kubernetes.io',

        # Reputable News (Fact-check verified)
        'reuters.com', 'apnews.com', 'bbc.com', 'economist.com',
        'scientificamerican.com',
    }

    MODERATE_AUTHORITY_DOMAINS = {
        # Tech News & Analysis
        'techcrunch.com', 'theverge.com', 'arstechnica.com', 'wired.com',
        'zdnet.com', 'cnet.com',

        # Industry Publications
        'forbes.com', 'bloomberg.com', 'wsj.com', 'ft.com',

        # Educational
        'wikipedia.org', 'britannica.com', 'khanacademy.org',

        # Tech Blogs (established)
        'medium.com', 'dev.to', 'stackoverflow.com', 'github.com'
    }

    LOW_AUTHORITY_INDICATORS = [
        'blogspot.com', 'wordpress.com', 'wix.com', 'substack.com'
    ]

    # Title fragments treated as sensationalism (compared against the
    # lower-cased title).
    _SENSATIONAL_INDICATORS = (
        '!', 'shocking', 'unbelievable', 'you won\'t believe',
        'secret', 'they don\'t want you to know',
    )

    # Phrases in the body text that suggest more than one viewpoint is covered.
    _BALANCED_INDICATORS = (
        'however', 'although', 'on the other hand', 'critics argue',
    )

    # Author credentials. The leading \b stops "dr." from matching inside a
    # longer word such as "Alexandr.".
    _CREDENTIAL_PATTERN = re.compile(r'\b(?:dr\.|ph\.?d|professor)', re.IGNORECASE)

    # (factor key, high cutoff, high note, low cutoff, low note)
    _FACTOR_RULES = (
        ('domain', 85, "High authority domain",
         45, "Low authority domain - verify claims"),
        ('recency', 85, "Recent information",
         40, "Outdated information - verify currency"),
        ('expertise', 80, "Expert source",
         45, "Limited expertise indicators"),
        ('bias', 80, "Balanced perspective",
         50, "Potential bias detected"),
    )

    def evaluate_source(
        self,
        url: str,
        title: str,
        content: Optional[str] = None,
        publication_date: Optional[str] = None,
        author: Optional[str] = None
    ) -> "CredibilityScore":
        """Evaluate the credibility of one source.

        Args:
            url: Address of the source; only its host name is used.
            title: Headline or page title.
            content: Optional body text, used for the balance check.
            publication_date: Optional ISO-8601 date or timestamp string.
            author: Optional author string, scanned for credentials.

        Returns:
            A CredibilityScore with the four component scores, the weighted
            overall score (all rounded to two decimals), explanatory factors
            and a trust recommendation.
        """
        domain = self._extract_domain(url)

        # Calculate component scores
        domain_score = self._evaluate_domain_authority(domain)
        recency_score = self._evaluate_recency(publication_date)
        expertise_score = self._evaluate_expertise(domain, title, author)
        bias_score = self._evaluate_bias(domain, title, content)

        # Weighted average; the weights sum to 1.0
        overall = (
            domain_score * 0.35 +
            recency_score * 0.20 +
            expertise_score * 0.25 +
            bias_score * 0.20
        )

        factors = self._identify_factors(
            domain, domain_score, recency_score, expertise_score, bias_score
        )
        recommendation = self._generate_recommendation(overall)

        return CredibilityScore(
            overall_score=round(overall, 2),
            domain_authority=round(domain_score, 2),
            recency=round(recency_score, 2),
            expertise=round(expertise_score, 2),
            bias_score=round(bias_score, 2),
            factors=factors,
            recommendation=recommendation
        )

    @staticmethod
    def _matches_domain(domain: str, candidates) -> bool:
        """Return True if domain equals, or is a subdomain of, any candidate.

        Matching is anchored on a label boundary, so 'en.wikipedia.org'
        matches 'wikipedia.org' but 'notwikipedia.org' does not.
        """
        return any(
            domain == candidate or domain.endswith('.' + candidate)
            for candidate in candidates
        )

    def _extract_domain(self, url: str) -> str:
        """Extract the lower-cased host name from a URL.

        Port and userinfo are dropped, a single leading 'www.' is removed,
        and scheme-less input such as 'example.com/page' is accepted.
        Returns an empty string when no host can be found.
        """
        candidate = url.strip()
        # urlparse only recognises a host after '//'; add it for bare hosts
        if '://' not in candidate:
            candidate = '//' + candidate.lstrip('/')
        # .hostname is already lower-cased and excludes port and credentials
        host = (urlparse(candidate).hostname or '').rstrip('.')
        # Strip only a leading 'www.', never an occurrence inside the name
        if host.startswith('www.'):
            host = host[len('www.'):]
        return host

    def _evaluate_domain_authority(self, domain: str) -> float:
        """Evaluate domain authority (0-100).

        Returns 90 for the high tier, 70 for the moderate tier, 40 for
        known self-publishing platforms and 55 for anything unrecognised.
        Subdomains inherit the tier of their parent domain.
        """
        if self._matches_domain(domain, self.HIGH_AUTHORITY_DOMAINS):
            return 90.0
        if self._matches_domain(domain, self.MODERATE_AUTHORITY_DOMAINS):
            return 70.0
        if self._matches_domain(domain, self.LOW_AUTHORITY_INDICATORS):
            return 40.0
        # Unknown domain - moderate skepticism
        return 55.0

    def _evaluate_recency(self, publication_date: Optional[str]) -> float:
        """Evaluate information recency (0-100).

        Accepts ISO-8601 dates or timestamps, with or without a UTC offset
        or trailing 'Z'. Missing or unparseable dates score a neutral 50.
        """
        if not publication_date:
            return 50.0  # Unknown date

        try:
            text = publication_date.strip()
            # fromisoformat() on older Pythons rejects the 'Z' suffix
            if text.endswith(('Z', 'z')):
                text = text[:-1] + '+00:00'
            pub_date = datetime.fromisoformat(text)
        except (ValueError, TypeError, AttributeError):
            return 50.0

        # Compare like with like: an aware "now" for aware timestamps and a
        # naive "now" for naive ones. Mixing the two raises TypeError.
        age = datetime.now(tz=pub_date.tzinfo) - pub_date

        # A future date gives a negative age and lands in the freshest bucket
        if age < timedelta(days=90):  # < 3 months
            return 100.0
        if age < timedelta(days=365):  # < 1 year
            return 85.0
        if age < timedelta(days=730):  # < 2 years
            return 70.0
        if age < timedelta(days=1825):  # < 5 years
            return 50.0
        return 30.0

    def _is_government(self, domain: str) -> bool:
        """Heuristically decide whether a host is a government/official site.

        True for the '.gov' TLD, for 'gov' as the second-level label under a
        two-letter country code (gov.uk, service.gov.au, ...) and for who.int.
        """
        labels = domain.split('.')
        if labels[-1] == 'gov':
            return True
        if len(labels) >= 2 and labels[-2] == 'gov' and len(labels[-1]) == 2:
            return True
        return self._matches_domain(domain, ('who.int',))

    def _evaluate_expertise(
        self,
        domain: str,
        title: str,
        author: Optional[str]
    ) -> float:
        """Evaluate source expertise (0-100).

        Starts at 50 and adds 30 for academic domains, 25 for government or
        official domains, 20 for technical documentation and 15 for an
        author string carrying a credential. The result is capped at 100.
        """
        score = 50.0
        title = title or ''

        # Academic/research domains get high expertise
        if self._matches_domain(domain, self.ACADEMIC_DOMAINS):
            score += 30

        # Government/official sources
        if self._is_government(domain):
            score += 25

        # Technical documentation: a 'docs' host label or an explicit title
        if 'docs' in domain.split('.') or 'documentation' in title.lower():
            score += 20

        # Author credentials (if available)
        if author and self._CREDENTIAL_PATTERN.search(author):
            score += 15

        return min(score, 100.0)

    def _evaluate_bias(
        self,
        domain: str,
        title: str,
        content: Optional[str]
    ) -> float:
        """Evaluate potential bias (0-100, higher = more neutral).

        Starts at 70, subtracts 20 for a sensational title, adds 20 for
        academic domains and adds 10 when the body text uses balancing
        language. The result is clamped to the 0-100 range.
        """
        score = 70.0  # Start neutral

        # Check for sensationalism in title
        title_lower = (title or '').lower()
        if any(marker in title_lower for marker in self._SENSATIONAL_INDICATORS):
            score -= 20

        # Academic sources are typically less biased
        if self._matches_domain(domain, self.ACADEMIC_DOMAINS):
            score += 20

        # Check for balance in content (if available)
        if content:
            content_lower = content.lower()
            if any(phrase in content_lower for phrase in self._BALANCED_INDICATORS):
                score += 10

        return max(0.0, min(score, 100.0))

    def _identify_factors(
        self,
        domain: str,
        domain_score: float,
        recency_score: float,
        expertise_score: float,
        bias_score: float
    ) -> Dict[str, str]:
        """Identify key credibility factors.

        Each component contributes a note only when its score is at or above
        its high cutoff or at or below its low cutoff; mid-range scores are
        omitted. The `domain` argument is accepted but not consulted.
        """
        scores = {
            'domain': domain_score,
            'recency': recency_score,
            'expertise': expertise_score,
            'bias': bias_score,
        }
        factors = {}
        for key, high_cutoff, high_note, low_cutoff, low_note in self._FACTOR_RULES:
            value = scores[key]
            if value >= high_cutoff:
                factors[key] = high_note
            elif value <= low_cutoff:
                factors[key] = low_note
        return factors

    def _generate_recommendation(self, overall_score: float) -> str:
        """Map the overall score to a trust recommendation.

        80 and above is "high_trust", 60 and above "moderate_trust", 40 and
        above "low_trust", and anything lower "verify".
        """
        if overall_score >= 80:
            return "high_trust"
        if overall_score >= 60:
            return "moderate_trust"
        if overall_score >= 40:
            return "low_trust"
        return "verify"
261 
262 
# Demo: score a few hand-picked sources and print a short report for each
if __name__ == '__main__':
    demo_sources = (
        dict(
            url='https://www.nature.com/articles/s41586-2025-12345',
            title='Breakthrough in Quantum Computing',
            publication_date='2025-10-15',
        ),
        dict(
            url='https://someblog.wordpress.com/shocking-discovery',
            title="SHOCKING! You Won't Believe This Discovery!",
            publication_date='2020-01-01',
        ),
        dict(
            url='https://docs.python.org/3/library/asyncio.html',
            title='asyncio — Asynchronous I/O',
            publication_date='2025-11-01',
        ),
    )

    scorer = SourceEvaluator()
    for entry in demo_sources:
        result = scorer.evaluate_source(**entry)
        # One blank line separates consecutive reports
        report_lines = [
            f"\nSource: {entry['title']}",
            f"URL: {entry['url']}",
            f"Overall Score: {result.overall_score}/100",
            f"Recommendation: {result.recommendation}",
            f"Factors: {result.factors}",
        ]
        print("\n".join(report_lines))
293

Deep Research

scripts/source_evaluator.py

Preparing the source view

Deep Research

scripts/source_evaluator.py