crawl4ai
crawl4ai copied to clipboard
AttributeError: 'CrawlResult' object has no attribute 'fit_markdown'
import asyncio from crawl4ai import AsyncWebCrawler import json
async def main(): async with AsyncWebCrawler(verbos=True) as crawler: result = await crawler.arun( url="https://batteriesnews.com/lg-chem-files-lawsuit-against-unit-of-chinas-ronbay-over-battery-tech", bypass_cache=True, word_count_threshold=10, ) print(result.fit_markdown) # Print clean markdown content
AttributeError: 'CrawlResult' object has no attribute 'fit_markdown'. Accessing result.markdown works with no problem.
@wg20170107 Make sure your library is up to date (the current version is 0.3.72). If it is not, upgrade it with pip using 'pip install -U crawl4ai'; the latest release already includes this feature, so an outdated install should be the only reason for the error.
`import asyncio from crawl4ai import AsyncWebCrawler import json
async def main():
async with AsyncWebCrawler(verbos=True) as crawler:
result = await crawler.arun(
url="https://www.accenture.com/us-en/insights/song/generative-ai-customer-growth",
word_count_threshold=10,
)
print(result.fit_markdown) # Print clean markdown content
if name == "main": asyncio.run(main())`
OUTPUT: [INIT].... → Crawl4AI 0.4.247 [FETCH]... ↓ https://www.accenture.com/us-en/insights/song/gene... | Status: True | Time: 0.01s [COMPLETE] ● https://www.accenture.com/us-en/insights/song/gene... | Status: True | Total: 0.01s None
Accessing result.markdown works with no problem.
Hi @tonyOgbonna, please check the documentation. We are now at version 0.4.247 and certain things have changed, especially markdown.
I am also getting this issue with crawl4ai>=0.5.0.post8. This is from the source code:
class CrawlerRunConfig():
def __init__(
self,
# Content Processing Parameters
word_count_threshold: int = MIN_WORD_THRESHOLD,
extraction_strategy: ExtractionStrategy = None,
chunking_strategy: ChunkingStrategy = RegexChunking(),
markdown_generator: MarkdownGenerationStrategy = None,
only_text: bool = False,
css_selector: str = None,
target_elements: List[str] = None,
excluded_tags: list = None,
excluded_selector: str = None,
keep_data_attributes: bool = False,
keep_attrs: list = None,
remove_forms: bool = False,
prettiify: bool = False,
parser_type: str = "lxml",
scraping_strategy: ContentScrapingStrategy = None,
proxy_config: Union[ProxyConfig, dict, None] = None,
proxy_rotation_strategy: Optional[ProxyRotationStrategy] = None,
# SSL Parameters
fetch_ssl_certificate: bool = False,
# Caching Parameters
cache_mode: CacheMode = CacheMode.BYPASS,
session_id: str = None,
bypass_cache: bool = False,
disable_cache: bool = False,
no_cache_read: bool = False,
no_cache_write: bool = False,
shared_data: dict = None,
# Page Navigation and Timing Parameters
wait_until: str = "domcontentloaded",
page_timeout: int = PAGE_TIMEOUT,
wait_for: str = None,
wait_for_images: bool = False,
delay_before_return_html: float = 0.1,
mean_delay: float = 0.1,
max_range: float = 0.3,
semaphore_count: int = 5,
# Page Interaction Parameters
js_code: Union[str, List[str]] = None,
js_only: bool = False,
ignore_body_visibility: bool = True,
scan_full_page: bool = False,
scroll_delay: float = 0.2,
process_iframes: bool = False,
remove_overlay_elements: bool = False,
simulate_user: bool = False,
override_navigator: bool = False,
magic: bool = False,
adjust_viewport_to_content: bool = False,
# Media Handling Parameters
screenshot: bool = False,
screenshot_wait_for: float = None,
screenshot_height_threshold: int = SCREENSHOT_HEIGHT_TRESHOLD,
pdf: bool = False,
image_description_min_word_threshold: int = IMAGE_DESCRIPTION_MIN_WORD_THRESHOLD,
image_score_threshold: int = IMAGE_SCORE_THRESHOLD,
table_score_threshold: int = 7,
exclude_external_images: bool = False,
# Link and Domain Handling Parameters
exclude_social_media_domains: list = None,
exclude_external_links: bool = False,
exclude_social_media_links: bool = False,
exclude_domains: list = None,
exclude_internal_links: bool = False,
# Debugging and Logging Parameters
verbose: bool = True,
log_console: bool = False,
# Connection Parameters
method: str = "GET",
stream: bool = False,
url: str = None,
check_robots_txt: bool = False,
user_agent: str = None,
user_agent_mode: str = None,
user_agent_generator_config: dict = {},
# Deep Crawl Parameters
deep_crawl_strategy: Optional[DeepCrawlStrategy] = None,
# Experimental Parameters
experimental: Dict[str, Any] = None,
):
As you can see, it is literally not an option that can be passed in, but the second code snippet in the docs pushes me to use this option, so I am confused about whether it is deprecated or I am messing something up: https://docs.crawl4ai.com/core/simple-crawling/
@ekeric13 did you happen to find any solution for this?