Module: Kettle::Dev::PreReleaseCLI::Markdown

Defined in:
lib/kettle/dev/pre_release_cli.rb

Overview

Markdown parsing helpers

Class Method Summary

Class Method Details

.extract_image_urls_from_files(glob_pattern = "*.md") ⇒ Array<String>

Extract unique remote HTTP(S) image URLs from every file matching the glob pattern.

Parameters:

  • glob_pattern (String) (defaults to: "*.md")

Returns:

  • (Array<String>)


130
131
132
133
134
135
136
137
138
139
140
141
# File 'lib/kettle/dev/pre_release_cli.rb', line 130

# Collect image URLs from every file matched by a glob pattern.
#
# Each matching path is read and handed to +extract_image_urls_from_text+.
# Any StandardError raised while reading or scanning a path is reported via
# +warn+ and that path contributes no URLs, so one bad file does not abort
# the whole run.
#
# @param glob_pattern [String] pattern passed to Dir.glob
# @return [Array<String>] de-duplicated URLs gathered across all matched files
def extract_image_urls_from_files(glob_pattern = "*.md")
  Dir.glob(glob_pattern).flat_map { |path|
    begin
      contents = File.read(path)
      extract_image_urls_from_text(contents)
    rescue StandardError => error
      warn("[kettle-pre-release] Could not read #{path}: #{error.class}: #{error.message}")
      # Best effort: an unreadable path simply yields nothing.
      []
    end
  }.uniq
end

.extract_image_urls_from_text(text) ⇒ Array<String>

Extract unique remote HTTP(S) image URLs from markdown or HTML images.

Parameters:

  • text (String)

Returns:

  • (Array<String>)


101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
# File 'lib/kettle/dev/pre_release_cli.rb', line 101

# Extract unique remote HTTP(S) image URLs from Markdown or HTML images.
#
# Recognized forms:
# - inline images: ![alt](url), optionally followed by a "title", 'title'
#   or (title), with the destination optionally wrapped in <...>
# - reference images: full ![alt][id], collapsed ![id][] and shortcut ![id],
#   resolved against "[id]: url" definitions. Labels are compared
#   case-insensitively with whitespace collapsed, and the first definition
#   of a label wins.
# - HTML <img> tags with a double- or single-quoted src attribute
#
# Relative paths and non-HTTP(S) schemes are discarded.
#
# @param text [String, nil] Markdown/HTML source; nil is treated as empty
# @return [Array<String>] unique URLs, grouped in the order inline,
#   reference, double-quoted HTML src, single-quoted HTML src
def extract_image_urls_from_text(text)
  text = text.to_s
  urls = []

  # Markdown link labels match case-insensitively, ignoring whitespace runs.
  normalize_label = ->(label) { label.strip.gsub(/\s+/, " ").downcase }

  # Inline image syntax
  text.scan(/!\[[^\]]*\]\(\s*([^\s)]+)(?:\s+(?:"[^"]*"|'[^']*'|\([^)]*\)))?\s*\)/) { |m| urls << m[0] }

  # Reference definitions (the first definition of a label takes precedence)
  ref_defs = {}
  text.scan(/^\s*\[([^\]]+)\]:\s*(\S+)/) do |m|
    key = normalize_label.call(m[0])
    ref_defs[key] = m[1] unless ref_defs.key?(key)
  end

  # Reference image usage: ![alt][id], ![id][] or ![id].
  # The negative lookahead keeps inline images (handled above) from being
  # re-read as shortcut references.
  text.scan(/!\[([^\]]*)\](?:\[([^\]]*)\]|(?!\())/) do |m|
    label = m[1].nil? || m[1].empty? ? m[0] : m[1]
    url = ref_defs[normalize_label.call(label)]
    urls << url if url
  end

  # HTML <img src="...">
  text.scan(/<img\b[^>]*\bsrc\s*=\s*\"([^\"]+)\"[^>]*>/i) { |m| urls << m[0] }
  text.scan(/<img\b[^>]*\bsrc\s*=\s*\'([^\']+)\'[^>]*>/i) { |m| urls << m[0] }

  urls
    .map { |u| u.strip.sub(/\A<(.*)>\z/, '\1') } # unwrap <...> destinations
    .select { |u| u =~ %r{\Ahttps?://}i }        # \A: anchor to the whole value, not a line
    .uniq
end