ipynbdiff-0.3.8/0000755000175100017510000000000014153336547012535 5ustar pravipraviipynbdiff-0.3.8/.gitignore0000644000175100017510000000004014153336547014517 0ustar pravipravi.tool-versions .bundle *.gem ipynbdiff-0.3.8/Gemfile.lock0000644000175100017510000000114014153336547014753 0ustar pravipraviGEM remote: https://rubygems.org/ specs: diff-lcs (1.4.4) diffy (3.3.0) json (2.5.1) rspec (3.10.0) rspec-core (~> 3.10.0) rspec-expectations (~> 3.10.0) rspec-mocks (~> 3.10.0) rspec-core (3.10.1) rspec-support (~> 3.10.0) rspec-expectations (3.10.1) diff-lcs (>= 1.2.0, < 2.0) rspec-support (~> 3.10.0) rspec-mocks (3.10.2) diff-lcs (>= 1.2.0, < 2.0) rspec-support (~> 3.10.0) rspec-support (3.10.2) PLATFORMS x86_64-darwin-20 DEPENDENCIES diffy (= 3.3.0) json (= 2.5.1) rspec (= 3.10.0) BUNDLED WITH 2.2.29 ipynbdiff-0.3.8/lib/0000755000175100017510000000000014153336547013303 5ustar pravipraviipynbdiff-0.3.8/lib/output_transformer.rb0000644000175100017510000000364314153336547017620 0ustar pravipravi# frozen_string_literal: true module IpynbDiff # Transforms Jupyter output data into markdown class OutputTransformer ORDERED_KEYS = { 'execute_result' => %w[image/png image/svg+xml image/jpeg text/markdown text/latex text/plain], 'display_data' => %w[image/png image/svg+xml image/jpeg text/markdown text/latex] }.freeze def transform(output) case (output_type = output['output_type']) when 'error' transform_error(output['traceback']) when 'execute_result', 'display_data' transform_non_error(ORDERED_KEYS[output_type], output['data']) end end def transform_error(traceback) traceback.map do |t| t.split("\n").map do |line| line.gsub(/\[[0-9][0-9;]*m/, '').sub("\u001B", ' ').gsub(/\u001B/, '').rstrip << "\n" end end end def transform_non_error(accepted_keys, elements) accepted_keys.map do |key| transform_element(key, elements[key]) if elements.key?(key) end.flatten end def transform_element(output_type, output_element) case output_type when 'image/png', 'image/jpeg' transform_image(output_type, output_element) when 'image/svg+xml' transform_svg(output_element) when 'text/markdown', 'text/latex', 'text/plain' transform_text(output_element) end end def transform_image(image_type, image_content) [" ![](data:#{image_type};base64,#{image_content.gsub("\n", '')})", "\n"] end def transform_svg(image_content) lines = image_content.is_a?(Array) ? image_content : [image_content] single_line = lines.map(&:strip).join('').gsub(/\s+/, ' ') [" ![](data:image/svg+xml;utf8,#{single_line})", "\n"] end def transform_text(text_content) lines = text_content.is_a?(Array) ? text_content : [text_content] lines.map { |line| " #{line}" }.append("\n") end end end ipynbdiff-0.3.8/lib/ipynbdiff.rb0000644000175100017510000000300414153336547015577 0ustar pravipravi# frozen_string_literal: true # Human Readable Jupyter Diffs module IpynbDiff require 'transformer' require 'diffy' @default_transform_options = { include_metadata: false, cell_decorator: :html } @default_diff_options = { preprocess_input: true, write_output_to: nil, format: :text, sources_are_files: false, raise_if_invalid_notebook: false, transform_options: @default_transform_options, diff_opts: { include_diff_info: false } }.freeze def self.prepare_input(to_prepare, options) return '' unless to_prepare prep = to_prepare prep = File.read(prep) if options[:sources_are_files] prep = transform(prep, raise_errors: true, options: options[:transform_options]) if options[:preprocess_input] prep end def self.diff( from_notebook, to_notebook, options = @default_diff_options ) options = @default_diff_options.merge(options) from = prepare_input(from_notebook, options) to = prepare_input(to_notebook, options) d = Diffy::Diff.new(from, to, **options[:diff_opts]).to_s(options[:format]) File.write(options[:write_output_to], d) if options[:write_output_to] d rescue InvalidNotebookError raise if options[:raise_if_invalid_notebook] end def self.transform(notebook, raise_errors: false, options: @default_transform_options) options = @default_transform_options.merge(options) Transformer.new(**options).transform(notebook) rescue InvalidNotebookError raise if raise_errors end end ipynbdiff-0.3.8/lib/transformer.rb0000644000175100017510000000562214153336547016177 0ustar pravipravi# frozen_string_literal: true module IpynbDiff class InvalidNotebookError < StandardError end # Returns a markdown version of the Jupyter Notebook class Transformer require 'json' require 'yaml' require 'output_transformer' @cell_decorator = :html @include_metadata = true def initialize(include_metadata: true, cell_decorator: :html) @include_metadata = include_metadata @cell_decorator = cell_decorator @output_transformer = OutputTransformer.new end def validate_notebook(notebook) notebook_json = JSON.parse(notebook) return notebook_json if notebook_json.key?('cells') raise InvalidNotebookError rescue JSON::ParserError raise InvalidNotebookError end def transform(notebook) notebook_json = validate_notebook(notebook) transformed_blocks = notebook_json['cells'].map do |cell| decorate_cell(transform_cell(cell, notebook_json), cell) end transformed_blocks.prepend(transform_metadata(notebook_json)) if @include_metadata transformed_blocks.join("\n") end def decorate_cell(rows, cell) tags = cell['metadata']&.fetch('tags', []) type = cell['cell_type'] || 'raw' case @cell_decorator when :html rows.prepend(%(
\n\n)) .append("\n
\n") when :percent rows.prepend(%(%% Cell type:#{type} id:#{cell['id']} tags:#{tags&.join(',')}\n\n)) else rows end.join('') end def transform_cell(cell, notebook) cell['cell_type'] == 'code' ? transform_code_cell(cell, notebook) : transform_text_cell(cell) end def decorate_output(output_rows, output) if @cell_decorator == :html output_rows.prepend(%(\n
\n\n)).append("\n
\n") else output_rows.prepend(%(\n%%%% Output: #{output['output_type']}\n\n)) end end def transform_code_cell(cell, notebook) [ %(``` #{notebook.dig('metadata', 'kernelspec', 'language') || ''}\n), *cell['source'], "\n```\n", *cell['outputs'].map { |output| transform_output(output) } ] end def transform_output(output) transformed = @output_transformer.transform(output) decorate_output(transformed, output).join('') if transformed end def transform_text_cell(cell) source = cell['source'] (source.is_a?(Array) ? source : [source]).append("\n") end def transform_metadata(notebook_json) { 'jupyter' => { 'kernelspec' => notebook_json['metadata']['kernelspec'], 'language_info' => notebook_json['metadata']['language_info'], 'nbformat' => notebook_json['nbformat'], 'nbformat_minor' => notebook_json['nbformat_minor'] } }.to_yaml + "---\n" end end end ipynbdiff-0.3.8/README.md0000644000175100017510000000411414153336547014014 0ustar pravipravi# IpynbDiff: Better diff for Jupyter Notebooks This is a simple diff tool that cleans up jupyter notebooks, transforming each [notebook](example/1/from.ipynb) into a [readable markdown file](example/1/from_html.md), keeping the output of cells, and running the diff after. Markdowns are generated using an opinionated Jupyter to Markdown conversion. This means that the entire file is readable on the diff. The result are diffs that are much easier to read: | Diff | IpynbDiff - HTML | IpynbDiff - Percent | | ------ | ------ | ------ | | [Here](example/diff.txt) | [Here](example/ipynbdiff.txt) | [Here](example/ipynbdiff_percent.txt) | | ![](example/img/diff.png) | ![](example/img/ipynbdiff_html.png) | ![](example/img/ipynbdiff_percent.png) | This started as a port of This is a port of [ipynbdiff](https://gitlab.com/gitlab-org/incubation-engineering/mlops/ipynbdiff), but now has extended functionality although not working as git driver. ## Usage ### Generating diffs ```ruby IpynbDiff.diff(from_path, to_path, options) ``` Options: ```ruby @default_transform_options = { preprocess_input: true, # Whether the input should be transformed write_output_to: nil, # Pass a path to save the output to a file format: :text, # These are the formats Diffy accepts https://github.com/samg/diffy sources_are_files: false, # Weather to use the from/to as string or path to a file raise_if_invalid_notebook: false, # Raises an error if the notebooks are invalid, otherwise returns nil transform_options: @default_transform_options, # See below for transform options diff_opts: { include_diff_info: false # These are passed to Diffy https://github.com/samg/diffy } } ``` ### Transforming the notebooks It might be necessary to have the transformed files in addition to the diff. ```ruby IpynbDiff.transform(notebook, options) ``` Options: ```ruby @default_transform_options = { include_metadata: false, # Whether to include or not the notebook metadata (kernel, language, etc) cell_decorator: :html # :html is useful to add styling with css, :percent is better for text format } ``` ipynbdiff-0.3.8/.gitlab-ci.yml0000644000175100017510000000013614153336547015171 0ustar pravipravispecs: stage: test image: ruby:2.7 script: - bundle install - bundle exec rspec ipynbdiff-0.3.8/ipynbdiff.gemspec0000644000175100017510000000215714153336547016061 0ustar pravipravi# frozen_string_literal: true Gem::Specification.new do |s| s.name = 'ipynbdiff' s.version = ENV['LIB_VERSION'] s.summary = 'Human Readable diffs for Jupyter Notebooks' s.description = 'Better diff for Jupyter Notebooks by first preprocessing them and removing clutter' s.authors = ['Eduardo Bonet'] s.email = 'ebonet@gitlab.com' # Specify which files should be added to the gem when it is released. # The `git ls-files -z` loads the files in the RubyGem that have been added into git. s.files = Dir.chdir(File.expand_path('..', __FILE__)) do `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(spec|example)/}) } end s.homepage = 'https://gitlab.com/gitlab-org/incubation-engineering/mlops/rb-ipynbdiff' s.license = 'MIT' s.require_paths = ['lib'] s.add_runtime_dependency 'diffy', '3.3.0' s.add_runtime_dependency 'json', '2.5.1' s.add_development_dependency 'bundler', '~> 2.2' s.add_development_dependency 'guard-rspec' s.add_development_dependency 'pry' s.add_development_dependency 'rake' s.add_development_dependency 'rspec' end ipynbdiff-0.3.8/Gemfile0000644000175100017510000000017514153336547014033 0ustar pravipravi# frozen_string_literal: true source "https://rubygems.org" gem 'diffy', '3.3.0' gem 'json', '2.5.1' gem 'rspec', '3.10.0'