// blob: b5ccc1046b75f57229446c428a4b3abe7752416c (source-viewer header; not part of the C++ source)
/*
* Copyright (C) 2024 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MEDIAPROVIDER_PDF_JNI_PDFCLIENT_PAGE_H_
#define MEDIAPROVIDER_PDF_JNI_PDFCLIENT_PAGE_H_
#include <stdint.h>

#include <span>
#include <string>
#include <string_view>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>

#include "cpp/fpdf_scopers.h"
#include "form_filler.h"
#include "form_widget_info.h"
#include "fpdfview.h"
#include "rect.h"
namespace pdfClient {
// Render Flags corresponding to each render flag defined in
// 'pdf/framework/java/android/graphics/pdf/RenderParams.java'
// LINT.IfChange
// Each render flag occupies its own bit. They are declared `inline constexpr`
// so that every translation unit including this header shares one definition
// and the values are guaranteed to be compile-time constants.
// Bit 1: selects the text annotation subtypes (see renderFlagsAnnotsMap).
inline constexpr int FLAG_RENDER_TEXT_ANNOTATIONS = 1 << 1;
// Bit 2: selects the highlight annotation subtype (see renderFlagsAnnotsMap).
inline constexpr int FLAG_RENDER_HIGHLIGHT_ANNOTATIONS = 1 << 2;
// LINT.ThenChange(packages/providers/MediaProvider/pdf/framework/java/android/graphics/pdf/RenderParams.java)
// Maps each render flag to the pdfium annotation subtypes (FPDF_ANNOT_*) that
// the flag covers.
//
// Declared `inline` rather than `static` so that the map is constructed once
// for the whole program instead of once per translation unit that includes
// this header (each copy would otherwise run its own dynamic initialization
// and heap allocations at load time).
inline const std::unordered_map<int, std::vector<int>> renderFlagsAnnotsMap = {
        {FLAG_RENDER_TEXT_ANNOTATIONS, std::vector<int>{FPDF_ANNOT_TEXT, FPDF_ANNOT_FREETEXT}},
        {FLAG_RENDER_HIGHLIGHT_ANNOTATIONS, std::vector<int>{FPDF_ANNOT_HIGHLIGHT}},
};
// Half-open range [first, second) of codepoint indices into a page's text:
// `first` is the start index (inclusive) and `second` is the stop index
// (exclusive).
using TextRange = std::pair<int, int>;
// A start index (inclusive) or stop index (exclusive) into the string of
// codepoints that make up a range of text, and a point on the boundary where
// the selection starts or stops.
struct SelectionBoundary {
int index;
Point_i point;
bool is_rtl;
SelectionBoundary(int i, int x, int y, bool r) : index(i), is_rtl(r) { point = IntPoint(x, y); }
};
// Destination of a GotoLink. Every field starts out as zero and can be filled
// in either directly or through the matching setter.
struct GotoLinkDest {
    // Target page number.
    int page_number = 0;
    // Target position and zoom. NOTE(review): the coordinate space and the
    // meaning of a zero zoom are defined by whoever fills this in - confirm
    // against the GetGotoLinks() implementation.
    float x = 0.0f;
    float y = 0.0f;
    float zoom = 0.0f;

    void set_page_number(int value) { page_number = value; }

    void set_x(float value) { x = value; }

    void set_y(float value) { y = value; }

    void set_zoom(float value) { zoom = value; }
};
// A link on a page together with the destination it jumps to. Instances are
// returned by Page::GetGotoLinks().
struct GotoLink {
    // Rectangle(s) associated with the link.
    // NOTE(review): presumably the clickable bounds of the link - confirm the
    // coordinate space against the GetGotoLinks() implementation.
    std::vector<Rectangle_i> rect;
    // Where the link leads.
    GotoLinkDest dest;
};
// Wrapper on a FPDF_PAGE that adds rendering functionality, plus text
// extraction and search, text selection, link discovery and form filling.
//
// Page is move-construct-only. Declaring the move constructor already
// suppresses the implicit copy operations and no move assignment is generated;
// those operations are deleted explicitly below so the intent is visible.
class Page {
  public:
    // FPDF_PAGE is opened when constructed. Neither |doc| nor |form_filler| is
    // owned by the Page.
    Page(FPDF_DOCUMENT doc, int page_num, FormFiller* form_filler);

    // Move constructor.
    Page(Page&& p);

    // Not copyable and not assignable.
    Page(const Page&) = delete;
    Page& operator=(const Page&) = delete;
    Page& operator=(Page&&) = delete;

    virtual ~Page();

    // Extent of the page in the external co-ordinate system, which runs from
    // (0, 0) to (Width(), Height()).
    int Width() const;
    int Height() const;

    // NOTE(review): presumably the rectangle (0, 0)-(Width(), Height()) -
    // confirm in the implementation.
    Rectangle_i Dimensions() const;

    // Render the page to the output bitmap, applying the appropriate transform, clip, and
    // render mode as specified.
    // NOTE(review): |show_annot_types| is presumably a bitmask of the FLAG_RENDER_* values
    // declared in this header - confirm in the implementation.
    void Render(FPDF_BITMAP bitmap, FS_MATRIX transform, int clip_left, int clip_top,
                int clip_right, int clip_bottom, int render_mode, int show_annot_types,
                bool render_form_fields);

    // The page has a transform that must be applied to all characters and objects
    // on the page. This transforms from the page's internal co-ordinate system
    // to the external co-ordinate system from (0, 0) to (Width(), Height()).
    Point_i ApplyPageTransform(const Point_d& input) const;
    Rectangle_i ApplyPageTransform(const Rectangle_d& input) const;
    Rectangle_i ApplyPageTransform(const Rectangle_i& input) const;

    // Transform from the external co-ordinate system (0, 0)-(Width(), Height())
    // back into the page's internal co-ordinate system.
    Point_d UnapplyPageTransform(const Point_i& input) const;

    // Number of chars on the page; valid boundary indices are [0...NumChars()].
    int NumChars();

    // NOTE(review): presumably the Unicode codepoint of the char at
    // |char_index| - confirm in the implementation.
    uint32_t GetUnicode(int char_index);

    // Returns the entire text of the given page in UTF-8.
    std::string GetTextUtf8();

    // Returns part of the text of the given page in UTF-8.
    std::string GetTextUtf8(int start_index, int stop_index);

    // Appends each alt-text instance on the page to |result|.
    void GetAltTextUtf8(std::vector<std::string>* result) const;

    // Searches for the given word on the given page and returns the number of
    // matches. Ignores case and accents when searching.
    // If the matches vector is not null, it is filled with the start and end
    // indices of each match - these are character indices according to the
    // FPDFText API.
    int FindMatchesUtf8(std::string_view utf8, std::vector<TextRange>* matches);

    // Same as above, but finds the bounding boxes of the matches. Returns the
    // number of matches and fills in the rects vector. Each match can take more
    // than one rect to bound, so the match_to_rect vector is filled so that
    // rects[match_to_rect[i]] is the first rectangle that belongs with match i.
    // Matches for which we cannot find a single bounding rectangle are discarded.
    // The char_indexes vector is filled with the char index that each match
    // starts at - the beginning of its TextRange.
    int BoundsOfMatchesUtf8(std::string_view utf8, std::vector<Rectangle_i>* rects,
                            std::vector<int>* match_to_rect, std::vector<int>* char_indexes);

    // Appends 0 or more rectangles to the given vector that surround the text
    // of the given page from the start index and the stop index.
    // Returns the number of rectangles used to surround the text.
    int GetTextBounds(int start_index, int stop_index, std::vector<Rectangle_i>* rects);

    // If there is a word at the given point, returns true and modifies the given
    // boundaries to point to each end of the word - otherwise returns false.
    bool SelectWordAt(const Point_i& point, SelectionBoundary* start, SelectionBoundary* stop);

    // Modifies the given selection boundary object in the following ways:
    // - The resulting boundary will have an index that is within the range
    //   [0...n], where n is NumChars().
    // - The resulting boundary will have a point that is at the outer corner
    //   of the char just inside the selection.
    void ConstrainBoundary(SelectionBoundary* boundary);

    // NOTE(review): presumably the font size of the char at |index| - confirm
    // units and rounding in the implementation.
    int GetFontSize(int index);

    // Get the URLs and bounding rectangles for all links on the page.
    int GetLinksUtf8(std::vector<Rectangle_i>* rects, std::vector<int>* link_to_rect,
                     std::vector<std::string>* urls) const;

    // Returns the list of GotoLink for all GotoLinks on the page.
    std::vector<GotoLink> GetGotoLinks() const;

    // Perform any operations required to prepare this page for form filling.
    void InitializeFormFilling();

    // Perform any clean up operations after form filling is complete.
    void TerminateFormFilling();

    // Obtain information about the form widget at |point| on the page, if any.
    // |point| is in device coordinates.
    FormWidgetInfo GetFormWidgetInfo(Point_i point);

    // Obtain information about the form widget with index |annotation_index| on
    // the page, if any.
    FormWidgetInfo GetFormWidgetInfo(int annotation_index);

    // Obtain form widget information for all form field annotations on the page,
    // optionally restricting by |type_ids| and store in |widget_infos|. See
    // fpdf_formfill.h for type constants. If |type_ids| is empty all form
    // widgets on |page| will be added to |widget_infos|, if any.
    void GetFormWidgetInfos(const std::unordered_set<int>& type_ids,
                            std::vector<FormWidgetInfo>* widget_infos);

    // Perform a click at |point| on the page. Any focus in the document
    // resulting from this operation will be killed before returning. No-op if
    // no widget present at |point| or widget cannot be edited. Returns true if
    // click was performed. |point| is in device coordinates.
    bool ClickOnPoint(Point_i point);

    // Set the value text of the widget at |annotation_index| on page. No-op if
    // no widget present or widget cannot be edited. Returns true if text was
    // set, false otherwise.
    bool SetFormFieldText(int annotation_index, std::string_view text);

    // Set the |selected_indices| for the choice widget at |annotation_index| as
    // selected and deselect all other indices. No-op if no widget present or
    // widget cannot be edited. Returns true if indices were set, false otherwise.
    bool SetChoiceSelection(int annotation_index, std::span<const int> selected_indices);

    // Informs the page that the |rect| of the page bitmap has been invalidated.
    // This takes place following form filling operations. |rect| must be in page
    // coordinates.
    void NotifyInvalidRect(Rectangle_i rect);

    // Return whether or not an area of the bitmap has been invalidated.
    bool HasInvalidRect();

    // Returns the area of the page that has been invalidated and resets the
    // field. Rect returned in device coordinates.
    Rectangle_i ConsumeInvalidRect();

    // Returns FPDF_PAGE. This Page retains ownership. All operations that wish
    // to access FPDF_PAGE should call methods of this class instead of
    // requesting the FPDF_PAGE directly through this method.
    void* page();

  private:
    // Convenience methods to access the variables dependent on an initialized
    // ScopedFPDFTextPage. We lazy init text_page_ for efficiency because many
    // page operations do not require it.
    FPDF_TEXTPAGE text_page();
    int first_printable_char_index();
    int last_printable_char_index();

    // Check that text_page_ and first/last_printable_char_index_ have been
    // initialized and do so if not.
    void EnsureTextPageInitialized();

    // Android bitmaps are in ARGB order. pdfClient emits bitmaps which have red and
    // blue swapped when treated as Android bitmaps - but this function fixes it.
    // NOTE: This might rely on little-endian architecture.
    void InPlaceSwapRedBlueChannels(void* pixels, int num_pixels) const;

    // Looks for an instance of the given UTF32 string on the given page, starting
    // not before the page_start index and ending before the page_stop index.
    // If found, returns true and updates the TextRange. Case/accent insensitive.
    bool FindMatch(const std::u32string& query, int page_start, int page_stop, TextRange* match);

    // Checks if the page matches the given UTF32 string at the given match_start
    // index that ends before the page_stop index. If it matches, returns true
    // and updates the TextRange. Case/accent insensitive.
    bool IsMatch(const std::u32string& query, int match_start, int page_stop, TextRange* match);

    // Returns a SelectionBoundary at a particular index - 0 means before the char
    // at index 0, 1 means after char 0 but before the char at index 1, and so on.
    SelectionBoundary GetBoundaryAtIndex(int index);

    // Returns whether text is flowing left or right at a particular index.
    bool IsRtlAtIndex(int index);

    // Returns a SelectionBoundary at a particular index, once we already know
    // which way the text is flowing at that index.
    SelectionBoundary GetBoundaryAtIndex(int index, bool is_rtl);

    // Returns a SelectionBoundary as near as possible to the given point.
    SelectionBoundary GetBoundaryAtPoint(const Point_i& point);

    // Given a boundary index to the middle or either end of a word, returns
    // the boundary index of the start of that word - which is the index of the
    // first char that is part of that word.
    int GetWordStartIndex(int index);

    // Given a boundary index to the middle or either end of a word, returns
    // the boundary index of the stop of that word - which is the index of the
    // first char that is immediately after that word, but not part of it.
    int GetWordStopIndex(int index);

    // Returns the rectangle that bounds the given char - page transform is not
    // yet applied, must be applied later.
    Rectangle_d GetRawCharBounds(int char_index);

    // Returns the rectangle that bounds the given char, with the page transform
    // already applied.
    Rectangle_i GetCharBounds(int char_index);

    // Returns the origin of the given char, with the page transform applied.
    Point_i GetCharOrigin(int char_index);

    // Get the URLs and bounding rectangles for annotation links only - text
    // that has been annotated to link to some URL.
    int GetAnnotatedLinksUtf8(std::vector<Rectangle_i>* rects, std::vector<int>* link_to_rect,
                              std::vector<std::string>* urls) const;

    // Get the URLs and bounding rectangles for inferred links only - text that
    // we recognize as a potential link since it starts with http:// or similar.
    int GetInferredLinksUtf8(std::vector<Rectangle_i>* rects, std::vector<int>* link_to_rect,
                             std::vector<std::string>* urls) const;

    // Classify |link| as a go-to link or as a URL link, respectively.
    // NOTE(review): the exact criteria live in the implementation - confirm there.
    bool IsGotoLink(FPDF_LINK link) const;
    bool IsUrlLink(FPDF_LINK link) const;

    // Get the URL of the given link, in UTF-8.
    std::string GetUrlUtf8(FPDF_LINK link) const;

    // Get the bounds of the given link, in page co-ordinates.
    Rectangle_i GetRect(FPDF_LINK link) const;

    FPDF_DOCUMENT document_;  // Not owned.

    ScopedFPDFPage page_;

    FormFiller* const form_filler_;  // Not owned.

    // These variables are lazily initialized and should be accessed via their
    // corresponding accessor methods.
    ScopedFPDFTextPage text_page_;
    int first_printable_char_index_;
    int last_printable_char_index_;

    // Rectangle representing an area of the bitmap for this page that has been
    // reported as invalidated. Will be coalesced from all rectangles that are
    // reported as invalidated since the last time this rectangle was consumed.
    // Rectangles are invalidated due to form filling operations.
    // Rectangle is in Device Coordinates.
    Rectangle_i invalid_rect_;
};
} // namespace pdfClient
#endif // MEDIAPROVIDER_PDF_JNI_PDFCLIENT_PAGE_H_